/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__) || !defined(__ARM_NEON)
/* See memset-stub.c */
#else
#include "asmdefs.h"

/* AAPCS64 argument registers for void *memset(void *s, int c, size_t n):
   x0 = destination, w1 = fill byte (low 8 bits used), x2 = byte count.
   x0 is returned unchanged.  x3-x5 and v0 are scratch (caller-saved).  */
#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5

ENTRY (memset)
	PTR_ARG (0)
	SIZE_ARG (2)

	dup	v0.16B, valw		/* Splat the fill byte across all 16 lanes.  */
	add	dstend, dstin, count	/* dstend = one past the last byte; small/medium
					   cases store backwards from here so the head
					   and tail stores may overlap harmlessly.  */

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]		/* 64-bit replicated pattern for GPR stores.  */

	/* Set 0..15 bytes.  Branch-free size dispatch: each bit of count
	   selects one overlapping front store + back store pair.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	.p2align 4
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes (bit 6 of count sends 64..96 to set96).  */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)
	str	q0, [dstend, -16]
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	and	valw, valw, 255
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
	str	q0, [dstin]
	/* Use DC ZVA only when count >= 160 AND the fill byte is zero:
	   ccmp tests valw==0 only if the hs (count >= 160) condition held,
	   otherwise it forces the flags to "not equal" (nzcv = 0).  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	/* DCZID_EL0[3:0] = log2(ZVA size in words); bit 4 set disables ZVA.
	   Value 4 (with bit 4 clear) means a 64-byte zeroing granule.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	/* Fill up to the first 64-byte-aligned boundary with normal stores,
	   then zero whole cache lines with DC ZVA.  */
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst		/* Zero one 64-byte line, no data transfer.  */
	subs	count, count, 64
	b.hi	L(zva_loop)
	stp	q0, q0, [dstend, -64]	/* Unaligned 64-byte tail.  */
	stp	q0, q0, [dstend, -32]
	ret

L(no_zva):
	/* 64-byte stp loop; dst is pre-biased so the writeback form
	   [dst, 64]! advances the pointer once per iteration.  */
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]	/* Unaligned 64-byte tail.  */
	stp	q0, q0, [dstend, -32]
	ret

END (memset)
#endif