/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See memset-stub.c */
#else
#include "asmdefs.h"

/* Register roles (AAPCS64: x0-x2 are the C arguments).  */
#define dstin	x0	/* in:  destination pointer; also the return value,
			   never modified.  */
#define val	x1	/* fill value replicated, 64-bit view.  */
#define valw	w1	/* in:  fill byte (low 8 bits significant).  */
#define count	x2	/* in:  byte count; reused as loop counter.  */
#define dst	x3	/* scratch: current (aligned) store pointer.  */
#define dstend	x4	/* scratch: dstin + count (one past the end).  */
#define zva_val	x5	/* scratch: DCZID_EL0 readout.  */

/*-----------------------------------------------------------------------
 * void *memset (void *dstin, int valw, size_t count)
 * In:    x0 = dstin, w1 = valw, x2 = count
 * Out:   x0 = dstin (left untouched throughout)
 * Clobb: x1-x5, v0, flags
 *
 * Strategy: small sizes are handled branch-lean with pairs of stores
 * issued from both ends of the buffer (the stores may overlap, which is
 * harmless for memset).  Large zeroing uses DC ZVA when available.
 *---------------------------------------------------------------------*/
ENTRY (memset)
	PTR_ARG (0)
	SIZE_ARG (2)

	dup	v0.16B, valw		/* v0 = fill byte in all 16 lanes.  */
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]		/* 8 copies of the byte for GPR stores.  */

	/* Set 0..15 bytes.  Each test halves the remaining ambiguity;
	   stores from dstin and dstend-N overlap rather than branch.  */
	tbz	count, 3, 1f
	str	val, [dstin]		/* count 8..15: two possibly
					   overlapping 8-byte stores.  */
	str	val, [dstend, -8]
	ret
	.p2align 4
1:	tbz	count, 2, 2f
	str	valw, [dstin]		/* count 4..7.  */
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]		/* count 1..3: byte, then halfword
					   at the end iff count >= 2.  */
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes (b.hs above admits count == 16).  */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)	/* bit 6 set => count 64..96.  */
	str	q0, [dstend, -16]	/* count 16..63: 2 or 4 overlapping
					   16-byte stores.  */
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
L(set96):
	str	q0, [dstin, 16]		/* [dstin] already written above.  */
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	and	valw, valw, 255		/* isolate the fill byte for the
					   zero test below.  */
	bic	dst, dstin, 15		/* align store pointer down to 16.  */
	str	q0, [dstin]		/* head: covers dstin..dstin+15.  */
	/* Use DC ZVA only when count >= 160 AND the value is zero:
	   if hs, compare valw with 0; otherwise force NZCV to 0 (ne).  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	/* DCZID_EL0[3:0] = log2(block size in words); bit 4 (DZP) set
	   means ZVA is prohibited and also fails the compare.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	str	q0, [dst, 16]		/* fill up to the first 64-byte
					   boundary with explicit stores.  */
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* align down to the ZVA block size.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop
					   (pre-increment + 64-byte tail).  */

	.p2align 4
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst		/* zero one whole 64-byte block.  */
	subs	count, count, 64
	b.hi	L(zva_loop)
	stp	q0, q0, [dstend, -64]	/* tail: last 64 bytes, overlapping
					   the final ZVA block if needed.  */
	stp	q0, q0, [dstend, -32]
	ret

L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]	/* 64 bytes per iteration via the
					   writeback form below.  */
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]	/* overlapping tail stores finish
					   the last 1..64 bytes.  */
	stp	q0, q0, [dstend, -32]
	ret

END (memset)
#endif