/* strcpy/stpcpy - copy a string returning pointer to start/end.

   Copyright (c) 2013, 2014, 2015, 2020-2023 ARM Ltd.
   All Rights Reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the company nor the names of its contributors
         may be used to endorse or promote products derived from this
         software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* See strcpy-stub.c */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.  Note the deliberate aliasing: len/synd share x4 and
   tmp/shift share x5 — each pair is never live at the same time.  */
#define dstin	x0	/* Incoming destination pointer (arg 0).  */
#define srcin	x1	/* Incoming source pointer (arg 1).  */
#define result	x0	/* Return value (stpcpy only; see IFSTPCPY).  */

#define src	x2	/* 16-byte-aligned cursor into the source.  */
#define dst	x3	/* Cursor into the destination (main loop).  */
#define len	x4	/* Byte index of the terminating NUL.  */
#define synd	x4	/* Nibble-mask syndrome (4 bits per byte).  */
#define tmp	x5	/* Scratch / src-dst displacement.  */
#define shift	x5	/* Bit shift discarding bytes before srcin.  */
#define data1	x6
#define dataw1	w6
#define data2	x7
#define dataw2	w7

/* dataq/vdata/dend are views of the same vector register (q0/v0/…),
   so a load into dataq is visible to cmeq on vdata.  */
#define dataq	q0
#define vdata	v0
#define vhas_nul v1	/* Per-byte 0x00/0xFF result of cmeq-with-zero.  */
#define vend	v2	/* Narrowed syndrome vector.  */
#define dend	d2	/* 64-bit scalar view of vend for fmov to synd.  */
#define dataq2	q1

/* Build either stpcpy (returns pointer to the copied NUL) or strcpy
   (returns dstin, already in x0, so no extra instruction is emitted).
   IFSTPCPY expands its arguments only in the stpcpy build.  */
#ifdef BUILD_STPCPY
# define STRCPY stpcpy
# define IFSTPCPY(X,...) X,__VA_ARGS__
#else
# define STRCPY strcpy
# define IFSTPCPY(X,...)
#endif

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
   per byte. We take 4 bits of every comparison byte with shift right and narrow
   by 4 instruction. Since the bits in the nibble mask reflect the order in
   which things occur in the original string, counting leading zeros identifies
   exactly which byte matched. */

/* char *strcpy (char *dst, const char *src)   — returns dst
   char *stpcpy (char *dst, const char *src)   — returns dst + strlen (src)
   In:  x0 = destination, x1 = source (NUL-terminated).
   Out: x0 = result (see above).
   Clobbers: x2-x7, v0-v2, per AAPCS64 caller-saved registers.  */
ENTRY (STRCPY)
	PTR_ARG (0)
	PTR_ARG (1)
	/* Load the aligned 16-byte granule containing the string start.
	   Reading from the rounded-down address keeps every access inside
	   a granule the string touches (MTE-safe).  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* shift = srcin * 4; lsr uses the amount mod 64, so this discards
	   the 4-bit syndrome nibbles of bytes before srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4	/* 128-bit mask -> 64-bit syndrome.  */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbnz	synd, L(tail)		/* NUL within the first granule.  */

	/* Check the second 16-byte granule (vdata aliases dataq).  */
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4
	fmov	synd, dend
	cbz	synd, L(start_loop)	/* No NUL yet: enter the main loop.  */

	/* NUL found in the second granule: compute len, the index of the
	   NUL relative to srcin, then copy 1-32 bytes below.  */
#ifndef __AARCH64EB__
	rbit	synd, synd		/* Little-endian: first NUL = lowest bits.  */
#endif
	sub	tmp, src, srcin		/* Bytes already covered by granule 1.  */
	clz	len, synd
	add	len, tmp, len, lsr 2	/* 4 syndrome bits per byte.  */
	tbz	len, 4, L(less16)
	/* 16-31 bytes: two possibly-overlapping 16-byte copies that
	   together cover [0, len] including the NUL.  */
	sub	tmp, len, 15
	ldr	dataq, [srcin]
	ldr	dataq2, [srcin, tmp]
	str	dataq, [dstin]
	str	dataq2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(tail):
	/* NUL in the first granule: convert the syndrome to len.  */
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2
L(less16):
	tbz	len, 3, L(less8)
	/* 8-15 bytes: two overlapping 8-byte copies.  */
	sub	tmp, len, 7
	ldr	data1, [srcin]
	ldr	data2, [srcin, tmp]
	str	data1, [dstin]
	str	data2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(less8):
	subs	tmp, len, 3
	b.lo	L(less4)
	/* 4-7 bytes: two overlapping 4-byte copies.  */
	ldr	dataw1, [srcin]
	ldr	dataw2, [srcin, tmp]
	str	dataw1, [dstin]
	str	dataw2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(less4):
	cbz	len, L(zerobyte)
	/* 2-3 bytes: one 2-byte copy, NUL (and any middle byte already
	   covered) finished by the strb below.  */
	ldrh	dataw1, [srcin]
	strh	dataw1, [dstin]
L(zerobyte):
	strb	wzr, [dstin, len]	/* Write the terminating NUL.  */
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(start_loop):
	/* Copy the unaligned first 16 bytes, then run the loop over
	   aligned source granules.  Invariant: tmp = srcin - dstin, so
	   for any destination cursor d, d + tmp is the matching source
	   address; dst is kept as the destination mirror of src.  */
	sub	tmp, srcin, dstin
	ldr	dataq2, [srcin]
	sub	dst, src, tmp
	str	dataq2, [dstin]
L(loop):
	/* Unrolled x2: each iteration stores 32 bytes already known to be
	   NUL-free and probes the next granule before committing it.  */
	str	dataq, [dst], 32
	ldr	dataq, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* Cheap any-NUL test.  */
	fmov	synd, dend
	cbnz	synd, L(loopend)
	str	dataq, [dst, -16]
	ldr	dataq, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
	add	dst, dst, 16		/* Rebalance dst for the second exit.  */
L(loopend):
	/* Recompute the precise 4-bits-per-byte syndrome (umaxp above only
	   answered "any NUL?"), locate the NUL, then finish with one final
	   16-byte copy ending exactly at the NUL.  */
	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	synd, dend
	sub	dst, dst, 31
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	len, synd
	lsr	len, len, 2
	add	dst, dst, len		/* dst now points 15 bytes before the NUL's slot.  */
	ldr	dataq, [dst, tmp]	/* dst + tmp = corresponding source address.  */
	str	dataq, [dst]
	IFSTPCPY (add result, dst, 15)
	ret

END (STRCPY)
#endif