1/* ANSI C standard library function strcmp. 2 3 Copyright (c) 2001-20012 Tensilica Inc. 4 5 Permission is hereby granted, free of charge, to any person obtaining 6 a copy of this software and associated documentation files (the 7 "Software"), to deal in the Software without restriction, including 8 without limitation the rights to use, copy, modify, merge, publish, 9 distribute, sublicense, and/or sell copies of the Software, and to 10 permit persons to whom the Software is furnished to do so, subject to 11 the following conditions: 12 13 The above copyright notice and this permission notice shall be included 14 in all copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24#include <picolibc.h> 25 26#include "xtensa-asm.h" 27 28#define MASK4 0x40404040 29 30 31#if XCHAL_HAVE_L32R 32 .literal .Lmask0, MASK0 33 .literal .Lmask1, MASK1 34 .literal .Lmask2, MASK2 35 .literal .Lmask3, MASK3 36 .literal .Lmask4, MASK4 37#endif /* XCHAL_HAVE_L32R */ 38 39 .text 40 .align 4 41 .literal_position 42 .global strcmp 43 .type strcmp, @function 44strcmp: 45 46 leaf_entry sp, 16 47 /* a2 = s1, a3 = s2 */ 48 49 l8ui a8, a2, 0 // byte 0 from s1 50 l8ui a9, a3, 0 // byte 0 from s2 51 movi a10, 3 // mask 52 bne a8, a9, .Lretdiff 53 54 or a11, a2, a3 55 bnone a11, a10, .Laligned 56 57 xor a11, a2, a3 // compare low two bits of s1 and s2 58 bany a11, a10, .Lunaligned // if they have different alignment 59 60 /* s1/s2 are not word-aligned. */ 61 addi a2, a2, 1 // advance s1 62 beqz a8, .Leq // bytes equal, if zero, strings are equal 63 addi a3, a3, 1 // advance s2 64 bnone a2, a10, .Laligned // if s1/s2 now aligned 65 l8ui a8, a2, 0 // byte 1 from s1 66 l8ui a9, a3, 0 // byte 1 from s2 67 addi a2, a2, 1 // advance s1 68 bne a8, a9, .Lretdiff // if different, return difference 69 beqz a8, .Leq // bytes equal, if zero, strings are equal 70 addi a3, a3, 1 // advance s2 71 bnone a2, a10, .Laligned // if s1/s2 now aligned 72 l8ui a8, a2, 0 // byte 2 from s1 73 l8ui a9, a3, 0 // byte 2 from s2 74 addi a2, a2, 1 // advance s1 75 bne a8, a9, .Lretdiff // if different, return difference 76 beqz a8, .Leq // bytes equal, if zero, strings are equal 77 addi a3, a3, 1 // advance s2 78 j .Laligned 79 80/* s1 and s2 have different alignment. 81 82 If the zero-overhead loop option is available, use an (almost) 83 infinite zero-overhead loop with conditional exits so we only pay 84 for taken branches when exiting the loop. 85 86 Note: It is important for this unaligned case to come before the 87 code for aligned strings, because otherwise some of the branches 88 above cannot reach and have to be transformed to branches around 89 jumps. The unaligned code is smaller and the branches can reach 90 over it. */ 91 92 .align 4 93#if XCHAL_HAVE_LOOPS 94#if XCHAL_HAVE_DENSITY 95 /* (2 mod 4) alignment for loop instruction */ 96#else 97 /* (1 mod 4) alignment for loop instruction */ 98 .byte 0 99 .byte 0 100#endif 101#endif 102.Lunaligned: 103#if XCHAL_HAVE_LOOPS 104#if XCHAL_HAVE_DENSITY 105 _movi.n a8, 0 // set up for the maximum loop count 106#else 107 _movi a8, 0 // set up for the maximum loop count 108#endif 109 loop a8, .Lretdiff // loop forever (almost anyway) 110#endif 111.Lnextbyte: 112 l8ui a8, a2, 0 113 l8ui a9, a3, 0 114 addi a2, a2, 1 115 bne a8, a9, .Lretdiff 116 addi a3, a3, 1 117#if XCHAL_HAVE_LOOPS 118 beqz a8, .Lretdiff 119#else 120 bnez a8, .Lnextbyte 121#endif 122.Lretdiff: 123 sub a2, a8, a9 124 leaf_return 125 126/* s1 is word-aligned; s2 is word-aligned. 127 128 If the zero-overhead loop option is available, use an (almost) 129 infinite zero-overhead loop with conditional exits so we only pay 130 for taken branches when exiting the loop. */ 131 132/* New algorithm, relying on the fact that all normal ASCII is between 133 32 and 127. 134 135 Rather than check all bytes for zero: 136 Take one word (4 bytes). Call it w1. 137 Shift w1 left by one into w1'. 138 Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't. 139 Check that all 4 bit 6's (one for each byte) are one: 140 If they are, we are definitely not done. 141 If they are not, we are probably done, but need to check for zero. */ 142 143 .align 4 144#if XCHAL_HAVE_LOOPS 145#if !XCHAL_HAVE_L32R 146 /* (2 mod 4) alignment for loop instruction */ 147 .byte 0 148 .byte 0 149#endif 150.Laligned: 151#if XCHAL_HAVE_L32R 152 l32r a4, .Lmask0 // mask for byte 0 153 l32r a7, .Lmask4 154#else 155 const16 a4, MASK0@h 156 const16 a4, MASK0@l 157 const16 a7, MASK4@h 158 const16 a7, MASK4@l 159#endif 160 /* Loop forever */ 1611: 162 loop a0, .Laligned_done 163 164 /* First unrolled loop body. */ 165 l32i a8, a2, 0 // get word from s1 166 l32i a9, a3, 0 // get word from s2 167 slli a5, a8, 1 168 bne a8, a9, .Lwne2 169 or a9, a8, a5 170 bnall a9, a7, .Lprobeq 171 172 /* Second unrolled loop body. */ 173 l32i a8, a2, 4 // get word from s1+4 174 l32i a9, a3, 4 // get word from s2+4 175 slli a5, a8, 1 176 bne a8, a9, .Lwne2 177 or a9, a8, a5 178 bnall a9, a7, .Lprobeq2 179 180 addi a2, a2, 8 // advance s1 pointer 181 addi a3, a3, 8 // advance s2 pointer 182.Laligned_done: 183 j 1b 184 185.Lprobeq2: 186 /* Adjust pointers to account for the loop unrolling. */ 187 addi a2, a2, 4 188 addi a3, a3, 4 189 190#else /* !XCHAL_HAVE_LOOPS */ 191 192.Laligned: 193 movi a4, MASK0 // mask for byte 0 194 movi a7, MASK4 195 j .Lfirstword 196.Lnextword: 197 addi a2, a2, 4 // advance s1 pointer 198 addi a3, a3, 4 // advance s2 pointer 199.Lfirstword: 200 l32i a8, a2, 0 // get word from s1 201 l32i a9, a3, 0 // get word from s2 202 slli a5, a8, 1 203 bne a8, a9, .Lwne2 204 or a9, a8, a5 205 ball a9, a7, .Lnextword 206#endif /* !XCHAL_HAVE_LOOPS */ 207 208 /* align (0 mod 4) */ 209.Lprobeq: 210 /* Words are probably equal, but check for sure. 211 If not, loop over the rest of string using normal algorithm. */ 212 213 bnone a8, a4, .Leq // if byte 0 is zero 214#if XCHAL_HAVE_L32R 215 l32r a5, .Lmask1 // mask for byte 1 216 l32r a6, .Lmask2 // mask for byte 2 217 bnone a8, a5, .Leq // if byte 1 is zero 218 l32r a7, .Lmask3 // mask for byte 3 219 bnone a8, a6, .Leq // if byte 2 is zero 220 bnone a8, a7, .Leq // if byte 3 is zero 221 /* align (1 mod 4) */ 222#else 223 const16 a5, MASK1@h // mask for byte 1 224 const16 a5, MASK1@l 225 bnone a8, a5, .Leq // if byte 1 is zero 226 const16 a6, MASK2@h // mask for byte 2 227 const16 a6, MASK2@l 228 bnone a8, a6, .Leq // if byte 2 is zero 229 const16 a7, MASK3@h // mask for byte 3 230 const16 a7, MASK3@l 231 bnone a8, a7, .Leq // if byte 3 is zero 232 /* align (2 mod 4) */ 233#endif /* XCHAL_HAVE_L32R */ 234#if XCHAL_HAVE_DENSITY 235 addi.n a2, a2, 4 // advance s1 pointer 236 addi.n a3, a3, 4 // advance s2 pointer 237 /* align (1 mod 4) or (2 mod 4) */ 238#else 239 addi a2, a2, 4 // advance s1 pointer 240 addi a3, a3, 4 // advance s2 pointer 241 or a1, a1, a1 // nop 242#if !XCHAL_HAVE_L32R 243 or a1, a1, a1 // nop 244#endif 245 /* align (2 mod 4) */ 246#endif /* XCHAL_HAVE_DENSITY */ 247#if XCHAL_HAVE_LOOPS 2481: 249 loop a0, .Leq // loop forever (a4 is bigger than max iters) 250 l32i a8, a2, 0 // get word from s1 251 l32i a9, a3, 0 // get word from s2 252 addi a2, a2, 4 // advance s1 pointer 253 bne a8, a9, .Lwne 254 bnone a8, a4, .Leq // if byte 0 is zero 255 bnone a8, a5, .Leq // if byte 1 is zero 256 bnone a8, a6, .Leq // if byte 2 is zero 257 bnone a8, a7, .Leq // if byte 3 is zero 258 addi a3, a3, 4 // advance s2 pointer 259 j 1b 260#else /* !XCHAL_HAVE_LOOPS */ 261 262 j .Lfirstword2 263.Lnextword2: 264 addi a3, a3, 4 // advance s2 pointer 265.Lfirstword2: 266 l32i a8, a2, 0 // get word from s1 267 l32i a9, a3, 0 // get word from s2 268 addi a2, a2, 4 // advance s1 pointer 269 bne a8, a9, .Lwne 270 bnone a8, a4, .Leq // if byte 0 is zero 271 bnone a8, a5, .Leq // if byte 1 is zero 272 bnone a8, a6, .Leq // if byte 2 is zero 273 bany a8, a7, .Lnextword2 // if byte 3 is zero 274#endif /* !XCHAL_HAVE_LOOPS */ 275 276 /* Words are equal; some byte is zero. */ 277.Leq: movi a2, 0 // return equal 278 leaf_return 279 280.Lwne2: /* Words are not equal. On big-endian processors, if none of the 281 bytes are zero, the return value can be determined by a simple 282 comparison. */ 283#ifdef __XTENSA_EB__ 284 or a10, a8, a5 285 bnall a10, a7, .Lsomezero 286 bgeu a8, a9, .Lposreturn 287 movi a2, -1 288 leaf_return 289.Lposreturn: 290 movi a2, 1 291 leaf_return 292.Lsomezero: // There is probably some zero byte. 293#endif /* __XTENSA_EB__ */ 294.Lwne: /* Words are not equal. */ 295 xor a2, a8, a9 // get word with nonzero in byte that differs 296 bany a2, a4, .Ldiff0 // if byte 0 differs 297 movi a5, MASK1 // mask for byte 1 298 bnone a8, a4, .Leq // if byte 0 is zero 299 bany a2, a5, .Ldiff1 // if byte 1 differs 300 movi a6, MASK2 // mask for byte 2 301 bnone a8, a5, .Leq // if byte 1 is zero 302 bany a2, a6, .Ldiff2 // if byte 2 differs 303 bnone a8, a6, .Leq // if byte 2 is zero 304#ifdef __XTENSA_EB__ 305.Ldiff3: 306.Ldiff2: 307.Ldiff1: 308 /* Byte 0 is equal (at least) and there is a difference before a zero 309 byte. Just subtract words to get the return value. 310 The high order equal bytes cancel, leaving room for the sign. */ 311 sub a2, a8, a9 312 leaf_return 313 314.Ldiff0: 315 /* Need to make room for the sign, so can't subtract whole words. */ 316 extui a10, a8, 24, 8 317 extui a11, a9, 24, 8 318 sub a2, a10, a11 319 leaf_return 320 321#else /* !__XTENSA_EB__ */ 322 /* Little-endian is a little more difficult because can't subtract 323 whole words. */ 324.Ldiff3: 325 /* Bytes 0-2 are equal; byte 3 is different. 326 For little-endian need to have a sign bit for the difference. */ 327 extui a10, a8, 24, 8 328 extui a11, a9, 24, 8 329 sub a2, a10, a11 330 leaf_return 331 332.Ldiff0: 333 /* Byte 0 is different. */ 334 extui a10, a8, 0, 8 335 extui a11, a9, 0, 8 336 sub a2, a10, a11 337 leaf_return 338 339.Ldiff1: 340 /* Byte 0 is equal; byte 1 is different. */ 341 extui a10, a8, 8, 8 342 extui a11, a9, 8, 8 343 sub a2, a10, a11 344 leaf_return 345 346.Ldiff2: 347 /* Bytes 0-1 are equal; byte 2 is different. */ 348 extui a10, a8, 16, 8 349 extui a11, a9, 16, 8 350 sub a2, a10, a11 351 leaf_return 352 353#endif /* !__XTENSA_EB */ 354 355 .size strcmp, . - strcmp 356