/* ANSI C standard library function strcmp.

   Copyright (c) 2001-2012 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include "xtensa-asm.h"

/* MASK0-MASK3 are used below as single-byte-lane masks (presumably defined
   in xtensa-asm.h -- they are not defined in this file; confirm there).
   MASK4 has bit 6 set in every byte lane; it drives the "all bytes are
   nonzero ASCII" test described before .Laligned below.  */
#define MASK4 0x40404040


#if XCHAL_HAVE_L32R
	.literal .Lmask0, MASK0
	.literal .Lmask1, MASK1
	.literal .Lmask2, MASK2
	.literal .Lmask3, MASK3
	.literal .Lmask4, MASK4
#endif /* XCHAL_HAVE_L32R */

	.text
	.align	4
	.literal_position
	.global	strcmp
	.type	strcmp, @function
/* int strcmp (const char *s1, const char *s2)

   In:      a2 = s1, a3 = s2
   Out:     a2 = 0 if the strings are equal; otherwise the difference of
	    the first differing bytes (or of whole words where the high
	    equal bytes cancel), so only the sign and zeroness of the
	    result are meaningful.
   Scratch: a4-a11; a0 is used as the zero-overhead loop count when
	    XCHAL_HAVE_LOOPS.  Entry/exit go through the leaf_entry /
	    leaf_return macros from xtensa-asm.h (windowed or call0 ABI
	    as configured).  */
strcmp:

	leaf_entry sp, 16
	/* a2 = s1, a3 = s2 */

	l8ui	a8, a2, 0	// byte 0 from s1
	l8ui	a9, a3, 0	// byte 0 from s2
	movi	a10, 3		// mask for the two low (alignment) bits
	bne	a8, a9, .Lretdiff	// first bytes already differ

	or	a11, a2, a3
	bnone	a11, a10, .Laligned	// both pointers already word-aligned

	xor	a11, a2, a3	// compare low two bits of s1 and s2
	bany	a11, a10, .Lunaligned	// if they have different alignment

	/* s1/s2 have the same alignment but are not yet word-aligned.
	   Compare up to three bytes one at a time until both pointers
	   reach a word boundary, then fall into the aligned word loop.  */
	addi	a2, a2, 1	// advance s1
	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
	addi	a3, a3, 1	// advance s2
	bnone	a2, a10, .Laligned // if s1/s2 now aligned
	l8ui	a8, a2, 0	// byte 1 from s1
	l8ui	a9, a3, 0	// byte 1 from s2
	addi	a2, a2, 1	// advance s1
	bne	a8, a9, .Lretdiff // if different, return difference
	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
	addi	a3, a3, 1	// advance s2
	bnone	a2, a10, .Laligned // if s1/s2 now aligned
	l8ui	a8, a2, 0	// byte 2 from s1
	l8ui	a9, a3, 0	// byte 2 from s2
	addi	a2, a2, 1	// advance s1
	bne	a8, a9, .Lretdiff // if different, return difference
	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
	addi	a3, a3, 1	// advance s2
	j	.Laligned

/* s1 and s2 have different alignment.

   If the zero-overhead loop option is available, use an (almost)
   infinite zero-overhead loop with conditional exits so we only pay
   for taken branches when exiting the loop.

   Note: It is important for this unaligned case to come before the
   code for aligned strings, because otherwise some of the branches
   above cannot reach and have to be transformed to branches around
   jumps.  The unaligned code is smaller and the branches can reach
   over it.  */

	.align	4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	/* (2 mod 4) alignment for loop instruction */
#else
	/* (1 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
#endif
.Lunaligned:
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	_movi.n	a8, 0		// set up for the maximum loop count
#else
	_movi	a8, 0		// set up for the maximum loop count
#endif
	loop	a8, .Lretdiff	// loop forever (almost anyway)
#endif
.Lnextbyte:
	l8ui	a8, a2, 0
	l8ui	a9, a3, 0
	addi	a2, a2, 1
	bne	a8, a9, .Lretdiff
	addi	a3, a3, 1
#if XCHAL_HAVE_LOOPS
	beqz	a8, .Lretdiff	// end of both strings: a8 == a9 here, so
				// .Lretdiff computes 0 (strings equal)
#else
	bnez	a8, .Lnextbyte
#endif
.Lretdiff:
	sub	a2, a8, a9	// return difference of last bytes compared
	leaf_return

/* s1 is word-aligned; s2 is word-aligned.

   If the zero-overhead loop option is available, use an (almost)
   infinite zero-overhead loop with conditional exits so we only pay
   for taken branches when exiting the loop.  */

/* New algorithm, relying on the fact that all normal ASCII is between
   32 and 127.

   Rather than check all bytes for zero:
   Take one word (4 bytes).  Call it w1.
   Shift w1 left by one into w1'.
   Or w1 and w1'.  For all normal ASCII bit 6 will be 1; for zero it won't.
   Check that all 4 bit 6's (one for each byte) are one:
   If they are, we are definitely not done.
   If they are not, we are probably done, but need to check for zero.  */

	.align	4
#if XCHAL_HAVE_LOOPS
#if !XCHAL_HAVE_L32R
	/* (2 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
.Laligned:
#if XCHAL_HAVE_L32R
	l32r	a4, .Lmask0	// mask for byte 0
	l32r	a7, .Lmask4	// bit 6 of every byte (zero-byte screen)
#else
	const16	a4, MASK0@h	// mask for byte 0
	const16	a4, MASK0@l
	const16	a7, MASK4@h	// bit 6 of every byte (zero-byte screen)
	const16	a7, MASK4@l
#endif
	/* Loop forever */
1:
	loop	a0, .Laligned_done	// a0 (the return address) is far
					// larger than any real iteration
					// count, so this never expires

	/* First unrolled loop body.  */
	l32i	a8, a2, 0	// get word from s1
	l32i	a9, a3, 0	// get word from s2
	slli	a5, a8, 1
	bne	a8, a9, .Lwne2
	or	a9, a8, a5
	bnall	a9, a7, .Lprobeq	// some byte may be zero

	/* Second unrolled loop body.  */
	l32i	a8, a2, 4	// get word from s1+4
	l32i	a9, a3, 4	// get word from s2+4
	slli	a5, a8, 1
	bne	a8, a9, .Lwne2
	or	a9, a8, a5
	bnall	a9, a7, .Lprobeq2	// some byte may be zero

	addi	a2, a2, 8	// advance s1 pointer
	addi	a3, a3, 8	// advance s2 pointer
.Laligned_done:
	j	1b

.Lprobeq2:
	/* Adjust pointers to account for the loop unrolling.  */
	addi	a2, a2, 4
	addi	a3, a3, 4
	/* ...and fall through to .Lprobeq.  */

#else /* !XCHAL_HAVE_LOOPS */

.Laligned:
	movi	a4, MASK0	// mask for byte 0
	movi	a7, MASK4	// bit 6 of every byte (zero-byte screen)
	j	.Lfirstword
.Lnextword:
	addi	a2, a2, 4	// advance s1 pointer
	addi	a3, a3, 4	// advance s2 pointer
.Lfirstword:
	l32i	a8, a2, 0	// get word from s1
	l32i	a9, a3, 0	// get word from s2
	slli	a5, a8, 1
	bne	a8, a9, .Lwne2
	or	a9, a8, a5
	ball	a9, a7, .Lnextword	// all bytes surely nonzero: continue
#endif /* !XCHAL_HAVE_LOOPS */

	/* align (0 mod 4) */
.Lprobeq:
	/* Words are probably equal, but check for sure.
	   If not, loop over the rest of string using normal algorithm.  */

	bnone	a8, a4, .Leq	// if byte 0 is zero
#if XCHAL_HAVE_L32R
	l32r	a5, .Lmask1	// mask for byte 1
	l32r	a6, .Lmask2	// mask for byte 2
	bnone	a8, a5, .Leq	// if byte 1 is zero
	l32r	a7, .Lmask3	// mask for byte 3
	bnone	a8, a6, .Leq	// if byte 2 is zero
	bnone	a8, a7, .Leq	// if byte 3 is zero
	/* align (1 mod 4) */
#else
	const16	a5, MASK1@h	// mask for byte 1
	const16	a5, MASK1@l
	bnone	a8, a5, .Leq	// if byte 1 is zero
	const16	a6, MASK2@h	// mask for byte 2
	const16	a6, MASK2@l
	bnone	a8, a6, .Leq	// if byte 2 is zero
	const16	a7, MASK3@h	// mask for byte 3
	const16	a7, MASK3@l
	bnone	a8, a7, .Leq	// if byte 3 is zero
	/* align (2 mod 4) */
#endif /* XCHAL_HAVE_L32R */
#if XCHAL_HAVE_DENSITY
	addi.n	a2, a2, 4	// advance s1 pointer
	addi.n	a3, a3, 4	// advance s2 pointer
	/* align (1 mod 4) or (2 mod 4) */
#else
	addi	a2, a2, 4	// advance s1 pointer
	addi	a3, a3, 4	// advance s2 pointer
	or	a1, a1, a1	// nop (padding to place the loop below)
#if !XCHAL_HAVE_L32R
	or	a1, a1, a1	// nop
#endif
	/* align (2 mod 4) */
#endif /* XCHAL_HAVE_DENSITY */
#if XCHAL_HAVE_LOOPS
1:
	loop	a0, .Leq	// loop forever (a0, the return address, is
				// far bigger than the max iteration count)
	l32i	a8, a2, 0	// get word from s1
	l32i	a9, a3, 0	// get word from s2
	addi	a2, a2, 4	// advance s1 pointer
	bne	a8, a9, .Lwne
	bnone	a8, a4, .Leq	// if byte 0 is zero
	bnone	a8, a5, .Leq	// if byte 1 is zero
	bnone	a8, a6, .Leq	// if byte 2 is zero
	bnone	a8, a7, .Leq	// if byte 3 is zero
	addi	a3, a3, 4	// advance s2 pointer
	j	1b
#else /* !XCHAL_HAVE_LOOPS */

	j	.Lfirstword2
.Lnextword2:
	addi	a3, a3, 4	// advance s2 pointer
.Lfirstword2:
	l32i	a8, a2, 0	// get word from s1
	l32i	a9, a3, 0	// get word from s2
	addi	a2, a2, 4	// advance s1 pointer
	bne	a8, a9, .Lwne
	bnone	a8, a4, .Leq	// if byte 0 is zero
	bnone	a8, a5, .Leq	// if byte 1 is zero
	bnone	a8, a6, .Leq	// if byte 2 is zero
	bany	a8, a7, .Lnextword2	// if byte 3 is nonzero, keep looping
#endif /* !XCHAL_HAVE_LOOPS */

	/* Words are equal; some byte is zero.  */
.Leq:	movi	a2, 0		// return equal
	leaf_return

.Lwne2:	/* Words are not equal.  On big-endian processors, if none of the
	   bytes are zero, the return value can be determined by a simple
	   comparison.  */
#ifdef __XTENSA_EB__
	or	a10, a8, a5	// a5 still holds s1's word shifted left by 1,
				// so this is the same zero-byte screen as above
	bnall	a10, a7, .Lsomezero
	bgeu	a8, a9, .Lposreturn
	movi	a2, -1
	leaf_return
.Lposreturn:
	movi	a2, 1
	leaf_return
.Lsomezero:	// There is probably some zero byte.
#endif /* __XTENSA_EB__ */
.Lwne:	/* Words are not equal.  */
	xor	a2, a8, a9	// get word with nonzero in byte that differs
	bany	a2, a4, .Ldiff0	// if byte 0 differs
	movi	a5, MASK1	// mask for byte 1
	bnone	a8, a4, .Leq	// if byte 0 is zero
	bany	a2, a5, .Ldiff1	// if byte 1 differs
	movi	a6, MASK2	// mask for byte 2
	bnone	a8, a5, .Leq	// if byte 1 is zero
	bany	a2, a6, .Ldiff2	// if byte 2 differs
	bnone	a8, a6, .Leq	// if byte 2 is zero
#ifdef __XTENSA_EB__
.Ldiff3:
.Ldiff2:
.Ldiff1:
	/* Byte 0 is equal (at least) and there is a difference before a zero
	   byte.  Just subtract words to get the return value.
	   The high order equal bytes cancel, leaving room for the sign.  */
	sub	a2, a8, a9
	leaf_return

.Ldiff0:
	/* Need to make room for the sign, so can't subtract whole words.  */
	extui	a10, a8, 24, 8	// byte 0 is the most-significant byte here
	extui	a11, a9, 24, 8
	sub	a2, a10, a11
	leaf_return

#else /* !__XTENSA_EB__ */
	/* Little-endian is a little more difficult because can't subtract
	   whole words.  */
.Ldiff3:
	/* Bytes 0-2 are equal; byte 3 is different.
	   For little-endian need to have a sign bit for the difference.  */
	extui	a10, a8, 24, 8
	extui	a11, a9, 24, 8
	sub	a2, a10, a11
	leaf_return

.Ldiff0:
	/* Byte 0 is different.  */
	extui	a10, a8, 0, 8
	extui	a11, a9, 0, 8
	sub	a2, a10, a11
	leaf_return

.Ldiff1:
	/* Byte 0 is equal; byte 1 is different.  */
	extui	a10, a8, 8, 8
	extui	a11, a9, 8, 8
	sub	a2, a10, a11
	leaf_return

.Ldiff2:
	/* Bytes 0-1 are equal; byte 2 is different.  */
	extui	a10, a8, 16, 8
	extui	a11, a9, 16, 8
	sub	a2, a10, a11
	leaf_return

#endif /* !__XTENSA_EB__ */

	.size	strcmp, . - strcmp