/* ANSI C standard library function strcmp.

   Copyright (c) 2001-2012 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ 23 24#include "xtensa-asm.h" 25 26#define MASK4 0x40404040 27 28 29#if XCHAL_HAVE_L32R 30 .literal .Lmask0, MASK0 31 .literal .Lmask1, MASK1 32 .literal .Lmask2, MASK2 33 .literal .Lmask3, MASK3 34 .literal .Lmask4, MASK4 35#endif /* XCHAL_HAVE_L32R */ 36 37 .text 38 .align 4 39 .literal_position 40 .global strcmp 41 .type strcmp, @function 42strcmp: 43 44#if XCHAL_HAVE_L32R && XCHAL_HAVE_LOOPS && XCHAL_HAVE_DENSITY && __XTENSA_EL__ && XCHAL_HAVE_FLIX3 45/* Fast version for FLIX3 Little Endian */ 46 47 48 leaf_entry sp, 16 49 /* a2 = s1, a3 = s2 */ 50 51 l8ui a8, a2, 0 // byte 0 from s1 52 l8ui a9, a3, 0 // byte 0 from s2 53 movi a10, 3 // mask 54 movi a5, 0xfffffffc 55 or a11, a2, a3 56 l32r a4, .Lmask0 // mask for byte 0 57 l32r a7, .Lmask4 58 addi a3, a3, -8 59 addi a2, a2, -8 60 and a5, a5, a2 61 bne.w18 a8, a9, .Lretdiff 62 l32i a8, a5, 8 // get word from aligned variant of s1 63 64 bany.w18 a11, a10, .Lnot_aligned 65 66/* s1 is word-aligned; s2 is word-aligned. 67 68 If the zero-overhead loop option is available, use an (almost) 69 infinite zero-overhead loop with conditional exits so we only pay 70 for taken branches when exiting the loop. */ 71 72/* New algorithm, relying on the fact that all normal ASCII is between 73 32 and 127. 74 75 Rather than check all bytes for zero: 76 Take one word (4 bytes). Call it w1. 77 Shift w1 left by one into w1'. 78 Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't. 79 Check that all 4 bit 6's (one for each byte) are one: 80 If they are, we are definitely not done. 81 If they are not, we are probably done, but need to check for zero. */ 82 83.Laligned: 84 /* Loop forever */ 851: 86 loop a0, .Laligned_done 87 88 /* First unrolled loop body. 
*/ 89 l32i a9, a3, 8 // get word from s2 90 addi a3, a3, 8 // advance s2 pointer 91 slli a5, a8, 1 92 or a10, a8, a5 93 {l32i a11, a2, 12 // get word from s1+4 94 bne.w18 a8, a9, .Lwne2} 95 l32i a9, a3, 4 // get word from s2+4 96 bnall.w18 a10, a7, .Lprobeq 97 98 /* Second unrolled loop body. */ 99 slli a5, a11, 1 100 or a10, a11, a5 101 addi a2, a2, 8 // advance s1 pointer 102 mov a8, a11 103 bne.w18 a11, a9, .Lwne2 104 l32i a8, a2, 8 // get word from s1 105 bnall.w18 a10, a7, .Lprobeq2 106 107.Laligned_done: 108 l32i a8, a2, 8 // get word from s1 109 j 1b 110 111.Lnot_aligned: 112 xor a11, a2, a3 // compare low two bits of s1 and s2 113 bany a11, a10, .Lunaligned // if they have different alignment 114 115 /* s1/s2 are not word-aligned. */ 116 movi a5, 0xfffffffc 117 addi a2, a2, 1 // advance s1 118 beqz a9, .Leq // bytes equal, if zero, strings are equal 119 addi a3, a3, 1 // advance s2 120 and a6, a2, a5 121 l32i a8, a6, 8 // get word from s1 122 bnone a2, a10, .Laligned // if s1/s2 now aligned 123 l8ui a8, a2, 8 // byte 1 from s1 124 l8ui a9, a3, 8 // byte 1 from s2 125 addi a2, a2, 1 // advance s1 126 bne a8, a9, .Lretdiff // if different, return difference 127 beqz a8, .Leq // bytes equal, if zero, strings are equal 128 addi a3, a3, 1 // advance s2 129 and a6, a2, a5 130 l32i a8, a6, 8 // get word from s1 131 bnone a2, a10, .Laligned // if s1/s2 now aligned 132 l8ui a8, a2, 8 // byte 2 from s1 133 l8ui a9, a3, 8 // byte 2 from s2 134 addi a2, a2, 1 // advance s1 135 bne a8, a9, .Lretdiff // if different, return difference 136 beqz a8, .Leq // bytes equal, if zero, strings are equal 137 addi a3, a3, 1 // advance s2 138 l32i a8, a2, 8 // get word from s1 139 j .Laligned 140 141/* s1 and s2 have different alignment. 142 143 If the zero-overhead loop option is available, use an (almost) 144 infinite zero-overhead loop with conditional exits so we only pay 145 for taken branches when exiting the loop. 
146 147 Note: It is important for this unaligned case to come before the 148 code for aligned strings, because otherwise some of the branches 149 above cannot reach and have to be transformed to branches around 150 jumps. The unaligned code is smaller and the branches can reach 151 over it. */ 152 153.Lunaligned: 154 movi.n a8, 0 // set up for the maximum loop count 155 loop a8, .Lretdiff // loop forever (almost anyway) 156 l8ui a8, a2, 8 157 l8ui a9, a3, 8 158 addi a2, a2, 1 159 bne a8, a9, .Lretdiff 160 addi a3, a3, 1 161 beqz a8, .Lretdiff 162.Lretdiff: 163 sub a2, a8, a9 164 leaf_return 165 166 167.Lprobeq2: 168 /* Adjust pointers to account for the loop unrolling. */ 169 mov a8, a11 170 addi a2, a2, -4 171 addi a3, a3, 4 172 173 /* align (0 mod 4) */ 174.Lprobeq: 175 /* Words are probably equal, but check for sure. 176 If not, loop over the rest of string using normal algorithm. */ 177 178 bnone a8, a4, .Leq // if byte 0 is zero 179 l32r a5, .Lmask1 // mask for byte 1 180 l32r a6, .Lmask2 // mask for byte 2 181 bnone a8, a5, .Leq // if byte 1 is zero 182 l32r a7, .Lmask3 // mask for byte 3 183 bnone a8, a6, .Leq // if byte 2 is zero 184 bnone a8, a7, .Leq // if byte 3 is zero 185 /* align (1 mod 4) */ 186 addi.n a2, a2, 12 // advance s1 pointer 187 addi.n a3, a3, 4 // advance s2 pointer 188 /* align (1 mod 4) or (2 mod 4) */ 1891: 190 loop a0, .Lend // loop forever (a4 is bigger than max iters) 191 192 l32i a11, a2, 0 // get word from s1 193 l32i a9, a3, 0 // get word from s2 194 addi a2, a2, 4 // advance s1 pointer 195 bne a11, a9, .Lwne 196 bnone a11, a4, .Leq // if byte 0 is zero 197 bnone a11, a5, .Leq // if byte 1 is zero 198 bnone a11, a6, .Leq // if byte 2 is zero 199 bnone a11, a7, .Leq // if byte 3 is zero 200 addi a3, a3, 4 // advance s2 pointer 201.Lend: 202 j 1b 203 204 /* Words are equal; some byte is zero. */ 205.Leq: movi a2, 0 // return equal 206 leaf_return 207 208.Lwne2: /* Words are not equal. 
On big-endian processors, if none of the 209 bytes are zero, the return value can be determined by a simple 210 comparison. */ 211.Lwne: /* Words are not equal. */ 212 xor a2, a8, a9 // get word with nonzero in byte that differs 213 extui a10, a8, 0, 8 214 extui a11, a9, 0, 8 215 movi a5, MASK1 // mask for byte 1 216 bany.w18 a2, a4, .Ldiff0 // if byte 0 differs 217 218 bnone.w18 a8, a4, .Leq // if byte 0 is zero 219 movi a6, MASK2 // mask for byte 2 220 bany.w18 a2, a5, .Ldiff1 // if byte 1 differs 221 extui a10, a8, 24, 8 222 bnone.w18 a8, a5, .Leq // if byte 1 is zero 223 extui a11, a9, 24, 8 224 bany.w18 a2, a6, .Ldiff2 // if byte 2 differs 225 sub a2, a10, a11 226 bnone.w18 a8, a6, .Leq // if byte 2 is zero 227 /* Little-endian is a little more difficult because can't subtract 228 whole words. */ 229.Ldiff3: 230 /* Bytes 0-2 are equal; byte 3 is different. 231 For little-endian need to have a sign bit for the difference. */ 232 leaf_return 233.Ldiff0: 234 /* Byte 0 is different. */ 235 sub a2, a10, a11 236 leaf_return 237 238.Ldiff1: 239 /* Byte 0 is equal; byte 1 is different. */ 240 extui a10, a8, 8, 8 241 extui a11, a9, 8, 8 242 sub a2, a10, a11 243 leaf_return 244 245.Ldiff2: 246 /* Bytes 0-1 are equal; byte 2 is different. 
*/ 247 extui a10, a8, 16, 8 248 extui a11, a9, 16, 8 249 sub a2, a10, a11 250 leaf_return 251 252#else 253#if XCHAL_HAVE_L32R && XCHAL_HAVE_LOOPS && XCHAL_HAVE_DENSITY && __XTENSA_EL__ && XCHAL_HAVE_PDX4 254/* Fast version for FLIX3 Little Endian */ 255 256 257 leaf_entry sp, 16 258 /* a2 = s1, a3 = s2 */ 259 260 l8ui a8, a2, 0 // byte 0 from s1 261 l8ui a9, a3, 0 // byte 0 from s2 262 movi a10, 3 // mask 263 movi a5, 0xfffffffc 264 or a11, a2, a3 265 l32r a4, .Lmask0 // mask for byte 0 266 l32r a7, .Lmask4 267 addi a3, a3, -8 268 addi a2, a2, -8 269 and a5, a5, a2 270 bne.w15 a8, a9, .Lretdiff 271 l32i a8, a5, 8 // get word from aligned variant of s1 272 273 bany.w15 a11, a10, .Lnot_aligned 274 275/* s1 is word-aligned; s2 is word-aligned. 276 277 If the zero-overhead loop option is available, use an (almost) 278 infinite zero-overhead loop with conditional exits so we only pay 279 for taken branches when exiting the loop. */ 280 281/* New algorithm, relying on the fact that all normal ASCII is between 282 32 and 127. 283 284 Rather than check all bytes for zero: 285 Take one word (4 bytes). Call it w1. 286 Shift w1 left by one into w1'. 287 Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't. 288 Check that all 4 bit 6's (one for each byte) are one: 289 If they are, we are definitely not done. 290 If they are not, we are probably done, but need to check for zero. */ 291 292.Laligned: 293 /* Loop forever */ 2941: 295 loop a0, .Laligned_done 296 297 /* First unrolled loop body. */ 298 l32i a9, a3, 8 // get word from s2 299 addi a3, a3, 8 // advance s2 pointer 300 slli a5, a8, 1 301 or a10, a8, a5 302 { 303 bne.w15 a8, a9, .Lwne2 304 l32i a11, a2, 12 // get word from s1+4 305 nop 306 nop 307 } 308 l32i a9, a3, 4 // get word from s2+4 309 bnall.w15 a10, a7, .Lprobeq 310 311 /* Second unrolled loop body. 
*/ 312 slli a5, a11, 1 313 or a10, a11, a5 314 addi a2, a2, 8 // advance s1 pointer 315 mov a8, a11 316 bne.w15 a11, a9, .Lwne2 317 l32i a8, a2, 8 // get word from s1 318 bnall.w15 a10, a7, .Lprobeq2 319 320.Laligned_done: 321 l32i a8, a2, 8 // get word from s1 322 j 1b 323 324.Lnot_aligned: 325 xor a11, a2, a3 // compare low two bits of s1 and s2 326 bany a11, a10, .Lunaligned // if they have different alignment 327 328 /* s1/s2 are not word-aligned. */ 329 movi a5, 0xfffffffc 330 addi a2, a2, 1 // advance s1 331 beqz a9, .Leq // bytes equal, if zero, strings are equal 332 addi a3, a3, 1 // advance s2 333 and a6, a2, a5 334 l32i a8, a6, 8 // get word from s1 335 bnone a2, a10, .Laligned // if s1/s2 now aligned 336 l8ui a8, a2, 8 // byte 1 from s1 337 l8ui a9, a3, 8 // byte 1 from s2 338 addi a2, a2, 1 // advance s1 339 bne a8, a9, .Lretdiff // if different, return difference 340 beqz a8, .Leq // bytes equal, if zero, strings are equal 341 addi a3, a3, 1 // advance s2 342 and a6, a2, a5 343 l32i a8, a6, 8 // get word from s1 344 bnone a2, a10, .Laligned // if s1/s2 now aligned 345 l8ui a8, a2, 8 // byte 2 from s1 346 l8ui a9, a3, 8 // byte 2 from s2 347 addi a2, a2, 1 // advance s1 348 bne a8, a9, .Lretdiff // if different, return difference 349 beqz a8, .Leq // bytes equal, if zero, strings are equal 350 addi a3, a3, 1 // advance s2 351 l32i a8, a2, 8 // get word from s1 352 j .Laligned 353 354/* s1 and s2 have different alignment. 355 356 If the zero-overhead loop option is available, use an (almost) 357 infinite zero-overhead loop with conditional exits so we only pay 358 for taken branches when exiting the loop. 359 360 Note: It is important for this unaligned case to come before the 361 code for aligned strings, because otherwise some of the branches 362 above cannot reach and have to be transformed to branches around 363 jumps. The unaligned code is smaller and the branches can reach 364 over it. 
*/ 365 366.Lunaligned: 367 movi.n a8, 0 // set up for the maximum loop count 368 loop a8, .Lretdiff // loop forever (almost anyway) 369 l8ui a8, a2, 8 370 l8ui a9, a3, 8 371 addi a2, a2, 1 372 bne a8, a9, .Lretdiff 373 addi a3, a3, 1 374 beqz a8, .Lretdiff 375.Lretdiff: 376 sub a2, a8, a9 377 leaf_return 378 379 380.Lprobeq2: 381 /* Adjust pointers to account for the loop unrolling. */ 382 mov a8, a11 383 addi a2, a2, -4 384 addi a3, a3, 4 385 386 /* align (0 mod 4) */ 387.Lprobeq: 388 /* Words are probably equal, but check for sure. 389 If not, loop over the rest of string using normal algorithm. */ 390 391 bnone a8, a4, .Leq // if byte 0 is zero 392 l32r a5, .Lmask1 // mask for byte 1 393 l32r a6, .Lmask2 // mask for byte 2 394 bnone a8, a5, .Leq // if byte 1 is zero 395 l32r a7, .Lmask3 // mask for byte 3 396 bnone a8, a6, .Leq // if byte 2 is zero 397 bnone a8, a7, .Leq // if byte 3 is zero 398 /* align (1 mod 4) */ 399 addi.n a2, a2, 12 // advance s1 pointer 400 addi.n a3, a3, 4 // advance s2 pointer 401 /* align (1 mod 4) or (2 mod 4) */ 4021: 403 loop a0, .Lend // loop forever (a4 is bigger than max iters) 404 405 l32i a11, a2, 0 // get word from s1 406 l32i a9, a3, 0 // get word from s2 407 addi a2, a2, 4 // advance s1 pointer 408 bne a11, a9, .Lwne 409 bnone a11, a4, .Leq // if byte 0 is zero 410 bnone a11, a5, .Leq // if byte 1 is zero 411 bnone a11, a6, .Leq // if byte 2 is zero 412 bnone a11, a7, .Leq // if byte 3 is zero 413 addi a3, a3, 4 // advance s2 pointer 414.Lend: 415 j 1b 416 417 /* Words are equal; some byte is zero. */ 418.Leq: movi a2, 0 // return equal 419 leaf_return 420 421.Lwne2: /* Words are not equal. On big-endian processors, if none of the 422 bytes are zero, the return value can be determined by a simple 423 comparison. */ 424.Lwne: /* Words are not equal. 
*/ 425 xor a2, a8, a9 // get word with nonzero in byte that differs 426 extui a10, a8, 0, 8 427 extui a11, a9, 0, 8 428 movi a5, MASK1 // mask for byte 1 429 bany.w15 a2, a4, .Ldiff0 // if byte 0 differs 430 431 bnone.w15 a8, a4, .Leq // if byte 0 is zero 432 movi a6, MASK2 // mask for byte 2 433 bany.w15 a2, a5, .Ldiff1 // if byte 1 differs 434 extui a10, a8, 24, 8 435 bnone.w15 a8, a5, .Leq // if byte 1 is zero 436 extui a11, a9, 24, 8 437 bany.w15 a2, a6, .Ldiff2 // if byte 2 differs 438 sub a2, a10, a11 439 bnone.w15 a8, a6, .Leq // if byte 2 is zero 440 /* Little-endian is a little more difficult because can't subtract 441 whole words. */ 442.Ldiff3: 443 /* Bytes 0-2 are equal; byte 3 is different. 444 For little-endian need to have a sign bit for the difference. */ 445 leaf_return 446.Ldiff0: 447 /* Byte 0 is different. */ 448 sub a2, a10, a11 449 leaf_return 450 451.Ldiff1: 452 /* Byte 0 is equal; byte 1 is different. */ 453 extui a10, a8, 8, 8 454 extui a11, a9, 8, 8 455 sub a2, a10, a11 456 leaf_return 457 458.Ldiff2: 459 /* Bytes 0-1 are equal; byte 2 is different. */ 460 extui a10, a8, 16, 8 461 extui a11, a9, 16, 8 462 sub a2, a10, a11 463 leaf_return 464 465 466#else /* Not FLIX3 */ 467 leaf_entry sp, 16 468 /* a2 = s1, a3 = s2 */ 469 470 l8ui a8, a2, 0 // byte 0 from s1 471 l8ui a9, a3, 0 // byte 0 from s2 472 movi a10, 3 // mask 473 bne a8, a9, .Lretdiff 474 475 or a11, a2, a3 476 bnone a11, a10, .Laligned 477 478 xor a11, a2, a3 // compare low two bits of s1 and s2 479 bany a11, a10, .Lunaligned // if they have different alignment 480 481 /* s1/s2 are not word-aligned. 
*/ 482 addi a2, a2, 1 // advance s1 483 beqz a8, .Leq // bytes equal, if zero, strings are equal 484 addi a3, a3, 1 // advance s2 485 bnone a2, a10, .Laligned // if s1/s2 now aligned 486 l8ui a8, a2, 0 // byte 1 from s1 487 l8ui a9, a3, 0 // byte 1 from s2 488 addi a2, a2, 1 // advance s1 489 bne a8, a9, .Lretdiff // if different, return difference 490 beqz a8, .Leq // bytes equal, if zero, strings are equal 491 addi a3, a3, 1 // advance s2 492 bnone a2, a10, .Laligned // if s1/s2 now aligned 493 l8ui a8, a2, 0 // byte 2 from s1 494 l8ui a9, a3, 0 // byte 2 from s2 495 addi a2, a2, 1 // advance s1 496 bne a8, a9, .Lretdiff // if different, return difference 497 beqz a8, .Leq // bytes equal, if zero, strings are equal 498 addi a3, a3, 1 // advance s2 499 j .Laligned 500 501/* s1 and s2 have different alignment. 502 503 If the zero-overhead loop option is available, use an (almost) 504 infinite zero-overhead loop with conditional exits so we only pay 505 for taken branches when exiting the loop. 506 507 Note: It is important for this unaligned case to come before the 508 code for aligned strings, because otherwise some of the branches 509 above cannot reach and have to be transformed to branches around 510 jumps. The unaligned code is smaller and the branches can reach 511 over it. 
*/ 512 513 .align 4 514#if XCHAL_HAVE_LOOPS 515#if XCHAL_HAVE_DENSITY 516 /* (2 mod 4) alignment for loop instruction */ 517#else 518 /* (1 mod 4) alignment for loop instruction */ 519 .byte 0 520 .byte 0 521#endif 522#endif 523.Lunaligned: 524#if XCHAL_HAVE_LOOPS 525#if XCHAL_HAVE_DENSITY 526 _movi.n a8, 0 // set up for the maximum loop count 527#else 528 _movi a8, 0 // set up for the maximum loop count 529#endif 530 loop a8, .Lretdiff // loop forever (almost anyway) 531#endif 532.Lnextbyte: 533 l8ui a8, a2, 0 534 l8ui a9, a3, 0 535 addi a2, a2, 1 536 bne a8, a9, .Lretdiff 537 addi a3, a3, 1 538#if XCHAL_HAVE_LOOPS 539 beqz a8, .Lretdiff 540#else 541 bnez a8, .Lnextbyte 542#endif 543.Lretdiff: 544 sub a2, a8, a9 545 leaf_return 546 547/* s1 is word-aligned; s2 is word-aligned. 548 549 If the zero-overhead loop option is available, use an (almost) 550 infinite zero-overhead loop with conditional exits so we only pay 551 for taken branches when exiting the loop. */ 552 553/* New algorithm, relying on the fact that all normal ASCII is between 554 32 and 127. 555 556 Rather than check all bytes for zero: 557 Take one word (4 bytes). Call it w1. 558 Shift w1 left by one into w1'. 559 Or w1 and w1'. For all normal ASCII bit 6 will be 1; for zero it won't. 560 Check that all 4 bit 6's (one for each byte) are one: 561 If they are, we are definitely not done. 562 If they are not, we are probably done, but need to check for zero. */ 563 564 .align 4 565#if XCHAL_HAVE_LOOPS 566#if !XCHAL_HAVE_L32R 567 /* (2 mod 4) alignment for loop instruction */ 568 .byte 0 569 .byte 0 570#endif 571.Laligned: 572#if XCHAL_HAVE_L32R 573 l32r a4, .Lmask0 // mask for byte 0 574 l32r a7, .Lmask4 575#else 576 const16 a4, MASK0@h 577 const16 a4, MASK0@l 578 const16 a7, MASK4@h 579 const16 a7, MASK4@l 580#endif 581 /* Loop forever */ 5821: 583 loop a0, .Laligned_done 584 585 /* First unrolled loop body. 
*/ 586 l32i a8, a2, 0 // get word from s1 587 l32i a9, a3, 0 // get word from s2 588 slli a5, a8, 1 589 bne a8, a9, .Lwne2 590 or a9, a8, a5 591 bnall a9, a7, .Lprobeq 592 593 /* Second unrolled loop body. */ 594 l32i a8, a2, 4 // get word from s1+4 595 l32i a9, a3, 4 // get word from s2+4 596 slli a5, a8, 1 597 bne a8, a9, .Lwne2 598 or a9, a8, a5 599 bnall a9, a7, .Lprobeq2 600 601 addi a2, a2, 8 // advance s1 pointer 602 addi a3, a3, 8 // advance s2 pointer 603.Laligned_done: 604 j 1b 605 606.Lprobeq2: 607 /* Adjust pointers to account for the loop unrolling. */ 608 addi a2, a2, 4 609 addi a3, a3, 4 610 611#else /* !XCHAL_HAVE_LOOPS */ 612 613.Laligned: 614 movi a4, MASK0 // mask for byte 0 615 movi a7, MASK4 616 j .Lfirstword 617.Lnextword: 618 addi a2, a2, 4 // advance s1 pointer 619 addi a3, a3, 4 // advance s2 pointer 620.Lfirstword: 621 l32i a8, a2, 0 // get word from s1 622 l32i a9, a3, 0 // get word from s2 623 slli a5, a8, 1 624 bne a8, a9, .Lwne2 625 or a9, a8, a5 626 ball a9, a7, .Lnextword 627#endif /* !XCHAL_HAVE_LOOPS */ 628 629 /* align (0 mod 4) */ 630.Lprobeq: 631 /* Words are probably equal, but check for sure. 632 If not, loop over the rest of string using normal algorithm. 
*/ 633 634 bnone a8, a4, .Leq // if byte 0 is zero 635#if XCHAL_HAVE_L32R 636 l32r a5, .Lmask1 // mask for byte 1 637 l32r a6, .Lmask2 // mask for byte 2 638 bnone a8, a5, .Leq // if byte 1 is zero 639 l32r a7, .Lmask3 // mask for byte 3 640 bnone a8, a6, .Leq // if byte 2 is zero 641 bnone a8, a7, .Leq // if byte 3 is zero 642 /* align (1 mod 4) */ 643#else 644 const16 a5, MASK1@h // mask for byte 1 645 const16 a5, MASK1@l 646 bnone a8, a5, .Leq // if byte 1 is zero 647 const16 a6, MASK2@h // mask for byte 2 648 const16 a6, MASK2@l 649 bnone a8, a6, .Leq // if byte 2 is zero 650 const16 a7, MASK3@h // mask for byte 3 651 const16 a7, MASK3@l 652 bnone a8, a7, .Leq // if byte 3 is zero 653 /* align (2 mod 4) */ 654#endif /* XCHAL_HAVE_L32R */ 655#if XCHAL_HAVE_DENSITY 656 addi.n a2, a2, 4 // advance s1 pointer 657 addi.n a3, a3, 4 // advance s2 pointer 658 /* align (1 mod 4) or (2 mod 4) */ 659#else 660 addi a2, a2, 4 // advance s1 pointer 661 addi a3, a3, 4 // advance s2 pointer 662 or a1, a1, a1 // nop 663#if !XCHAL_HAVE_L32R 664 or a1, a1, a1 // nop 665#endif 666 /* align (2 mod 4) */ 667#endif /* XCHAL_HAVE_DENSITY */ 668#if XCHAL_HAVE_LOOPS 6691: 670 loop a0, .Leq // loop forever (a4 is bigger than max iters) 671 l32i a8, a2, 0 // get word from s1 672 l32i a9, a3, 0 // get word from s2 673 addi a2, a2, 4 // advance s1 pointer 674 bne a8, a9, .Lwne 675 bnone a8, a4, .Leq // if byte 0 is zero 676 bnone a8, a5, .Leq // if byte 1 is zero 677 bnone a8, a6, .Leq // if byte 2 is zero 678 bnone a8, a7, .Leq // if byte 3 is zero 679 addi a3, a3, 4 // advance s2 pointer 680 j 1b 681#else /* !XCHAL_HAVE_LOOPS */ 682 683 j .Lfirstword2 684.Lnextword2: 685 addi a3, a3, 4 // advance s2 pointer 686.Lfirstword2: 687 l32i a8, a2, 0 // get word from s1 688 l32i a9, a3, 0 // get word from s2 689 addi a2, a2, 4 // advance s1 pointer 690 bne a8, a9, .Lwne 691 bnone a8, a4, .Leq // if byte 0 is zero 692 bnone a8, a5, .Leq // if byte 1 is zero 693 bnone a8, a6, .Leq // if byte 2 is 
zero 694 bany a8, a7, .Lnextword2 // if byte 3 is zero 695#endif /* !XCHAL_HAVE_LOOPS */ 696 697 /* Words are equal; some byte is zero. */ 698.Leq: movi a2, 0 // return equal 699 leaf_return 700 701.Lwne2: /* Words are not equal. On big-endian processors, if none of the 702 bytes are zero, the return value can be determined by a simple 703 comparison. */ 704#ifdef __XTENSA_EB__ 705 or a10, a8, a5 706 bnall a10, a7, .Lsomezero 707 bgeu a8, a9, .Lposreturn 708 movi a2, -1 709 leaf_return 710.Lposreturn: 711 movi a2, 1 712 leaf_return 713.Lsomezero: // There is probably some zero byte. 714#endif /* __XTENSA_EB__ */ 715.Lwne: /* Words are not equal. */ 716 xor a2, a8, a9 // get word with nonzero in byte that differs 717 bany a2, a4, .Ldiff0 // if byte 0 differs 718 movi a5, MASK1 // mask for byte 1 719 bnone a8, a4, .Leq // if byte 0 is zero 720 bany a2, a5, .Ldiff1 // if byte 1 differs 721 movi a6, MASK2 // mask for byte 2 722 bnone a8, a5, .Leq // if byte 1 is zero 723 bany a2, a6, .Ldiff2 // if byte 2 differs 724 bnone a8, a6, .Leq // if byte 2 is zero 725#ifdef __XTENSA_EB__ 726.Ldiff3: 727.Ldiff2: 728.Ldiff1: 729 /* Byte 0 is equal (at least) and there is a difference before a zero 730 byte. Just subtract words to get the return value. 731 The high order equal bytes cancel, leaving room for the sign. */ 732 sub a2, a8, a9 733 leaf_return 734 735.Ldiff0: 736 /* Need to make room for the sign, so can't subtract whole words. */ 737 extui a10, a8, 24, 8 738 extui a11, a9, 24, 8 739 sub a2, a10, a11 740 leaf_return 741 742#else /* !__XTENSA_EB__ */ 743 /* Little-endian is a little more difficult because can't subtract 744 whole words. */ 745.Ldiff3: 746 /* Bytes 0-2 are equal; byte 3 is different. 747 For little-endian need to have a sign bit for the difference. */ 748 extui a10, a8, 24, 8 749 extui a11, a9, 24, 8 750 sub a2, a10, a11 751 leaf_return 752 753.Ldiff0: 754 /* Byte 0 is different. 
*/ 755 extui a10, a8, 0, 8 756 extui a11, a9, 0, 8 757 sub a2, a10, a11 758 leaf_return 759 760.Ldiff1: 761 /* Byte 0 is equal; byte 1 is different. */ 762 extui a10, a8, 8, 8 763 extui a11, a9, 8, 8 764 sub a2, a10, a11 765 leaf_return 766 767.Ldiff2: 768 /* Bytes 0-1 are equal; byte 2 is different. */ 769 extui a10, a8, 16, 8 770 extui a11, a9, 16, 8 771 sub a2, a10, a11 772 leaf_return 773 774#endif /* !__XTENSA_EB */ 775#endif /* FLIX3*/ 776#endif /* FLIX3*/ 777 778 .size strcmp, . - strcmp 779