/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Implementation of strcmp for ARMv6.  Use ldrd to support wider
   loads, provided the data is sufficiently aligned.  Use
   saturating arithmetic to optimize the compares.  */

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (e.g. because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

        .arm

/* Parameters and result.  */
#define src1        r0
#define src2        r1
#define result      r0      /* Overlaps src1.  */

/* Internal variables.  */
#define tmp1        r4
#define tmp2        r5
#define const_m1    r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a      r2
#define data1b      r3
#define data2a      r6
#define data2b      r7
#define syndrome_a  tmp1
#define syndrome_b  tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1       r2
#define data2       r3
#define syndrome    tmp2


        /* Macro to compute and return the result value for word-aligned
           cases.  */
        .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
        /* If data1 contains a zero byte, then syndrome will contain a 1 in
           bit 7 of that byte.  Otherwise, the highest set bit in the
           syndrome will highlight the first different bit.  It is therefore
           sufficient to extract the eight bits starting with the syndrome
           bit.  */
        clz     tmp1, \synd
        lsl     r1, \d2, tmp1
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsl     \d1, \d1, tmp1
        .cfi_remember_state
        lsr     result, \d1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1, lsr #24
        bx      lr
#else
        /* To use the big-endian trick we'd have to reverse all three words;
           that's slower than this approach.  */
        rev     \synd, \synd
        clz     tmp1, \synd
        bic     tmp1, tmp1, #7
        lsr     r1, \d2, tmp1
        .cfi_remember_state
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsr     \d1, \d1, tmp1
        and     result, \d1, #255
        and     r1, r1, #255
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1

        bx      lr
#endif
        .endm
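
        /* A sketch of how the uadd8/sel pairs below build a "syndrome":
           uadd8 adds 0xff to every byte of its first operand and records
           the per-byte carry-out in the GE flags, so a byte's GE bit is
           set exactly when that byte is non-zero.  sel then takes the EOR
           difference for bytes whose GE bit is set and 0xff for bytes of
           data1 that were zero.  Roughly, per byte:

               syndrome[i] = (data1[i] == 0) ? 0xff : (data1[i] ^ data2[i])

           A non-zero syndrome therefore means a difference or the end of
           string 1, and its first marked byte is the first interesting
           one.  Checking data1 alone for NUL is enough: a NUL in data2
           that is not matched in data1 already shows up as a
           difference.  */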

        .text
        .p2align        5
.Lstrcmp_start_addr:
#ifndef STRCMP_NO_PRECHECK
.Lfastpath_exit:
        sub     r0, r2, r3
        bx      lr
#endif
def_fn  strcmp
#ifndef STRCMP_NO_PRECHECK
        ldrb    r2, [src1]
        ldrb    r3, [src2]
        cmp     r2, #1
        cmpcs   r2, r3
        bne     .Lfastpath_exit
#endif
        .cfi_sections .debug_frame
        .cfi_startproc
        strd    r4, r5, [sp, #-16]!
        .cfi_def_cfa_offset 16
        .cfi_offset 4, -16
        .cfi_offset 5, -12
        orr     tmp1, src1, src2
        strd    r6, r7, [sp, #8]
        .cfi_offset 6, -8
        .cfi_offset 7, -4
        mvn     const_m1, #0
        tst     tmp1, #7
        beq     .Lloop_aligned8

.Lnot_aligned:
        eor     tmp1, src1, src2
        tst     tmp1, #7
        bne     .Lmisaligned8

        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        and     tmp1, src1, #7
        bic     src1, src1, #7
        and     tmp2, tmp1, #3
        bic     src2, src2, #7
        lsl     tmp2, tmp2, #3          /* Bytes -> bits.  */
        ldrd    data1a, data1b, [src1], #16
        tst     tmp1, #4
        ldrd    data2a, data2b, [src2], #16
        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp2
        orr     data1a, data1a, tmp1
        orr     data2a, data2a, tmp1
        beq     .Lstart_realigned8
        orr     data1b, data1b, tmp1
        mov     data1a, const_m1
        orr     data2b, data2b, tmp1
        mov     data2a, const_m1
        b       .Lstart_realigned8

        /* Unwind the inner loop by a factor of 2, giving 16 bytes per
           pass.  */
        .p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
        .p2align 2      /* Always word aligned.  */
.Lloop_aligned8:
        ldrd    data1a, data1b, [src1], #16
        ldrd    data2a, data2b, [src2], #16
.Lstart_realigned8:
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
        bne     .Ldiff_found

        ldrd    data1a, data1b, [src1, #-8]
        ldrd    data2a, data2b, [src2, #-8]
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
        beq     .Lloop_aligned8

.Ldiff_found:
        cmp     syndrome_a, #0
        bne     .Ldiff_in_a

.Ldiff_in_b:
        strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

.Ldiff_in_a:
        .cfi_restore_state
        strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

        .cfi_restore_state
.Lmisaligned8:
        tst     tmp1, #3
        bne     .Lmisaligned4
        ands    tmp1, src1, #3
        bne     .Lmutual_align4
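
        /* Fall through: src1 and src2 are both word aligned here but do
           not share 8-byte alignment, so compare a word at a time using
           the same uadd8/sel syndrome test as the doubleword loop above.  */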

        /* Unrolled by a factor of 2, to reduce the number of post-increment
           operations.  */
.Lloop_aligned4:
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
.Lstart_realigned4:
        uadd8   syndrome, data1, const_m1       /* Only need GE bits.  */
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        bne     .Laligned4_done

        ldr     data1, [src1, #-4]
        ldr     data2, [src2, #-4]
        uadd8   syndrome, data1, const_m1
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        beq     .Lloop_aligned4

.Laligned4_done:
        strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
        .cfi_restore_state
        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        lsl     tmp1, tmp1, #3          /* Bytes -> bits.  */
        bic     src1, src1, #3
        ldr     data1, [src1], #8
        bic     src2, src2, #3
        ldr     data2, [src2], #8

        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp1
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
        b       .Lstart_realigned4

.Lmisaligned4:
        ands    tmp1, src1, #3
        beq     .Lsrc1_aligned
        sub     src2, src2, tmp1
        bic     src1, src1, #3
        lsls    tmp1, tmp1, #31
        ldr     data1, [src1], #4
        beq     .Laligned_m2
        bcs     .Laligned_m1

#ifdef STRCMP_NO_PRECHECK
        ldrb    data2, [src2, #1]
        uxtb    tmp1, data1, ror #BYTE1_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m1:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Lsrc1_aligned

#else  /* STRCMP_NO_PRECHECK */
        /* If we've done the pre-check, then we don't need to check the
           first byte again here.  */
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Laligned_m1
#endif

.Lmisaligned_exit:
        .cfi_remember_state
        sub     result, tmp1, data2
        ldr     r4, [sp], #16
        .cfi_restore 4
        bx      lr

#ifndef STRCMP_NO_PRECHECK
.Laligned_m1:
        add     src2, src2, #4
#endif
.Lsrc1_aligned:
        .cfi_restore_state
        /* src1 is word aligned, but src2 has no common alignment
           with it.  */
        ldr     data1, [src1], #4
        lsls    tmp1, src2, #31         /* C=src2[1], Z=src2[0].  */

        bic     src2, src2, #3
        ldr     data2, [src2], #4
        bhi     .Loverlap1              /* C=1, Z=0 => src2[1:0] = 0b11.  */
        bcs     .Loverlap2              /* C=1, Z=1 => src2[1:0] = 0b10.  */

        /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
.Loverlap3:
        bic     tmp1, data1, #MSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #8
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f

        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
        bics    syndrome, syndrome, #MSB
        bne     .Lstrcmp_done_equal

        /* We can only get here if the MSB of data1 contains 0, so
           fast-path the exit.  */
        ldrb    result, [src2]
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        neg     result, result
        bx      lr

6:
        .cfi_restore_state
        S2LO    data1, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail
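
        /* .Loverlap2 and .Loverlap1 below mirror .Loverlap3: each word of
           src1 is compared partly against the bytes still held in the
           previously loaded src2 word (two and one bytes respectively,
           versus three above, hence the 16- and 24-bit shifts) and partly
           against the start of the next src2 word.  The numbered local
           labels again handle a difference or a NUL found in either
           part.  */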

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
        and     tmp1, data1, const_m1, S2LO #16
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #16
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2
4:
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail
5:
        ands    syndrome, syndrome, const_m1, S2LO #16
        bne     .Lstrcmp_done_equal

        ldrh    data2, [src2]
        S2LO    data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        S2LO    data1, data1, #16
        and     data2, data2, const_m1, S2LO #16
        b       .Lstrcmp_tail

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
        and     tmp1, data1, #LSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #24
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        tst     syndrome, #LSB
        bne     .Lstrcmp_done_equal
        ldr     data2, [src2]
6:
        S2LO    data1, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail

.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        bx      lr

.Lstrcmp_tail:
        .cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
        rev     data1, data1
        rev     data2, data2
        /* Now everything looks big-endian...  */
#endif
        uadd8   tmp1, data1, const_m1
        eor     tmp1, data1, data2
        sel     syndrome, tmp1, const_m1
        clz     tmp1, syndrome
        lsl     data1, data1, tmp1
        lsl     data2, data2, tmp1
        lsr     result, data1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        sub     result, result, data2, lsr #24
        bx      lr
        .cfi_endproc
        .size   strcmp, . - .Lstrcmp_start_addr