/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Implementation of strcmp for ARMv6.  Use ldrd to support wider
   loads, provided the data is sufficiently aligned.  Use
   saturating arithmetic to optimize the compares.  */

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (e.g. because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

#include <picolibc.h>

        .arm

/* Parameters and result.  */
#define src1            r0
#define src2            r1
#define result          r0      /* Overlaps src1.  */

/* Internal variables.  */
#define tmp1            r4
#define tmp2            r5
#define const_m1        r12

/* Additional internal variables for 64-bit aligned data.  */
#define data1a          r2
#define data1b          r3
#define data2a          r6
#define data2b          r7
#define syndrome_a      tmp1
#define syndrome_b      tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1           r2
#define data2           r3
#define syndrome        tmp2


        /* Macro to compute and return the result value for word-aligned
           cases.  */
        .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
        /* If data1 contains a zero byte, then syndrome will contain a 1 in
           bit 7 of that byte.  Otherwise, the highest set bit in the
           syndrome will highlight the first different bit.  It is therefore
           sufficient to extract the eight bits starting with the syndrome
           bit.  */
        clz     tmp1, \synd
        lsl     r1, \d2, tmp1
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsl     \d1, \d1, tmp1
        .cfi_remember_state
        lsr     result, \d1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1, lsr #24
        bx      lr
#else
        /* To use the big-endian trick we'd have to reverse all three words;
           that's slower than this approach.  */
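        /* For little-endian, byte-reverse the syndrome instead: the first
           marked byte (lowest address) becomes the most significant byte
           of the reversed word, so clz of the reversed syndrome, rounded
           down to a byte boundary, gives the bit offset of the byte of
           interest within the (unreversed) data words.  */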
        rev     \synd, \synd
        clz     tmp1, \synd
        bic     tmp1, tmp1, #7
        lsr     r1, \d2, tmp1
        .cfi_remember_state
        .if \restore_r6
        ldrd    r6, r7, [sp, #8]
        .endif
        .cfi_restore 6
        .cfi_restore 7
        lsr     \d1, \d1, tmp1
        and     result, \d1, #255
        and     r1, r1, #255
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        sub     result, result, r1

        bx      lr
#endif
        .endm

        .text
        .p2align        5
.Lstrcmp_start_addr:
#ifndef STRCMP_NO_PRECHECK
.Lfastpath_exit:
        sub     r0, r2, r3
        bx      lr
#endif
def_fn  strcmp
#ifndef STRCMP_NO_PRECHECK
        ldrb    r2, [src1]
        ldrb    r3, [src2]
        cmp     r2, #1
        cmpcs   r2, r3
        bne     .Lfastpath_exit
#endif
        .cfi_sections .debug_frame
        .cfi_startproc
        strd    r4, r5, [sp, #-16]!
        .cfi_def_cfa_offset 16
        .cfi_offset 4, -16
        .cfi_offset 5, -12
        orr     tmp1, src1, src2
        strd    r6, r7, [sp, #8]
        .cfi_offset 6, -8
        .cfi_offset 7, -4
        mvn     const_m1, #0
        tst     tmp1, #7
        beq     .Lloop_aligned8

.Lnot_aligned:
        eor     tmp1, src1, src2
        tst     tmp1, #7
        bne     .Lmisaligned8

        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        and     tmp1, src1, #7
        bic     src1, src1, #7
        and     tmp2, tmp1, #3
        bic     src2, src2, #7
        lsl     tmp2, tmp2, #3          /* Bytes -> bits.  */
        ldrd    data1a, data1b, [src1], #16
        tst     tmp1, #4
        ldrd    data2a, data2b, [src2], #16
        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp2
        orr     data1a, data1a, tmp1
        orr     data2a, data2a, tmp1
        beq     .Lstart_realigned8
        orr     data1b, data1b, tmp1
        mov     data1a, const_m1
        orr     data2b, data2b, tmp1
        mov     data2a, const_m1
        b       .Lstart_realigned8

        /* Unroll the inner loop by a factor of 2, giving 16 bytes per
           pass.  */
        .p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
        .p2align 2      /* Always word aligned.  */
.Lloop_aligned8:
        ldrd    data1a, data1b, [src1], #16
        ldrd    data2a, data2b, [src2], #16
.Lstart_realigned8:
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a      /* Only need if s_a == 0  */
        bne     .Ldiff_found

        ldrd    data1a, data1b, [src1, #-8]
        ldrd    data2a, data2b, [src2, #-8]
        uadd8   syndrome_b, data1a, const_m1    /* Only want GE bits.  */
        eor     syndrome_a, data1a, data2a
        sel     syndrome_a, syndrome_a, const_m1
        uadd8   syndrome_b, data1b, const_m1    /* Only want GE bits.  */
        eor     syndrome_b, data1b, data2b
        sel     syndrome_b, syndrome_b, const_m1
        orrs    syndrome_b, syndrome_b, syndrome_a      /* Only need if s_a == 0  */
        beq     .Lloop_aligned8

.Ldiff_found:
        cmp     syndrome_a, #0
        bne     .Ldiff_in_a

.Ldiff_in_b:
        strcmp_epilogue_aligned syndrome_b, data1b, data2b 1

.Ldiff_in_a:
        .cfi_restore_state
        strcmp_epilogue_aligned syndrome_a, data1a, data2a 1

        .cfi_restore_state
.Lmisaligned8:
        tst     tmp1, #3
        bne     .Lmisaligned4
        ands    tmp1, src1, #3
        bne     .Lmutual_align4

        /* Unrolled by a factor of 2, to reduce the number of post-increment
           operations.  */
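        /* As in the 8-byte loop above, the syndrome is built with
           uadd8/sel: adding 0xff to every byte of data1 sets the per-byte
           GE flag only for non-zero bytes, so sel replaces the XOR
           difference with 0xff in any byte lane where data1 holds a NUL.
           A non-zero syndrome therefore means either a mismatch or the
           end of string 1.  */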
.Lloop_aligned4:
        ldr     data1, [src1], #8
        ldr     data2, [src2], #8
.Lstart_realigned4:
        uadd8   syndrome, data1, const_m1       /* Only need GE bits.  */
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        bne     .Laligned4_done

        ldr     data1, [src1, #-4]
        ldr     data2, [src2, #-4]
        uadd8   syndrome, data1, const_m1
        eor     syndrome, data1, data2
        sel     syndrome, syndrome, const_m1
        cmp     syndrome, #0
        beq     .Lloop_aligned4

.Laligned4_done:
        strcmp_epilogue_aligned syndrome, data1, data2, 0

.Lmutual_align4:
        .cfi_restore_state
        /* Deal with mutual misalignment by aligning downwards and then
           masking off the unwanted loaded data to prevent a difference.  */
        lsl     tmp1, tmp1, #3          /* Bytes -> bits.  */
        bic     src1, src1, #3
        ldr     data1, [src1], #8
        bic     src2, src2, #3
        ldr     data2, [src2], #8

        /* In ARM code we can't use ORN, but we do have MVN with a
           register shift.  */
        mvn     tmp1, const_m1, S2HI tmp1
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
        b       .Lstart_realigned4

.Lmisaligned4:
        ands    tmp1, src1, #3
        beq     .Lsrc1_aligned
        sub     src2, src2, tmp1
        bic     src1, src1, #3
        lsls    tmp1, tmp1, #31
        ldr     data1, [src1], #4
        beq     .Laligned_m2
        bcs     .Laligned_m1

#ifdef STRCMP_NO_PRECHECK
        ldrb    data2, [src2, #1]
        uxtb    tmp1, data1, ror #BYTE1_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m1:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Lsrc1_aligned

#else  /* STRCMP_NO_PRECHECK */
        /* If we've done the pre-check, then we don't need to check the
           first byte again here.  */
        ldrb    data2, [src2, #2]
        uxtb    tmp1, data1, ror #BYTE2_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        bne     .Lmisaligned_exit

.Laligned_m2:
        ldrb    data2, [src2, #3]
        uxtb    tmp1, data1, ror #BYTE3_OFFSET
        cmp     tmp1, #1
        cmpcs   tmp1, data2
        beq     .Laligned_m1
#endif

.Lmisaligned_exit:
        .cfi_remember_state
        sub     result, tmp1, data2
        ldr     r4, [sp], #16
        .cfi_restore 4
        bx      lr

#ifndef STRCMP_NO_PRECHECK
.Laligned_m1:
        add     src2, src2, #4
#endif
.Lsrc1_aligned:
        .cfi_restore_state
        /* src1 is word aligned, but src2 has no common alignment
           with it.  */
        ldr     data1, [src1], #4
        lsls    tmp1, src2, #31         /* C=src2[1], Z=!src2[0].  */

        bic     src2, src2, #3
        ldr     data2, [src2], #4
        bhi     .Loverlap1              /* C=1, Z=0 => src2[1:0] = 0b11.  */
        bcs     .Loverlap2              /* C=1, Z=1 => src2[1:0] = 0b10.  */

        /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
.Loverlap3:
        bic     tmp1, data1, #MSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #8
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f

        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
        bics    syndrome, syndrome, #MSB
        bne     .Lstrcmp_done_equal

        /* We can only get here if the MSB of data1 contains 0, so
           fast-path the exit.  */
        ldrb    result, [src2]
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        neg     result, result
        bx      lr

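        /* Exit 6: the last byte of the current src1 word differs from the
           first byte of the newly loaded src2 word; line those bytes up
           and fall through to the common tail.  */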
6:
        .cfi_restore_state
        S2LO    data1, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap2:
        and     tmp1, data1, const_m1, S2LO #16
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #16
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2
4:
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail
5:
        ands    syndrome, syndrome, const_m1, S2LO #16
        bne     .Lstrcmp_done_equal

        ldrh    data2, [src2]
        S2LO    data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        S2LO    data1, data1, #16
        and     data2, data2, const_m1, S2LO #16
        b       .Lstrcmp_tail

        .p2align 5,,12  /* Ensure at least 3 instructions in cache line.  */
.Loverlap1:
        and     tmp1, data1, #LSB
        uadd8   syndrome, data1, const_m1
        eors    syndrome, tmp1, data2, S2LO #24
        sel     syndrome, syndrome, const_m1
        bne     4f
        cmp     syndrome, #0
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp1, tmp1, data1
        cmp     tmp1, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        tst     syndrome, #LSB
        bne     .Lstrcmp_done_equal
        ldr     data2, [src2]
6:
        S2LO    data1, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail

.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        bx      lr

.Lstrcmp_tail:
        .cfi_restore_state
#ifndef __ARM_BIG_ENDIAN
        rev     data1, data1
        rev     data2, data2
        /* Now everything looks big-endian...  */
#endif
        uadd8   tmp1, data1, const_m1
        eor     tmp1, data1, data2
        sel     syndrome, tmp1, const_m1
        clz     tmp1, syndrome
        lsl     data1, data1, tmp1
        lsl     data2, data2, tmp1
        lsr     result, data1, #24
        ldrd    r4, r5, [sp], #16
        .cfi_restore 4
        .cfi_restore 5
        /* R6/7 not used in this sequence.  */
        .cfi_restore 6
        .cfi_restore 7
        sub     result, result, data2, lsr #24
        bx      lr
        .cfi_endproc
        .size strcmp, . - .Lstrcmp_start_addr