/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Basic ARM implementation.  This should run on anything except
   for ARMv6-M, but there are better implementations for later
   revisions of the architecture.  This version can support ARMv4T
   ARM/Thumb interworking.  */
/* Parameters and result.  */
#define src1            r0
#define src2            r1
#define result          r0      /* Overlaps src1.  */

/* Internal variables.  */
#define data1           r2
#define data2           r3
#define magic1          r4
#define tmp2            r5
#define tmp1            r12
#define syndrome        r12     /* Overlaps tmp1.  */

/* For armv4t and newer, toolchains will transparently convert
   'bx lr' to 'mov pc, lr' if needed.  GCC has deprecated support
   for anything older than armv4t, but this should handle that
   corner case in case anyone needs it anyway.  */
        .macro RETURN
#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
        mov     pc, lr
#else
        bx      lr
#endif
        .endm

        .arm
def_fn  strcmp
        .cfi_sections .debug_frame
        .cfi_startproc
        eor     tmp1, src1, src2
        tst     tmp1, #3
        /* Strings not at same byte offset from a word boundary.  */
        bne     .Lstrcmp_unaligned
        ands    tmp1, src1, #3
        bic     src1, src1, #3
        bic     src2, src2, #3
        ldr     data1, [src1], #4
        ldreq   data2, [src2], #4
        beq     1f
        /* Although s1 and s2 have identical initial alignment, they are
           not currently word aligned.  Rather than comparing bytes,
           make sure that any bytes fetched from before the addressed
           bytes are forced to 0xff.  Then they will always compare
           equal.  */
        eor     tmp1, tmp1, #3
        mvn     data2, #MSB
        lsl     tmp1, tmp1, #3
        S2LO    tmp1, data2, tmp1
        ldr     data2, [src2], #4
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
1:
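        /* The word-at-a-time loop below relies on a classic zero-byte
           test, shown in C in the reference algorithm further down:
           with magic1 = 0x01010101,

               (data1 - magic1) & ~data1 & (magic1 << 7)

           is non-zero exactly when some byte of data1 is zero.
           Subtracting 1 from a 0x00 byte borrows into bit 7 of that
           byte, and the ~data1 term keeps that bit only where the
           original byte had its top bit clear.  The loop at 4: below
           evaluates this with a sub/bic/tst sequence so the result
           lands directly in the flags.  */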
        /* Load the 'magic' constant 0x01010101.  */
        str     r4, [sp, #-4]!
        .cfi_def_cfa_offset 4
        .cfi_offset 4, -4
        mov     magic1, #1
        orr     magic1, magic1, magic1, lsl #8
        orr     magic1, magic1, magic1, lsl #16
        .p2align 2
4:
        sub     syndrome, data1, magic1
        cmp     data1, data2
        /* Check for any zero bytes in the first word.  */
        biceq   syndrome, syndrome, data1
        tsteq   syndrome, magic1, lsl #7
        ldreq   data1, [src1], #4
        ldreq   data2, [src2], #4
        beq     4b
2:
        /* There's a zero or a different byte in the word.  */
        S2HI    result, data1, #24
        S2LO    data1, data1, #8
        cmp     result, #1
        cmpcs   result, data2, S2HI #24
        S2LOEQ  data2, data2, #8
        beq     2b
        /* On a big-endian machine, RESULT contains the desired byte in bits
           0-7; on a little-endian machine they are in bits 24-31.  In
           both cases the other bits in RESULT are all zero.  For DATA2 the
           interesting byte is at the other end of the word, but the
           other bits are not necessarily zero.  We need a signed result
           representing the difference in the unsigned bytes, so for the
           little-endian case we can't just shift the interesting bits
           up.  */
#ifdef __ARM_BIG_ENDIAN
        sub     result, result, data2, lsr #24
#else
        and     data2, data2, #255
        rsb     result, data2, result, lsr #24
#endif
        ldr     r4, [sp], #4
        .cfi_restore 4
        .cfi_def_cfa_offset 0
        RETURN


#if 0
        /* The assembly code below is based on the following algorithm.  */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

#define body(shift) \
  mask = 0xffffffffU RSHIFT shift; \
  data1 = *src1++; \
  data2 = *src2++; \
  do \
    { \
      tmp2 = data1 & mask; \
      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
        { \
          data2 RSHIFT= shift; \
          break; \
        } \
      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
        { \
          /* See comment in assembler below re syndrome on big-endian.  */ \
          if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
            data2 RSHIFT= shift; \
          else \
            { \
              data2 = *src2; \
              tmp2 = data1 RSHIFT (32 - shift); \
              data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
            } \
          break; \
        } \
      data2 = *src2++; \
      tmp2 ^= data1; \
      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
        { \
          tmp2 = data1 >> (32 - shift); \
          data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
          break; \
        } \
      data1 = *src1++; \
    } while (1)

  const unsigned* src1;
  const unsigned* src2;
  unsigned data1, data2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned tmp2;

  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
        return c1 - (int)c2;
    }
  src1 = (unsigned*) (((unsigned)s1) & ~3);
  src2 = (unsigned*) (((unsigned)s2) & ~3);
  tmp2 = ((unsigned) s2) & 3;
  if (tmp2 == 1)
    {
      body(8);
    }
  else if (tmp2 == 2)
    {
      body(16);
    }
  else
    {
      body(24);
    }

  do
    {
#ifdef __ARM_BIG_ENDIAN
      c1 = (char) (tmp2 >> 24);
      c2 = (char) (data2 >> 24);
#else /* not __ARM_BIG_ENDIAN */
      c1 = (char) tmp2;
      c2 = (char) data2;
#endif /* not __ARM_BIG_ENDIAN */
      tmp2 RSHIFT= 8;
      data2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */

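        /* When the two strings do not share the same offset within a
           word, src1 is advanced a byte at a time until it is word
           aligned; the remaining offset of src2 (1, 2 or 3 bytes) then
           selects one of the .Loverlap1/2/3 loops below, each of which
           compares a whole word of src1 against bytes assembled from
           two consecutive words of src2, using the same magic1
           syndrome test to spot a terminating NUL.  */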
        /* First of all, compare bytes until src1 is word-aligned.  */
.Lstrcmp_unaligned:
        tst     src1, #3
        beq     2f
        ldrb    data1, [src1], #1
        ldrb    data2, [src2], #1
        cmp     data1, #1
        cmpcs   data1, data2
        beq     .Lstrcmp_unaligned
        sub     result, data1, data2
        RETURN

2:
        stmfd   sp!, {r4, r5}
        .cfi_def_cfa_offset 8
        .cfi_offset 4, -8
        .cfi_offset 5, -4
        mov     magic1, #1
        orr     magic1, magic1, magic1, lsl #8
        orr     magic1, magic1, magic1, lsl #16

        ldr     data1, [src1], #4
        and     tmp2, src2, #3
        bic     src2, src2, #3
        ldr     data2, [src2], #4
        cmp     tmp2, #2
        beq     .Loverlap2
        bhi     .Loverlap1

        /* Critical inner Loop: Block with 3 bytes initial overlap.  */
        .p2align 2
.Loverlap3:
        bic     tmp2, data1, #MSB
        cmp     tmp2, data2, S2LO #8
        sub     syndrome, data1, magic1
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, magic1, lsl #7
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     data1, #0xff000000
        tstne   data1, #0x00ff0000
        tstne   data1, #0x0000ff00
        beq     .Lstrcmp_done_equal
#else
        bics    syndrome, syndrome, #0xff000000
        bne     .Lstrcmp_done_equal
#endif
        ldrb    data2, [src2]
        S2LO    tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #24
#endif
        b       .Lstrcmp_tail

6:
        S2LO    tmp2, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail

        /* Critical inner Loop: Block with 2 bytes initial overlap.  */
        .p2align 2
.Loverlap2:
        S2HI    tmp2, data1, #16
        sub     syndrome, data1, magic1
        S2LO    tmp2, tmp2, #16
        bic     syndrome, syndrome, data1
        cmp     tmp2, data2, S2LO #16
        bne     4f
        ands    syndrome, syndrome, magic1, lsl #7
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2

5:
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     data1, #0xff000000
        tstne   data1, #0x00ff0000
        beq     .Lstrcmp_done_equal
#else
        lsls    syndrome, syndrome, #16
        bne     .Lstrcmp_done_equal
#endif
        ldrh    data2, [src2]
        S2LO    tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        S2HI    data2, data2, #16
        S2LO    tmp2, data1, #16
4:
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail

        /* Critical inner Loop: Block with 1 byte initial overlap.  */
        .p2align 2
.Loverlap1:
        and     tmp2, data1, #LSB
        cmp     tmp2, data2, S2LO #24
        sub     syndrome, data1, magic1
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, magic1, lsl #7
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
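        /* A borrow from subtracting magic1 out of a 0x00 byte propagates
           into the next more significant byte, so a 0x01 byte sitting
           directly above a NUL in the register is turned into 0xff and
           gets its syndrome bit set even though it is not itself NUL.
           Only part of data1 has been matched against src2 at this
           point, so instead of trusting that bit of the syndrome the
           code below re-checks the relevant byte of data1 directly.  */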
        tst     data1, #LSB
        beq     .Lstrcmp_done_equal
        ldr     data2, [src2], #4
6:
        S2LO    tmp2, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail

.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldmfd   sp!, {r4, r5}
        .cfi_restore 4
        .cfi_restore 5
        .cfi_def_cfa_offset 0
        RETURN

.Lstrcmp_tail:
        .cfi_restore_state
        and     r2, tmp2, #LSB
        and     result, data2, #LSB
        cmp     result, #1
        cmpcs   result, r2
        S2LOEQ  tmp2, tmp2, #8
        S2LOEQ  data2, data2, #8
        beq     .Lstrcmp_tail
        sub     result, r2, result
        ldmfd   sp!, {r4, r5}
        .cfi_restore 4
        .cfi_restore 5
        .cfi_def_cfa_offset 0
        RETURN
        .cfi_endproc
        .size   strcmp, . - strcmp