1/* 2 * Copyright (c) 2012-2014 ARM Ltd 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the company may not be used to endorse or promote 14 * products derived from this software without specific prior written 15 * permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* Basic ARM implementation. This should run on anything except 30 for ARMv6-M, but there are better implementations for later 31 revisions of the architecture. This version can support ARMv4T 32 ARM/Thumb interworking. */ 33/* Parameters and result. */ 34#include <picolibc.h> 35 36#define src1 r0 37#define src2 r1 38#define result r0 /* Overlaps src1. */ 39 40/* Internal variables. */ 41#define data1 r2 42#define data2 r3 43#define magic1 r4 44#define tmp2 r5 45#define tmp1 r12 46#define syndrome r12 /* Overlaps tmp1 */ 47 48/* For armv4t and newer, toolchains will transparently convert 49 'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support 50 for anything older than armv4t, but this should handle that 51 corner case in case anyone needs it anyway */ 52.macro RETURN 53#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0 54 mov pc, lr 55#else 56 bx lr 57#endif 58.endm 59 60 .arm 61def_fn strcmp 62 .cfi_sections .debug_frame 63 .cfi_startproc 64 eor tmp1, src1, src2 65 tst tmp1, #3 66 /* Strings not at same byte offset from a word boundary. */ 67 bne .Lstrcmp_unaligned 68 ands tmp1, src1, #3 69 bic src1, src1, #3 70 bic src2, src2, #3 71 ldr data1, [src1], #4 72 ldreq data2, [src2], #4 73 beq 1f 74 /* Although s1 and s2 have identical initial alignment, they are 75 not currently word aligned. Rather than comparing bytes, 76 make sure that any bytes fetched from before the addressed 77 bytes are forced to 0xff. Then they will always compare 78 equal. */ 79 eor tmp1, tmp1, #3 80 mvn data2, #MSB 81 lsl tmp1, tmp1, #3 82 S2LO tmp1, data2, tmp1 83 ldr data2, [src2], #4 84 orr data1, data1, tmp1 85 orr data2, data2, tmp1 861: 87 /* Load the 'magic' constant 0x01010101. */ 88 str r4, [sp, #-4]! 89 .cfi_def_cfa_offset 4 90 .cfi_offset 4, -4 91 mov magic1, #1 92 orr magic1, magic1, magic1, lsl #8 93 orr magic1, magic1, magic1, lsl #16 94 .p2align 2 954: 96 sub syndrome, data1, magic1 97 cmp data1, data2 98 /* check for any zero bytes in first word */ 99 biceq syndrome, syndrome, data1 100 tsteq syndrome, magic1, lsl #7 101 ldreq data1, [src1], #4 102 ldreq data2, [src2], #4 103 beq 4b 1042: 105 /* There's a zero or a different byte in the word */ 106 S2HI result, data1, #24 107 S2LO data1, data1, #8 108 cmp result, #1 109 cmpcs result, data2, S2HI #24 110 S2LOEQ data2, data2, #8 111 beq 2b 112 /* On a big-endian machine, RESULT contains the desired byte in bits 113 0-7; on a little-endian machine they are in bits 24-31. In 114 both cases the other bits in RESULT are all zero. For DATA2 the 115 interesting byte is at the other end of the word, but the 116 other bits are not necessarily zero. We need a signed result 117 representing the differnece in the unsigned bytes, so for the 118 little-endian case we can't just shift the interesting bits 119 up. */ 120#ifdef __ARM_BIG_ENDIAN 121 sub result, result, data2, lsr #24 122#else 123 and data2, data2, #255 124 rsb result, data2, result, lsr #24 125#endif 126 ldr r4, [sp], #4 127 .cfi_restore 4 128 .cfi_def_cfa_offset 0 129 RETURN 130 131 132#if 0 133 /* The assembly code below is based on the following alogrithm. */ 134#ifdef __ARM_BIG_ENDIAN 135#define RSHIFT << 136#define LSHIFT >> 137#else 138#define RSHIFT >> 139#define LSHIFT << 140#endif 141 142#define body(shift) \ 143 mask = 0xffffffffU RSHIFT shift; \ 144 data1 = *src1++; \ 145 data2 = *src2++; \ 146 do \ 147 { \ 148 tmp2 = data1 & mask; \ 149 if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \ 150 { \ 151 data2 RSHIFT= shift; \ 152 break; \ 153 } \ 154 if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \ 155 { \ 156 /* See comment in assembler below re syndrome on big-endian */\ 157 if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \ 158 data2 RSHIFT= shift; \ 159 else \ 160 { \ 161 data2 = *src2; \ 162 tmp2 = data1 RSHIFT (32 - shift); \ 163 data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ 164 } \ 165 break; \ 166 } \ 167 data2 = *src2++; \ 168 tmp2 ^= data1; \ 169 if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \ 170 { \ 171 tmp2 = data1 >> (32 - shift); \ 172 data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \ 173 break; \ 174 } \ 175 data1 = *src1++; \ 176 } while (1) 177 178 const unsigned* src1; 179 const unsigned* src2; 180 unsigned data1, data2; 181 unsigned mask; 182 unsigned shift; 183 unsigned b1 = 0x01010101; 184 char c1, c2; 185 unsigned tmp2; 186 187 while (((unsigned) s1) & 3) 188 { 189 c1 = *s1++; 190 c2 = *s2++; 191 if (c1 == 0 || c1 != c2) 192 return c1 - (int)c2; 193 } 194 src1 = (unsigned*) (((unsigned)s1) & ~3); 195 src2 = (unsigned*) (((unsigned)s2) & ~3); 196 tmp2 = ((unsigned) s2) & 3; 197 if (tmp2 == 1) 198 { 199 body(8); 200 } 201 else if (tmp2 == 2) 202 { 203 body(16); 204 } 205 else 206 { 207 body (24); 208 } 209 210 do 211 { 212#ifdef __ARM_BIG_ENDIAN 213 c1 = (char) tmp2 >> 24; 214 c2 = (char) data2 >> 24; 215#else /* not __ARM_BIG_ENDIAN */ 216 c1 = (char) tmp2; 217 c2 = (char) data2; 218#endif /* not __ARM_BIG_ENDIAN */ 219 tmp2 RSHIFT= 8; 220 data2 RSHIFT= 8; 221 } while (c1 != 0 && c1 == c2); 222 return c1 - c2; 223#endif /* 0 */ 224 225 226 /* First of all, compare bytes until src1(sp1) is word-aligned. */ 227.Lstrcmp_unaligned: 228 tst src1, #3 229 beq 2f 230 ldrb data1, [src1], #1 231 ldrb data2, [src2], #1 232 cmp data1, #1 233 cmpcs data1, data2 234 beq .Lstrcmp_unaligned 235 sub result, data1, data2 236 RETURN 237 2382: 239 stmfd sp!, {r4, r5} 240 .cfi_def_cfa_offset 8 241 .cfi_offset 4, -8 242 .cfi_offset 5, -4 243 mov magic1, #1 244 orr magic1, magic1, magic1, lsl #8 245 orr magic1, magic1, magic1, lsl #16 246 247 ldr data1, [src1], #4 248 and tmp2, src2, #3 249 bic src2, src2, #3 250 ldr data2, [src2], #4 251 cmp tmp2, #2 252 beq .Loverlap2 253 bhi .Loverlap1 254 255 /* Critical inner Loop: Block with 3 bytes initial overlap */ 256 .p2align 2 257.Loverlap3: 258 bic tmp2, data1, #MSB 259 cmp tmp2, data2, S2LO #8 260 sub syndrome, data1, magic1 261 bic syndrome, syndrome, data1 262 bne 4f 263 ands syndrome, syndrome, magic1, lsl #7 264 ldreq data2, [src2], #4 265 bne 5f 266 eor tmp2, tmp2, data1 267 cmp tmp2, data2, S2HI #24 268 bne 6f 269 ldr data1, [src1], #4 270 b .Loverlap3 2714: 272 S2LO data2, data2, #8 273 b .Lstrcmp_tail 274 2755: 276#ifdef __ARM_BIG_ENDIAN 277 /* The syndrome value may contain false ones if the string ends 278 with the bytes 0x01 0x00. */ 279 tst data1, #0xff000000 280 tstne data1, #0x00ff0000 281 tstne data1, #0x0000ff00 282 beq .Lstrcmp_done_equal 283#else 284 bics syndrome, syndrome, #0xff000000 285 bne .Lstrcmp_done_equal 286#endif 287 ldrb data2, [src2] 288 S2LO tmp2, data1, #24 289#ifdef __ARM_BIG_ENDIAN 290 lsl data2, data2, #24 291#endif 292 b .Lstrcmp_tail 293 2946: 295 S2LO tmp2, data1, #24 296 and data2, data2, #LSB 297 b .Lstrcmp_tail 298 299 /* Critical inner Loop: Block with 2 bytes initial overlap. */ 300 .p2align 2 301.Loverlap2: 302 S2HI tmp2, data1, #16 303 sub syndrome, data1, magic1 304 S2LO tmp2, tmp2, #16 305 bic syndrome, syndrome, data1 306 cmp tmp2, data2, S2LO #16 307 bne 4f 308 ands syndrome, syndrome, magic1, lsl #7 309 ldreq data2, [src2], #4 310 bne 5f 311 eor tmp2, tmp2, data1 312 cmp tmp2, data2, S2HI #16 313 bne 6f 314 ldr data1, [src1], #4 315 b .Loverlap2 316 3175: 318#ifdef __ARM_BIG_ENDIAN 319 /* The syndrome value may contain false ones if the string ends 320 with the bytes 0x01 0x00 */ 321 tst data1, #0xff000000 322 tstne data1, #0x00ff0000 323 beq .Lstrcmp_done_equal 324#else 325 lsls syndrome, syndrome, #16 326 bne .Lstrcmp_done_equal 327#endif 328 ldrh data2, [src2] 329 S2LO tmp2, data1, #16 330#ifdef __ARM_BIG_ENDIAN 331 lsl data2, data2, #16 332#endif 333 b .Lstrcmp_tail 334 3356: 336 S2HI data2, data2, #16 337 S2LO tmp2, data1, #16 3384: 339 S2LO data2, data2, #16 340 b .Lstrcmp_tail 341 342 /* Critical inner Loop: Block with 1 byte initial overlap. */ 343 .p2align 2 344.Loverlap1: 345 and tmp2, data1, #LSB 346 cmp tmp2, data2, S2LO #24 347 sub syndrome, data1, magic1 348 bic syndrome, syndrome, data1 349 bne 4f 350 ands syndrome, syndrome, magic1, lsl #7 351 ldreq data2, [src2], #4 352 bne 5f 353 eor tmp2, tmp2, data1 354 cmp tmp2, data2, S2HI #8 355 bne 6f 356 ldr data1, [src1], #4 357 b .Loverlap1 3584: 359 S2LO data2, data2, #24 360 b .Lstrcmp_tail 3615: 362 /* The syndrome value may contain false ones if the string ends 363 with the bytes 0x01 0x00. */ 364 tst data1, #LSB 365 beq .Lstrcmp_done_equal 366 ldr data2, [src2], #4 3676: 368 S2LO tmp2, data1, #8 369 bic data2, data2, #MSB 370 b .Lstrcmp_tail 371.Lstrcmp_done_equal: 372 mov result, #0 373 .cfi_remember_state 374 ldmfd sp!, {r4, r5} 375 .cfi_restore 4 376 .cfi_restore 5 377 .cfi_def_cfa_offset 0 378 RETURN 379 380.Lstrcmp_tail: 381 .cfi_restore_state 382 and r2, tmp2, #LSB 383 and result, data2, #LSB 384 cmp result, #1 385 cmpcs result, r2 386 S2LOEQ tmp2, tmp2, #8 387 S2LOEQ data2, data2, #8 388 beq .Lstrcmp_tail 389 sub result, r2, result 390 ldmfd sp!, {r4, r5} 391 .cfi_restore 4 392 .cfi_restore 5 393 .cfi_def_cfa_offset 0 394 RETURN 395 .cfi_endproc 396 .size strcmp, . - strcmp 397