/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Very similar to the generic code, but uses Thumb2 as implemented
   in ARMv7-M.  */

#include "arm_asm.h"

/* Parameters and result.  */
#define src1            r0
#define src2            r1
#define result          r0      /* Overlaps src1.  */

/* Internal variables.  */
#define data1           r2
#define data2           r3
#define tmp2            r5
#define tmp1            r12
#define syndrome        r12     /* Overlaps tmp1 */

/* strcmp: compare the NUL-terminated byte strings at src1 and src2.

   In:      r0 (src1) = first string, r1 (src2) = second string.
   Out:     r0 (result) = 0 if the strings are equal, otherwise the byte
            from src1 minus the byte from src2 at the first position
            where they differ, both taken as unsigned values.
   Scratch: r2, r3 and r12.  r5 is used only on the mutually-unaligned
            path, where it is pushed first and popped before each exit.

   Strategy:
     - If src1 and src2 have the same offset within a word, compare one
       aligned word at a time (numeric labels 4 and 2 below).
     - Otherwise compare single bytes until src1 is word aligned
       (.Lstrcmp_unaligned), then compare each aligned word of src1
       against the matching bytes of two consecutive aligned words of
       src2 (.Loverlap3 / .Loverlap2 / .Loverlap1, chosen by src2 & 3).

   A word holds a NUL byte iff
       (word - 0x01010101) & ~word & 0x80808080
   is non-zero; that value is kept in "syndrome".

   NOTE(review): def_fn, prologue, epilogue, S2LO, S2HI, S2LOEQ, MSB, LSB
   and HAVE_PAC_LEAF are not defined in this file; they are assumed to
   come from arm_asm.h or from the file that includes this one.  Judging
   by the endianness comments below, S2LO/S2HI are presumably shifts
   towards the lower/higher-addressed byte and MSB/LSB the matching byte
   masks -- confirm against their definitions.  */

        .thumb
        .syntax unified
def_fn strcmp
        .fnstart
        .cfi_sections .debug_frame
        .cfi_startproc
        prologue push_ip=HAVE_PAC_LEAF
        eor     tmp1, src1, src2
        tst     tmp1, #3
        /* Strings not at same byte offset from a word boundary.  */
        bne     .Lstrcmp_unaligned
        ands    tmp1, src1, #3          /* Z set: already word aligned.  */
        bic     src1, src1, #3
        bic     src2, src2, #3
        ldr     data1, [src1], #4
        it      eq
        ldreq   data2, [src2], #4
        beq     4f
        /* Although s1 and s2 have identical initial alignment, they are
           not currently word aligned.  Rather than comparing bytes,
           make sure that any bytes fetched from before the addressed
           bytes are forced to 0xff.  Then they will always compare
           equal.  */
        eor     tmp1, tmp1, #3
        mvn     data2, #MSB
        lsl     tmp1, tmp1, #3
        S2LO    tmp1, data2, tmp1
        ldr     data2, [src2], #4
        orr     data1, data1, tmp1
        orr     data2, data2, tmp1
        .p2align 2
        /* Critical loop.  */
4:
        sub     syndrome, data1, #0x01010101
        cmp     data1, data2
        /* check for any zero bytes in first word */
        itttt   eq
        biceq   syndrome, syndrome, data1
        tsteq   syndrome, #0x80808080
        ldreq   data1, [src1], #4
        ldreq   data2, [src2], #4
        beq     4b
2:
        .cfi_remember_state
        /* There's a zero or a different byte in the word */
        S2HI    result, data1, #24
        S2LO    data1, data1, #8
        cmp     result, #1              /* C clear: byte from data1 is NUL.  */
        it      cs
        cmpcs   result, data2, S2HI #24
        it      eq
        S2LOEQ  data2, data2, #8
        beq     2b
        /* On a big-endian machine, RESULT contains the desired byte in bits
           0-7; on a little-endian machine they are in bits 24-31.  In
           both cases the other bits in RESULT are all zero.  For DATA2 the
           interesting byte is at the other end of the word, but the
           other bits are not necessarily zero.  We need a signed result
           representing the difference in the unsigned bytes, so for the
           little-endian case we can't just shift the interesting bits
           up.  */
#ifdef __ARM_BIG_ENDIAN
        sub     result, result, data2, lsr #24
#else
        and     data2, data2, #255
        lsrs    result, result, #24
        subs    result, result, data2
#endif
        epilogue push_ip=HAVE_PAC_LEAF


#if 0
        /* The assembly code below is based on the following algorithm.  */
#ifdef __ARM_BIG_ENDIAN
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

#define body(shift) \
  mask = 0xffffffffU RSHIFT shift; \
  data1 = *src1++; \
  data2 = *src2++; \
  do \
    { \
      tmp2 = data1 & mask; \
      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \
        { \
          data2 RSHIFT= shift; \
          break; \
        } \
      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \
        { \
          /* See comment in assembler below re syndrome on big-endian */\
          if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \
            data2 RSHIFT= shift; \
          else \
            { \
              data2 = *src2; \
              tmp2 = data1 RSHIFT (32 - shift); \
              data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
            } \
          break; \
        } \
      data2 = *src2++; \
      tmp2 ^= data1; \
      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \
        { \
          tmp2 = data1 >> (32 - shift); \
          data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \
          break; \
        } \
      data1 = *src1++; \
    } while (1)

  const unsigned* src1;
  const unsigned* src2;
  unsigned data1, data2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned tmp2;

  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
        return c1 - (int)c2;
    }
  src1 = (unsigned*) (((unsigned)s1) & ~3);
  src2 = (unsigned*) (((unsigned)s2) & ~3);
  tmp2 = ((unsigned) s2) & 3;
  if (tmp2 == 1)
    {
      body(8);
    }
  else if (tmp2 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }

  do
    {
#ifdef __ARM_BIG_ENDIAN
      /* Shift first, then narrow: the byte of interest is the most
         significant one.  */
      c1 = (char) (tmp2 >> 24);
      c2 = (char) (data2 >> 24);
#else /* not  __ARM_BIG_ENDIAN */
      c1 = (char) tmp2;
      c2 = (char) data2;
#endif /* not  __ARM_BIG_ENDIAN */
      tmp2 RSHIFT= 8;
      data2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */


        /* First of all, compare bytes until src1(sp1) is word-aligned. */
.Lstrcmp_unaligned:
        .cfi_restore_state
        tst     src1, #3
        beq     2f
        .cfi_remember_state
        ldrb    data1, [src1], #1
        ldrb    data2, [src2], #1
        cmp     data1, #1               /* C clear: byte from src1 is NUL.  */
        it      cs
        cmpcs   data1, data2
        beq     .Lstrcmp_unaligned
        sub     result, data1, data2
        epilogue push_ip=HAVE_PAC_LEAF

2:
        .cfi_restore_state
        /* tmp2 lives in r5, which is saved here and restored at both
           exits of the overlap code (.Lstrcmp_done_equal and
           .Lstrcmp_tail).  */
        stmfd   sp!, {r5}
        .cfi_adjust_cfa_offset 4
        .cfi_rel_offset 5, 0

        ldr     data1, [src1], #4
        and     tmp2, src2, #3
        bic     src2, src2, #3
        ldr     data2, [src2], #4
        /* src2 & 3 == 2 -> .Loverlap2, == 3 -> .Loverlap1, otherwise fall
           through to .Loverlap3.  */
        cmp     tmp2, #2
        beq     .Loverlap2
        bhi     .Loverlap1

        /* Critical inner Loop: Block with 3 bytes initial overlap */
        .p2align 2
.Loverlap3:
        bic     tmp2, data1, #MSB
        cmp     tmp2, data2, S2LO #8
        sub     syndrome, data1, #0x01010101
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #24
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap3
4:
        S2LO    data2, data2, #8
        b       .Lstrcmp_tail

5:
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     data1, #0xff000000
        itt     ne
        tstne   data1, #0x00ff0000
        tstne   data1, #0x0000ff00
        beq     .Lstrcmp_done_equal
#else
        bics    syndrome, syndrome, #0xff000000
        bne     .Lstrcmp_done_equal
#endif
        ldrb    data2, [src2]
        S2LO    tmp2, data1, #24
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #24
#endif
        b       .Lstrcmp_tail

6:
        S2LO    tmp2, data1, #24
        and     data2, data2, #LSB
        b       .Lstrcmp_tail

        /* Critical inner Loop: Block with 2 bytes initial overlap.  */
        .p2align 2
.Loverlap2:
        S2HI    tmp2, data1, #16
        sub     syndrome, data1, #0x01010101
        S2LO    tmp2, tmp2, #16
        bic     syndrome, syndrome, data1
        cmp     tmp2, data2, S2LO #16
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #16
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap2

5:
#ifdef __ARM_BIG_ENDIAN
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00 */
        tst     data1, #0xff000000
        it      ne
        tstne   data1, #0x00ff0000
        beq     .Lstrcmp_done_equal
#else
        lsls    syndrome, syndrome, #16
        bne     .Lstrcmp_done_equal
#endif
        ldrh    data2, [src2]
        S2LO    tmp2, data1, #16
#ifdef __ARM_BIG_ENDIAN
        lsl     data2, data2, #16
#endif
        b       .Lstrcmp_tail

6:
        S2HI    data2, data2, #16
        S2LO    tmp2, data1, #16
4:
        S2LO    data2, data2, #16
        b       .Lstrcmp_tail

        /* Critical inner Loop: Block with 1 byte initial overlap.  */
        .p2align 2
.Loverlap1:
        and     tmp2, data1, #LSB
        cmp     tmp2, data2, S2LO #24
        sub     syndrome, data1, #0x01010101
        bic     syndrome, syndrome, data1
        bne     4f
        ands    syndrome, syndrome, #0x80808080
        it      eq
        ldreq   data2, [src2], #4
        bne     5f
        eor     tmp2, tmp2, data1
        cmp     tmp2, data2, S2HI #8
        bne     6f
        ldr     data1, [src1], #4
        b       .Loverlap1
4:
        S2LO    data2, data2, #24
        b       .Lstrcmp_tail
5:
        /* The syndrome value may contain false ones if the string ends
           with the bytes 0x01 0x00.  */
        tst     data1, #LSB
        beq     .Lstrcmp_done_equal
        ldr     data2, [src2], #4
6:
        S2LO    tmp2, data1, #8
        bic     data2, data2, #MSB
        b       .Lstrcmp_tail

        /* Strings compared equal up to and including the terminator.  */
.Lstrcmp_done_equal:
        mov     result, #0
        .cfi_remember_state
        ldmfd   sp!, {r5}
        .cfi_restore 5
        .cfi_adjust_cfa_offset -4
        epilogue push_ip=HAVE_PAC_LEAF

        /* Byte-by-byte finish: tmp2 holds the remaining bytes from src1 and
           data2 those from src2.  r2 (the register otherwise named data1)
           is reused as scratch for the current byte from tmp2.  */
.Lstrcmp_tail:
        .cfi_restore_state
        and     r2, tmp2, #LSB
        and     result, data2, #LSB
        cmp     result, #1              /* C clear: byte from data2 is NUL.  */
        it      cs
        cmpcs   result, r2
        itt     eq
        S2LOEQ  tmp2, tmp2, #8
        S2LOEQ  data2, data2, #8
        beq     .Lstrcmp_tail
        sub     result, r2, result
        ldmfd   sp!, {r5}
        .cfi_restore 5
        .cfi_adjust_cfa_offset -4
        epilogue push_ip=HAVE_PAC_LEAF
        .cfi_endproc
        .cantunwind
        .fnend
        .size strcmp, . - strcmp