1/* 2 * Copyright (c) 2012-2014 ARM Ltd 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the company may not be used to endorse or promote 14 * products derived from this software without specific prior written 15 * permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29/* Very similar to the generic code, but uses Thumb2 as implemented 30 in ARMv7-M. */ 31 32#include <picolibc.h> 33 34#include "arm_asm.h" 35 36/* Parameters and result. */ 37#define src1 r0 38#define src2 r1 39#define result r0 /* Overlaps src1. */ 40 41/* Internal variables. */ 42#define data1 r2 43#define data2 r3 44#define tmp2 r5 45#define tmp1 r12 46#define syndrome r12 /* Overlaps tmp1 */ 47 48 .thumb 49 .syntax unified 50def_fn strcmp 51 .fnstart 52 .cfi_sections .debug_frame 53 .cfi_startproc 54 prologue push_ip=HAVE_PAC_LEAF 55 eor tmp1, src1, src2 56 tst tmp1, #3 57 /* Strings not at same byte offset from a word boundary. */ 58 bne .Lstrcmp_unaligned 59 ands tmp1, src1, #3 60 bic src1, src1, #3 61 bic src2, src2, #3 62 ldr data1, [src1], #4 63 it eq 64 ldreq data2, [src2], #4 65 beq 4f 66 /* Although s1 and s2 have identical initial alignment, they are 67 not currently word aligned. Rather than comparing bytes, 68 make sure that any bytes fetched from before the addressed 69 bytes are forced to 0xff. Then they will always compare 70 equal. */ 71 eor tmp1, tmp1, #3 72 mvn data2, #MSB 73 lsl tmp1, tmp1, #3 74 S2LO tmp1, data2, tmp1 75 ldr data2, [src2], #4 76 orr data1, data1, tmp1 77 orr data2, data2, tmp1 78 .p2align 2 79 /* Critical loop. */ 804: 81 sub syndrome, data1, #0x01010101 82 cmp data1, data2 83 /* check for any zero bytes in first word */ 84 itttt eq 85 biceq syndrome, syndrome, data1 86 tsteq syndrome, #0x80808080 87 ldreq data1, [src1], #4 88 ldreq data2, [src2], #4 89 beq 4b 902: 91 .cfi_remember_state 92 /* There's a zero or a different byte in the word */ 93 S2HI result, data1, #24 94 S2LO data1, data1, #8 95 cmp result, #1 96 it cs 97 cmpcs result, data2, S2HI #24 98 it eq 99 S2LOEQ data2, data2, #8 100 beq 2b 101 /* On a big-endian machine, RESULT contains the desired byte in bits 102 0-7; on a little-endian machine they are in bits 24-31. In 103 both cases the other bits in RESULT are all zero. For DATA2 the 104 interesting byte is at the other end of the word, but the 105 other bits are not necessarily zero. We need a signed result 106 representing the differnece in the unsigned bytes, so for the 107 little-endian case we can't just shift the interesting bits 108 up. */ 109#ifdef __ARM_BIG_ENDIAN 110 sub result, result, data2, lsr #24 111#else 112 and data2, data2, #255 113 lsrs result, result, #24 114 subs result, result, data2 115#endif 116 epilogue push_ip=HAVE_PAC_LEAF 117 118 119#if 0 120 /* The assembly code below is based on the following alogrithm. */ 121#ifdef __ARM_BIG_ENDIAN 122#define RSHIFT << 123#define LSHIFT >> 124#else 125#define RSHIFT >> 126#define LSHIFT << 127#endif 128 129#define body(shift) \ 130 mask = 0xffffffffU RSHIFT shift; \ 131 data1 = *src1++; \ 132 data2 = *src2++; \ 133 do \ 134 { \ 135 tmp2 = data1 & mask; \ 136 if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0)) \ 137 { \ 138 data2 RSHIFT= shift; \ 139 break; \ 140 } \ 141 if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0)) \ 142 { \ 143 /* See comment in assembler below re syndrome on big-endian */\ 144 if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask) \ 145 data2 RSHIFT= shift; \ 146 else \ 147 { \ 148 data2 = *src2; \ 149 tmp2 = data1 RSHIFT (32 - shift); \ 150 data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \ 151 } \ 152 break; \ 153 } \ 154 data2 = *src2++; \ 155 tmp2 ^= data1; \ 156 if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0)) \ 157 { \ 158 tmp2 = data1 >> (32 - shift); \ 159 data2 = (data2 << (32 - shift)) RSHIFT (32 - shift); \ 160 break; \ 161 } \ 162 data1 = *src1++; \ 163 } while (1) 164 165 const unsigned* src1; 166 const unsigned* src2; 167 unsigned data1, data2; 168 unsigned mask; 169 unsigned shift; 170 unsigned b1 = 0x01010101; 171 char c1, c2; 172 unsigned tmp2; 173 174 while (((unsigned) s1) & 3) 175 { 176 c1 = *s1++; 177 c2 = *s2++; 178 if (c1 == 0 || c1 != c2) 179 return c1 - (int)c2; 180 } 181 src1 = (unsigned*) (((unsigned)s1) & ~3); 182 src2 = (unsigned*) (((unsigned)s2) & ~3); 183 tmp2 = ((unsigned) s2) & 3; 184 if (tmp2 == 1) 185 { 186 body(8); 187 } 188 else if (tmp2 == 2) 189 { 190 body(16); 191 } 192 else 193 { 194 body (24); 195 } 196 197 do 198 { 199#ifdef __ARM_BIG_ENDIAN 200 c1 = (char) tmp2 >> 24; 201 c2 = (char) data2 >> 24; 202#else /* not __ARM_BIG_ENDIAN */ 203 c1 = (char) tmp2; 204 c2 = (char) data2; 205#endif /* not __ARM_BIG_ENDIAN */ 206 tmp2 RSHIFT= 8; 207 data2 RSHIFT= 8; 208 } while (c1 != 0 && c1 == c2); 209 return c1 - c2; 210#endif /* 0 */ 211 212 213 /* First of all, compare bytes until src1(sp1) is word-aligned. */ 214.Lstrcmp_unaligned: 215 .cfi_restore_state 216 tst src1, #3 217 beq 2f 218 .cfi_remember_state 219 ldrb data1, [src1], #1 220 ldrb data2, [src2], #1 221 cmp data1, #1 222 it cs 223 cmpcs data1, data2 224 beq .Lstrcmp_unaligned 225 sub result, data1, data2 226 epilogue push_ip=HAVE_PAC_LEAF 227 2282: 229 .cfi_restore_state 230 stmfd sp!, {r5} 231 .cfi_adjust_cfa_offset 4 232 .cfi_rel_offset 5, 0 233 234 ldr data1, [src1], #4 235 and tmp2, src2, #3 236 bic src2, src2, #3 237 ldr data2, [src2], #4 238 cmp tmp2, #2 239 beq .Loverlap2 240 bhi .Loverlap1 241 242 /* Critical inner Loop: Block with 3 bytes initial overlap */ 243 .p2align 2 244.Loverlap3: 245 bic tmp2, data1, #MSB 246 cmp tmp2, data2, S2LO #8 247 sub syndrome, data1, #0x01010101 248 bic syndrome, syndrome, data1 249 bne 4f 250 ands syndrome, syndrome, #0x80808080 251 it eq 252 ldreq data2, [src2], #4 253 bne 5f 254 eor tmp2, tmp2, data1 255 cmp tmp2, data2, S2HI #24 256 bne 6f 257 ldr data1, [src1], #4 258 b .Loverlap3 2594: 260 S2LO data2, data2, #8 261 b .Lstrcmp_tail 262 2635: 264#ifdef __ARM_BIG_ENDIAN 265 /* The syndrome value may contain false ones if the string ends 266 with the bytes 0x01 0x00. */ 267 tst data1, #0xff000000 268 itt ne 269 tstne data1, #0x00ff0000 270 tstne data1, #0x0000ff00 271 beq .Lstrcmp_done_equal 272#else 273 bics syndrome, syndrome, #0xff000000 274 bne .Lstrcmp_done_equal 275#endif 276 ldrb data2, [src2] 277 S2LO tmp2, data1, #24 278#ifdef __ARM_BIG_ENDIAN 279 lsl data2, data2, #24 280#endif 281 b .Lstrcmp_tail 282 2836: 284 S2LO tmp2, data1, #24 285 and data2, data2, #LSB 286 b .Lstrcmp_tail 287 288 /* Critical inner Loop: Block with 2 bytes initial overlap. */ 289 .p2align 2 290.Loverlap2: 291 S2HI tmp2, data1, #16 292 sub syndrome, data1, #0x01010101 293 S2LO tmp2, tmp2, #16 294 bic syndrome, syndrome, data1 295 cmp tmp2, data2, S2LO #16 296 bne 4f 297 ands syndrome, syndrome, #0x80808080 298 it eq 299 ldreq data2, [src2], #4 300 bne 5f 301 eor tmp2, tmp2, data1 302 cmp tmp2, data2, S2HI #16 303 bne 6f 304 ldr data1, [src1], #4 305 b .Loverlap2 306 3075: 308#ifdef __ARM_BIG_ENDIAN 309 /* The syndrome value may contain false ones if the string ends 310 with the bytes 0x01 0x00 */ 311 tst data1, #0xff000000 312 it ne 313 tstne data1, #0x00ff0000 314 beq .Lstrcmp_done_equal 315#else 316 lsls syndrome, syndrome, #16 317 bne .Lstrcmp_done_equal 318#endif 319 ldrh data2, [src2] 320 S2LO tmp2, data1, #16 321#ifdef __ARM_BIG_ENDIAN 322 lsl data2, data2, #16 323#endif 324 b .Lstrcmp_tail 325 3266: 327 S2HI data2, data2, #16 328 S2LO tmp2, data1, #16 3294: 330 S2LO data2, data2, #16 331 b .Lstrcmp_tail 332 333 /* Critical inner Loop: Block with 1 byte initial overlap. */ 334 .p2align 2 335.Loverlap1: 336 and tmp2, data1, #LSB 337 cmp tmp2, data2, S2LO #24 338 sub syndrome, data1, #0x01010101 339 bic syndrome, syndrome, data1 340 bne 4f 341 ands syndrome, syndrome, #0x80808080 342 it eq 343 ldreq data2, [src2], #4 344 bne 5f 345 eor tmp2, tmp2, data1 346 cmp tmp2, data2, S2HI #8 347 bne 6f 348 ldr data1, [src1], #4 349 b .Loverlap1 3504: 351 S2LO data2, data2, #24 352 b .Lstrcmp_tail 3535: 354 /* The syndrome value may contain false ones if the string ends 355 with the bytes 0x01 0x00. */ 356 tst data1, #LSB 357 beq .Lstrcmp_done_equal 358 ldr data2, [src2], #4 3596: 360 S2LO tmp2, data1, #8 361 bic data2, data2, #MSB 362 b .Lstrcmp_tail 363.Lstrcmp_done_equal: 364 mov result, #0 365 .cfi_remember_state 366 ldmfd sp!, {r5} 367 .cfi_restore 5 368 .cfi_adjust_cfa_offset -4 369 epilogue push_ip=HAVE_PAC_LEAF 370 371.Lstrcmp_tail: 372 .cfi_restore_state 373 and r2, tmp2, #LSB 374 and result, data2, #LSB 375 cmp result, #1 376 it cs 377 cmpcs result, r2 378 itt eq 379 S2LOEQ tmp2, tmp2, #8 380 S2LOEQ data2, data2, #8 381 beq .Lstrcmp_tail 382 sub result, r2, result 383 ldmfd sp!, {r5} 384 .cfi_restore 5 385 .cfi_adjust_cfa_offset -4 386 epilogue push_ip=HAVE_PAC_LEAF 387 .cfi_endproc 388 .cantunwind 389 .fnend 390 .size strcmp, . - strcmp 391