/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See strcmp-stub.c */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

/* 0x01 / 0x7f replicated into every byte of a 64-bit word, for the
   parallel per-byte NUL test below.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Note the deliberate aliases: result reuses src1 (x0),
   diff shares x5 with off1, and syndrome shares x6 with tmp — the pairs
   are never live at the same time.  */
#define src1 x0
#define src2 x1
#define result x0

#define data1 x2
#define data1w w2
#define data2 x3
#define data2w w3
#define has_nul x4
#define diff x5
#define off1 x5
#define syndrome x6
#define tmp x6
#define data3 x7
#define zeroones x8
#define shift x9
#define off2 x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* int strcmp (const char *s1, const char *s2)
   In:   src1 (x0), src2 (x1) = pointers to NUL-terminated strings.
   Out:  result (x0) = difference (as unsigned chars) of the first pair
	 of bytes that differ; zero if the strings are equal.
   Uses only caller-saved registers; no stack.  */
ENTRY (strcmp)
	/* PTR_ARG: pointer-argument sanitization macro from asmdefs.h
	   (presumably a no-op for LP64 — see asmdefs.h).  */
	PTR_ARG (0)
	PTR_ARG (1)
	/* Keep only src1 as a moving pointer; src2 is reached via the
	   constant offset off2 = src2 - src1.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	/* If the pointers differ modulo 8 they can never be aligned
	   simultaneously: take the byte-at-a-time setup path.  */
	tst	off2, 7
	b.ne	L(misaligned8)
	/* Mutually aligned but not yet on an 8-byte boundary.  */
	cbnz	tmp, L(mutual_align)

	.p2align 4

/* Main loop: both strings 8-byte aligned, 16 bytes of work per
   iteration pair (one word from each string).  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	/* Byte-reverse before the NUL test (see principle comment above:
	   carry propagation would otherwise falsely flag 0x01 bytes that
	   precede a NUL on big-endian).  */
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL (eq), compare the two data words; otherwise force
	   NZCV = 0 so the loop exits.  Loop while words are equal and
	   contain no NUL.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	/* Put the NUL marker back into memory byte order to match diff.  */
	rev	has_nul, has_nul
#endif
	/* Syndrome: a non-zero byte marks the first difference or the
	   terminating NUL, whichever comes first.  */
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	/* On little-endian, byte-reverse so that the earliest byte of the
	   string becomes the most significant and clz finds it.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	/* Build an all-ones mask over the bytes before the start point
	   (LS_FW shifts towards the early bytes on either endianness) and
	   force those bytes to 0xff in both words: equal and non-NUL, so
	   they cannot affect the comparison.  */
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	/* Byte loop until src1 reaches an 8-byte boundary.  */
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If data1 is not NUL (ne), compare the bytes; otherwise force
	   NZCV = 0b0000.  Either a NUL or a mismatch makes the following
	   branch to L(done) taken.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* SRC1 is now 8-byte aligned; SRC2 is not (off2 & 7 != 0).
	   Probe the first partial word of SRC2 with an aligned load so we
	   never read past its terminating NUL.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* zeroones >> shift places 0x01 into each byte slot that precedes
	   the real start of SRC2; OR-ing it in makes those bytes non-zero
	   so they cannot be mistaken for the terminator.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	/* NUL already inside the first SRC2 word: finish in the tail.  */
	b.ne	L(tail)

	/* off1 = aligned-SRC2 minus SRC1, used for the look-ahead load.  */
	sub	off1, src2, src1

	.p2align 4

/* Unaligned loop.  Per iteration: data3 is the next *aligned* word of
   SRC2 (checked for NUL so the unaligned load cannot overrun), data2 is
   the unaligned SRC2 word matching data1's position, data1 is the next
   SRC1 word.  */
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Loop while the aligned SRC2 word has no NUL and data1 == data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Re-align data3's NUL marker to the byte positions of
	   data1/data2 before building the syndrome.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* The NUL is in data3 (aligned SRC2 word) but the words compared
	   so far were equal.  Extract the remaining SRC2 bytes from data3
	   and compare them against one final SRC1 word.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte path result: difference of the mismatching/terminating
	   bytes (already zero-extended by the ldrb loads).  */
	sub	result, data1, data2
	ret

END (strcmp)
#endif