/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#include <picolibc.h>

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See strcmp-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

/* REP8_01/REP8_7f: the 0x01 / 0x7f byte replicated across all 8 bytes of a
   64-bit word, used by the parallel NUL-byte detection below.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Note the deliberate aliasing: diff/off1 share x5 and
   syndrome/tmp share x6 — the aliased pairs are never live at the same time.  */
#define src1		x0	/* Argument 1: first string (also result, x0).  */
#define src2		x1	/* Argument 2: second string.  */
#define result		x0	/* Return value: <0, 0 or >0.  */

#define data1		x2	/* 8 bytes loaded from src1.  */
#define data1w		w2
#define data2		x3	/* 8 bytes loaded from src2.  */
#define data2w		w3
#define has_nul		x4	/* Non-zero iff a NUL byte was seen.  */
#define diff		x5	/* data1 ^ data2: non-zero iff strings differ.  */
#define off1		x5	/* src2 - src1 after src2 is aligned down.  */
#define syndrome	x6	/* diff | has_nul: non-zero ends the loop.  */
#define tmp		x6
#define data3		x7	/* src2 data re-read at its natural alignment.  */
#define zeroones	x8	/* Holds REP8_01 throughout.  */
#define shift		x9	/* Bit-shift used for alignment fix-ups.  */
#define off2		x10	/* src2 - src1, so [src1, off2] reads src2.  */

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */


/* int strcmp (const char *s1, const char *s2)
   In:  src1 (x0) = s1, src2 (x1) = s2.
   Out: result (x0) = difference of the first mismatching bytes,
	treated as unsigned chars; zero if the strings are equal.
   Reads whole aligned words, so it may read up to 7 bytes past a NUL
   but never crosses a 16-byte granule it was not given — MTE safe.  */
ENTRY (strcmp)
	PTR_ARG (0)
	PTR_ARG (1)
	/* Keep only src1 as a moving pointer; src2 is reached through the
	   constant difference off2 so one increment advances both.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)		/* Pointers differ modulo 8.  */
	cbnz	tmp, L(mutual_align)	/* Same misalignment, non-zero.  */

	.p2align 4

	/* Main loop: both sources are mutually 8-byte aligned.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	/* Byte-reverse first so borrow propagation in the NUL check cannot
	   corrupt bytes that precede the NUL (see comment above).  */
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL (Z set), also compare the words; loop while equal.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	/* Put the earliest byte at the MSB end so clz finds it first.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	/* Force the pre-start bytes of both words to 0xff so they compare
	   equal and cannot look like a NUL.  */
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
	/* Byte-at-a-time until src1 reaches an 8-byte boundary.  */
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* Only compare the bytes if data1w is non-NUL (ne); otherwise force
	   "not equal" so we fall through to L(done) with the NUL byte.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now 8-byte aligned; src2 is not.  Read src2's first
	   partial word at its own alignment and pad the leading bytes.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set 0x01 in the bytes before the start point so they cannot
	   register as NUL in the check below.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)			/* NUL within the first src2 word.  */

	sub	off1, src2, src1

	.p2align 4

	/* Unaligned loop: data3 (aligned src2 word, via off1) is checked for
	   NUL before data2 (unaligned src2 word, via off2) is compared, so
	   we never read an aligned word beyond src2's terminator.  */
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Discard NUL bits that belong to the *next* unaligned src2 word.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* NUL found in the aligned src2 word (data3) before the current
	   unaligned word was fully valid: rebuild the last src2 word by
	   shifting data3 down, then compare against the final src1 word.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte loop exit: data1w/data2w hold the mismatching (or NUL) bytes,
	   already zero-extended, so a plain subtraction is the result.  */
	sub	result, data1, data2
	ret

END (strcmp)
#endif