/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See strcmp-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Byte constants replicated across a 64-bit word; both are operands of
   the word-at-a-time NUL test described further down.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles.  Only x0-x10 are used, all of which are caller-saved
   under AAPCS64, so nothing is saved or restored.

   Two pairs of names deliberately share a register because they are never
   needed at the same time:
     diff / off1     (x5)  off1 is live only while L(loop_unaligned) is
                           iterating; diff is written only after a loop
                           has been left.
     syndrome / tmp  (x6)  tmp is a short-lived scratch; syndrome is formed
                           just before branching to / falling into L(end).  */
#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* int strcmp (const char *s1, const char *s2)

   In:    x0 = s1, x1 = s2.
   Out:   x0 = (first differing byte of s1) - (corresponding byte of s2),
	  both taken as unsigned bytes; 0 when the strings are equal.
   Uses:  x2-x10 and the condition flags as scratch.  The code below makes
	  no calls and does not reference sp.

   Strategy, chosen from the low three bits of the two pointers:
     - same alignment, already 8-byte aligned  -> L(loop_aligned)
     - same alignment, not 8-byte aligned      -> L(mutual_align), which
       rounds both down and neutralises the leading bytes
     - different alignment                     -> L(misaligned8), which
       byte-steps until SRC1 is aligned and then runs L(loop_unaligned),
       only reading SRC2 words already known to contain string bytes.

   ENTRY, END, L and PTR_ARG are macros from asmdefs.h, which is not
   visible here; NOTE(review): PTR_ARG presumably normalises a pointer
   argument register - confirm against that header.  */

ENTRY (strcmp)
	PTR_ARG (0)
	PTR_ARG (1)
	sub	off2, src2, src1	/* Constant distance SRC1 -> SRC2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* SRC1's offset within its 8-byte word.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Pointers differ modulo 8.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, but not on a boundary.  */

	.p2align 4

	/* Both pointers are 8-byte aligned: compare one word per iteration.
	   SRC2 is addressed as SRC1 + off2 so only SRC1 is advanced.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	/* Run the NUL test on a byte-reversed copy (see the note on carry
	   propagation above); data1 itself keeps memory order.  */
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was seen (eq) compare the two words; otherwise force
	   NZCV = 0 so that the b.eq below is not taken.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	/* Put the NUL markers back in the same byte order as data1/data2.  */
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul

	/* Common exit.  Expects data1/data2 in memory byte order and a
	   non-zero syndrome whose earliest set bit marks the first differing
	   bit or the first NUL byte.  */
L(end):
#ifndef __AARCH64EB__
	/* Move the earliest byte to the most-significant end so that clz
	   finds the first event.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	/* Register-specified shifts use the amount modulo 64, so the negated
	   bit offset acts as 64 - 8 * (src2 & 7).  */
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift		/* All-ones in the bytes before the start.  */
	/* Forcing those bytes to 0xff in both words makes them equal and
	   non-NUL, so they cannot influence the comparison.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
	/* Byte-at-a-time prologue.  Both pointers advance together, so off2
	   stays valid.  */
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* If the SRC1 byte is non-zero compare it with the SRC2 byte;
	   otherwise force "ne" so that a NUL also leaves the loop.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

	/* SRC1 is now 8-byte aligned and SRC2 is not.  Let k = src2 & 7.
	   Load the aligned word that contains the current SRC2 position and
	   test it for a NUL before any 8-byte read of SRC2 reaches into the
	   following word.  */
L(src1_aligned):
	neg	shift, src2, lsl 3	/* Acts as 64 - 8 * k when used as a shift.  */
	bic	src2, src2, 7
	ldr	data3, [src2], 8	/* src2 now addresses the next aligned word.  */
#ifdef __AARCH64EB__
	/* From here on data3 is kept with early bytes at the LSB end.  */
	rev	data3, data3
#endif
	/* Set bit 0 of each of the k bytes that precede the SRC2 position so
	   that they can never be reported as NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)			/* SRC2 ends inside its first aligned word.  */

	sub	off1, src2, src1	/* SRC1 -> next aligned SRC2 word.  */

	.p2align 4

	/* Per iteration:
	     data3 = aligned SRC2 word that holds the last k bytes of data2;
		     it is NUL-tested as a whole word
	     data2 = unaligned 8 bytes of SRC2 matching data1
	     data1 = aligned 8 bytes of SRC1.
	   Continue while data3 has no NUL and data1 == data2.  */
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq	/* Forces "ne" when data3 holds a NUL.  */
	b.eq	L(loop_unaligned)

	/* Only the first k bytes of data3 are part of data2.  Shifting by
	   64 - 8 * k moves their NUL markers onto the corresponding (last k)
	   byte positions of data2 and discards the rest.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp		/* Back to memory byte order.  */
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)	/* Mismatch or NUL within these 8 bytes.  */

	/* Otherwise the NUL lies in the bytes of data3 from index k onwards,
	   which have not been compared yet.  src1 already addresses the
	   matching SRC1 bytes.  */
L(tail):
	ldr	data1, [src1]
	neg	shift, shift		/* Now acts as 8 * k.  */
	lsr	data2, data3, shift	/* Drop the k bytes already dealt with.  */
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	/* Restore memory byte order, as L(end) expects.  */
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

	/* Exit from the byte prologue: data1/data2 hold the zero-extended
	   bytes at which the strings differ or SRC1 ended.  */
L(done):
	sub	result, data1, data2
	ret

END (strcmp)
#endif
