/* memcmp - compare memory

   Copyright (c) 2018 Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See memcmp-stub.c  */
#else

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

/* int memcmp (const void *s1, const void *s2, size_t n)
 *
 * In:       x0 = s1, x1 = s2, x2 = n (number of bytes to compare).
 * Out:      w0 = 0 when the first n bytes are equal; otherwise negative if
 *           the first differing byte (taken as unsigned) is smaller in s1,
 *           positive if it is larger.  The word-based paths return exactly
 *           -1 or 1; the byte loop returns the difference of the two bytes.
 * Clobbers: x0-x7 and the condition flags.  No stack accesses, no calls.
 *
 * Strategy by size:
 *   n < 8         one optional 4-byte compare, then a byte loop (0-3 bytes).
 *   8 <= n <= 16  first 8 bytes, then the last 8 bytes (loads may overlap).
 *   16 < n <= 32  first 16 bytes, then the last 16 bytes (loads may overlap).
 *   n > 32        16 bytes per loop iteration, then the last 16 bytes.
 *                 For n > 128 src1 is first rounded down to a 16-byte
 *                 boundary so the loop's loads from src1 are aligned.
 */

/* Build an assembler-local label (.L prefix) from a bare name.  */
#define L(l) .L ## l

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  The "w" names are the 32-bit views of the same
   registers; the "h" names hold the second word of a 16-byte pair load.
   tmp2 is not referenced by the code below.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8

	/* def_fn NAME [p2align=N]: open the global function symbol NAME in
	   .text, aligned to a 2^N byte boundary.  */
        .macro def_fn f p2align=0
        .text
        .p2align \p2align
        .global \f
        .type \f, %function
\f:
        .endm

def_fn memcmp p2align=6
	/* limit = n - 8; a borrow (lo) means n < 8.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* n >= 8: compare the first 8 bytes.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = n - 16; positive means more than 16 bytes in total.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8 <= n <= 16: limit is in [-8..0] and src1/src2 already point 8
	   bytes in, so [src, limit] addresses the last 8 bytes of each
	   buffer.  L(return) does the comparison.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8..15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit = n - 32 from here on.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes
	   (limit > 96 here means n > 128).  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  src1 is
	   rounded down to a 16-byte boundary, src2 is moved back by the same
	   amount and limit grows to match, so up to 15 bytes that were
	   already found equal are compared again.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* If limit is still > 0 (hi) compare the low words, otherwise force
	   NZCV = 0, which reads as "ne" and ends the loop.  */
	ccmp	data1, data2, 0, hi
	/* Only when the low words matched compare the high words; otherwise
	   force "ne" again.  */
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop ended on a mismatch or because limit ran out; check the
	   pair that was just loaded, low word first.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0
	   here, so adding it steps the pointers back to 16 bytes before the
	   end of each buffer.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	/* Fall through: L(return) compares data1 with data2 itself, so no
	   separate cmp is needed here.  */

	/* Compare data bytes and set return value to 0, -1 or 1.  On
	   little-endian the words are byte-reversed first so that the byte at
	   the lowest address becomes the most significant one and an unsigned
	   word compare orders the buffers byte by byte.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entered with flags set: eq -> 0, ne and lo -> -1, ne and hs -> 1.  */
L(ret_eq):
	cset	result, ne
	cneg	result, result, lo
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* Carry out of the add means limit >= -4, i.e. n >= 4; no carry (lo)
	   means fewer than 4 bytes in total.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	/* Re-bias limit so the add at L(less4) yields the bytes left.  */
	sub	limit, limit, 4
L(less4):
	/* limit = number of bytes still to compare, in [0..3].  With zero
	   left Z is set, so L(ret_eq) returns 0.  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* While bytes remain compare the pair just loaded; when the count
	   reaches zero force "ne" to leave the loop.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last pair loaded: zero if every byte matched.  */
	sub	result, data1w, data2w
	ret

	.size	memcmp, . - memcmp
#endif
197