1/*
2 * Copyright (c) 2014
3 *      Imagination Technologies Limited.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 *    contributors may be used to endorse or promote products derived from
15 *    this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <picolibc.h>
31
32#ifdef ANDROID_CHANGES
33# include "machine/asm.h"
34# include "machine/regdef.h"
35#elif _LIBC
36# include "machine/asm.h"
37# include "machine/regdef.h"
38#else
39# include <regdef.h>
40# include <sys/asm.h>
41#endif
42
/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NUL string terminator, but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.  */
/* DELAY_READ is placed after every word comparison in the unrolled word
   loop, where it occupies a branch delay slot.  With ENABLE_READAHEAD it
   expands to nothing, so the load of the next word falls into that delay
   slot; otherwise it is a nop.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.
   USE_DOUBLE is not defined anywhere in this file, so it only takes effect
   when supplied from outside (for example on the command line).  To avoid
   doing 32-bit arithmetic on 64-bit pointers in that case, also select
   daddiu whenever the compiler reports 64-bit pointers.  */
#ifndef PTR_ADDIU
# if defined(USE_DOUBLE) || (defined(_MIPS_SZPTR) && (_MIPS_SZPTR == 64))
#  define PTR_ADDIU       daddiu
# else
#  define PTR_ADDIU       addiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif
83
/* STRCMP_NAME: compare the zero-terminated byte strings addressed by a0
   and a1.

   In:   a0, a1  addresses of the two strings (both registers are advanced
		 while comparing).
   Out:  v0      byte from the a0 string minus byte from the a1 string,
		 both zero-extended, taken at the first position where the
		 bytes differ or where the a0 string has its zero byte.
		 This is zero when the strings are equal.
   Uses: v1, t0, t1, t8, t9 as scratch.

   When both addresses are 4-byte aligned the strings are compared one
   word at a time, eight words per loop iteration; otherwise a byte loop
   unrolled eight times is used.

   The whole routine is assembled with .set noreorder, so the instruction
   that textually follows each branch or jump is its delay slot and is
   executed whether or not the branch is taken.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

/* Use the byte loop unless both addresses are 4-byte aligned.  The lui
   below executes in the delay slot of this branch; that is harmless
   because the byte loop loads t8 itself before reading it.  */
	or	t0, a0, a1
	andi	t0,0x3
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

/* t8 = 0x01010101 and t9 = 0x7f7f7f7f, the constants of the zero-byte
   test used by STRCMP32.  */
	lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, 0x7f7f

/* Compare the words at OFFSET(a0) and OFFSET(a1).
   - Words differ: branch to worddiff with the words in v0 and v1.
   - Words equal and v0 contains a zero byte: branch to returnzero.
   t0 = (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is nonzero exactly when v0
   contains at least one zero byte.  The macro ends in a branch, so the
   instruction placed after each use is that branch's delay slot.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

/* Eight word comparisons per iteration.  DELAY_READ is either a nop or
   empty; when empty, the first load of the following STRCMP32 becomes the
   delay slot instruction.  The pointer increments fill the delay slots of
   the last compare and of the loop branch.  */
L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32

/* Equal words containing a zero byte: the strings are equal.  */
L(returnzero):
	j	ra
	move	v0, zero

/* v0 and v1 hold two different words.  Find the first byte, in memory
   order, where they differ or where v0 has a zero byte, and return the
   difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
/* t1 = 0x80 in exactly those byte positions of v0 that hold zero.  Each
   byte is handled without carries or borrows crossing into a neighbour:
   (b & 0x7f) + 0x7f sets bit 7 iff the low seven bits of b are nonzero,
   OR-ing in b adds bit 7 of b itself, and the NOR with 0x7f7f7f7f leaves
   0x80 only where b == 0.  The cheaper subtract-based test in STRCMP32 is
   not used here because its borrow can also flag a 0x01 byte sitting just
   above a zero byte; on big endian that byte comes earlier in memory and
   would make the selection below stop one byte too soon.  */
	and	t0, v0, t9
	addu	t0, t0, t9
	or	t0, t0, v0
	nor	t1, t0, t9
/* t0 = bytes where the words differ or where v0 is zero.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Reverse the byte order so that the first byte in memory becomes the
   most significant one.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
/* t1 = leading-zero count rounded down to a multiple of 8, i.e. the bit
   offset from the top of the first flagged byte.  */
	clz	t1, t0
	and	t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Turn the offset from the top into a right-rotate amount: 24 - t1.  */
	neg	t1
	addu	t1, 24
# endif
/* Rotate the selected byte of each word into bits 7..0, isolate it and
   return the difference.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1
#else /* USE_CLZ */
/* Test the four bytes one after another in memory order, alternating
   between the register pairs t0/t1 and t8/t9.  For each byte: leave if
   the v0 byte is zero (the v1 byte is extracted in that branch's delay
   slot), then leave if the two bytes differ (the delay slot already
   starts extracting the next byte).  The last byte needs no test because
   the words are known to differ.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

/* Return the byte difference held in t8/t9 or in t0/t1; the subtraction
   runs in the delay slot of the return jump.  */
L(wexit89):
	j	ra
	subu	v0, t8, t9
L(wexit01):
	j	ra
	subu	v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
/* BYTECMP01 and BYTECMP89 compare the bytes at OFFSET using v0/v1 and
   t8/t9 respectively.  Each leaves for its exit label when the a0 byte is
   zero or the bytes differ.  Both end in a branch, so the instruction
   following each use (the first load of the next comparison, or a pointer
   increment) is that branch's delay slot.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop;	\
	bne	t8, t9, L(bexit89)

/* Byte-at-a-time comparison, eight bytes per iteration.  */
L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8

/* Return the byte difference; the subtraction runs in the delay slot of
   the return jump.  */
L(bexit01):
	j	ra
	subu	v0, v0, v1
L(bexit89):
	j	ra
	subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)
255