1/*
2 * Copyright (c) 2014
3 *      Imagination Technologies Limited.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 *    contributors may be used to endorse or promote products derived from
15 *    this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifdef ANDROID_CHANGES
31# include "machine/asm.h"
32# include "machine/regdef.h"
33#elif _LIBC
34# include "machine/asm.h"
35# include "machine/regdef.h"
36#else
37# include <regdef.h>
38# include <sys/asm.h>
39#endif
40
/* Strictly speaking, strcmp must not read beyond the end of either string.
   The word loop always loads a whole word, which may include bytes that lie
   after the NUL terminator, but it does not go on to load the following word
   once the end of the string has been seen -- unless ENABLE_READAHEAD is
   defined.  Defining ENABLE_READAHEAD is faster but does not conform to the
   definition of strcmp.

   DELAY_READ is what gets placed after each word comparison: a nop by
   default, or nothing at all when read-ahead is enabled.  */
#if !defined(ENABLE_READAHEAD)
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
52
/* The CLZ-based way of locating the first differing byte is opt-in: it
   measured slower on a little endian machine, so it is only used when
   ENABLE_CLZ is defined and the ISA revision is greater than 1.  */
#ifdef ENABLE_CLZ
# if __mips_isa_rev > 1
#  define USE_CLZ
# endif
#endif
58
/* Fallback for asm.h files that do not supply L(): build a local label,
   prefixed with "$L" for the o32 ABI and ".L" for every other ABI.  */
#if !defined(L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif
67
/* Some asm.h files do not have the PTR_ADDIU macro definition.  It is used
   to advance the string pointers, so it has to be the 64-bit add when
   pointers are 64 bits wide.  Nothing in this file defines USE_DOUBLE, so
   also pick daddiu when the compiler reports the n64 ABI; USE_DOUBLE is
   still honoured as an explicit override.  */
#ifndef PTR_ADDIU
# if defined(USE_DOUBLE) || (defined(_ABI64) && (_MIPS_SIM == _ABI64))
#  define PTR_ADDIU       daddiu
# else
#  define PTR_ADDIU       addiu
# endif
#endif
76
/* The entry point is called strcmp unless the includer has already chosen
   another name by defining STRCMP_NAME.  */
#if !defined(STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
81
/* int STRCMP_NAME (const char *s1, const char *s2)

   In:       a0 = s1, a1 = s2.
   Out:      v0 = 0 when the strings are equal; otherwise the byte of s1
             minus the byte of s2 at the first position where they differ
             (or where s1 ends), both bytes taken as unsigned values.
   Scratch:  v1, t0, t1, t8, t9.

   When both pointers are 4-byte aligned the strings are compared a word at
   a time (eight words per loop iteration); otherwise they are compared a
   byte at a time (eight bytes per loop iteration).

   The whole body is assembled with ".set noreorder", so the instruction
   that textually follows every branch or jump is its delay slot and is
   executed whether or not the branch is taken.  */
#ifdef ANDROID_CHANGES
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	/* Any low address bit set in either pointer selects the byte loop.  */
	or	t0, a0, a1
	andi	t0, t0, 0x3
	bne	t0, zero, L(byteloop)
	/* Delay slot: this lui also runs when the branch is taken.  That is
	   harmless, the byte loop loads t8 before it reads it.  */
	lui	t8, 0x0101

/* Both strings are 4 byte aligned at this point.
   t8 = 0x01010101 and t9 = 0x7f7f7f7f are the zero-byte test constants.  */

	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f

/* Compare the words at OFFSET.  Different words go to L(worddiff) with the
   two words still in v0/v1.  For equal words,
   (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is non-zero exactly when the word
   contains a zero byte, i.e. the strings ended without a difference.
   The "nor" sits in the delay slot of the first branch.  The delay slot of
   the last branch is supplied by whatever follows the macro invocation.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

/* DELAY_READ fills the trailing delay slot of each STRCMP32: a nop by
   default.  With ENABLE_READAHEAD it is empty, so the first load of the
   next STRCMP32 lands in that slot instead.  */
L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32		/* delay slot of STRCMP32(28) */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* delay slot */

L(returnzero):
	j	ra
	move	v0, zero		/* delay slot */

/* v0 and v1 hold two words that are known to differ.  Find the first byte
   in string order that either differs or is the terminator of s1, and
   return the difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
	/* t1 = 0x80 in each byte position where v0 has a zero byte, 0x00 in
	   all other positions.  The cheaper subtract-based test used in the
	   word loop is not exact per byte: the borrow out of a zero byte can
	   also flag a more significant byte holding 0x01.  On a big endian
	   machine such a byte comes EARLIER in the string, so clz could stop
	   on it and miss a real difference that follows.  This form never
	   carries between bytes, so every flag is genuine.  t9 is still
	   0x7f7f7f7f here.  */
	and	t1, v0, t9
	addu	t1, t1, t9
	or	t1, t1, v0
	nor	t1, t1, t9
	/* t0 = bits that differ, plus the terminator flags.  */
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Byte-reverse so the first byte in memory is the most significant.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	/* t1 = 8 * (index in string order of the first flagged byte).  */
	clz	t1, t0
	andi	t1, t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Byte i of a big endian word is brought down by rotating 24 - 8*i.  */
	subu	t1, zero, t1
	addiu	t1, t1, 24
# endif
	/* Rotate that byte of each word into bits 7..0 and subtract.  */
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	andi	v0, v0, 0xff
	andi	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1		/* delay slot */
#else /* USE_CLZ */
	/* Walk the four bytes in string order.  Consecutive bytes alternate
	   between the t0/t1 pair and the t8/t9 pair (t8/t9 are no longer
	   needed as constants): the delay slot of each "bne" already starts
	   extracting the next byte, and must not overwrite the pair that the
	   exit stub is about to subtract.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)
	srl	t8, v0, 8		/* delay slot */

	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)
	srl	t0, v0, 16		/* delay slot */

	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)
	srl	t8, v0, 24		/* delay slot */

	/* Last byte: fall through to L(wexit89).  */
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24		/* delay slot */
	bne	t0, t1, L(wexit01)
	srl	t8, v0, 16		/* delay slot */

	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff		/* delay slot */
	bne	t8, t9, L(wexit89)
	srl	t0, v0, 8		/* delay slot */

	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff		/* delay slot */
	bne	t0, t1, L(wexit01)
	andi	t8, v0, 0xff		/* delay slot */

	/* Last byte: fall through to L(wexit89).  */
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
	j	ra
	subu	v0, t8, t9		/* delay slot */
L(wexit01):
	j	ra
	subu	v0, t0, t1		/* delay slot */
#endif /* USE_CLZ */

/* Compare the bytes at OFFSET, leaving them in v0/v1 (BYTECMP01) or in
   t8/t9 (BYTECMP89), and exit when the byte from a0 is the terminator or
   the two bytes differ.

   It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).

   The delay slot of the final 'bne' is whatever follows the macro: the
   first 'lbu' of the next macro, or the a0 increment after the last one.
   The two macros use different register pairs so that this delay-slot load
   cannot overwrite the bytes the exit stub is about to subtract.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop;	\
	bne	t8, t9, L(bexit89)

L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8		/* delay slot of BYTECMP89(7) */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* delay slot */

L(bexit01):
	j	ra
	subu	v0, v0, v1		/* delay slot */
L(bexit89):
	j	ra
	subu	v0, t8, t9		/* delay slot */

	.set	at
	.set	reorder

END(STRCMP_NAME)
253