1/*
2 * Copyright (c) 2012-2014 ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29	/* Basic ARM implementation.  This should run on anything except
30	   for ARMv6-M, but there are better implementations for later
31	   revisions of the architecture.  This version can support ARMv4T
32	   ARM/Thumb interworking.  */
33/* Parameters and result.  */
34#define src1		r0
35#define src2		r1
36#define result		r0	/* Overlaps src1.  */
37
38/* Internal variables.  */
39#define data1		r2
40#define data2		r3
41#define magic1		r4
42#define tmp2		r5
43#define tmp1		r12
44#define syndrome	r12	/* Overlaps tmp1 */
45
46/* For armv4t and newer, toolchains will transparently convert
47   'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support
48   for anything older than armv4t, but this should handle that
49   corner case in case anyone needs it anyway */
50.macro  RETURN
51#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
52	mov	pc, lr
53#else
54	bx	lr
55#endif
56.endm
57
58	.arm
59def_fn strcmp
60	.cfi_sections .debug_frame
61	.cfi_startproc
62	eor	tmp1, src1, src2
63	tst	tmp1, #3
64	/* Strings not at same byte offset from a word boundary.  */
65	bne	.Lstrcmp_unaligned
66	ands	tmp1, src1, #3
67	bic	src1, src1, #3
68	bic	src2, src2, #3
69	ldr	data1, [src1], #4
70	ldreq	data2, [src2], #4
71	beq	1f
72	/* Although s1 and s2 have identical initial alignment, they are
73	   not currently word aligned.	Rather than comparing bytes,
74	   make sure that any bytes fetched from before the addressed
75	   bytes are forced to 0xff.  Then they will always compare
76	   equal.  */
77	eor	tmp1, tmp1, #3
78	mvn	data2, #MSB
79	lsl	tmp1, tmp1, #3
80	S2LO	tmp1, data2, tmp1
81	ldr	data2, [src2], #4
82	orr	data1, data1, tmp1
83	orr	data2, data2, tmp1
841:
85	/* Load the 'magic' constant 0x01010101.	*/
86	str	r4, [sp, #-4]!
87	.cfi_def_cfa_offset 4
88	.cfi_offset 4, -4
89	mov	magic1, #1
90	orr	magic1, magic1, magic1, lsl #8
91	orr	magic1, magic1, magic1, lsl #16
92	.p2align	2
934:
94	sub	syndrome, data1, magic1
95	cmp	data1, data2
96	/* check for any zero bytes in first word */
97	biceq	syndrome, syndrome, data1
98	tsteq	syndrome, magic1, lsl #7
99	ldreq	data1, [src1], #4
100	ldreq	data2, [src2], #4
101	beq	4b
1022:
103	/* There's a zero or a different byte in the word */
104	S2HI	result, data1, #24
105	S2LO	data1, data1, #8
106	cmp	result, #1
107	cmpcs	result, data2, S2HI #24
108	S2LOEQ	data2, data2, #8
109	beq	2b
110	/* On a big-endian machine, RESULT contains the desired byte in bits
111	   0-7; on a little-endian machine they are in bits 24-31.  In
112	   both cases the other bits in RESULT are all zero.  For DATA2 the
113	   interesting byte is at the other end of the word, but the
114	   other bits are not necessarily zero.	 We need a signed result
115	   representing the differnece in the unsigned bytes, so for the
116	   little-endian case we can't just shift the interesting bits
117	   up.	*/
118#ifdef __ARM_BIG_ENDIAN
119	sub	result, result, data2, lsr #24
120#else
121	and	data2, data2, #255
122	rsb	result, data2, result, lsr #24
123#endif
124	ldr	r4, [sp], #4
125	.cfi_restore 4
126	.cfi_def_cfa_offset 0
127	RETURN
128
129
130#if 0
131	/* The assembly code below is based on the following alogrithm.	 */
132#ifdef __ARM_BIG_ENDIAN
133#define RSHIFT <<
134#define LSHIFT >>
135#else
136#define RSHIFT >>
137#define LSHIFT <<
138#endif
139
140#define body(shift)							\
141  mask = 0xffffffffU RSHIFT shift;					\
142  data1 = *src1++;							\
143  data2 = *src2++;							\
144  do									\
145    {									\
146      tmp2 = data1 & mask;						\
147      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))		\
148	{								\
149	  data2 RSHIFT= shift;						\
150	  break;							\
151	}								\
152      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))	\
153	{								\
154	  /* See comment in assembler below re syndrome on big-endian */\
155	  if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)		\
156	    data2 RSHIFT= shift;					\
157	  else								\
158	    {								\
159	      data2 = *src2;						\
160	      tmp2 = data1 RSHIFT (32 - shift);				\
161	      data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
162	    }								\
163	  break;							\
164	}								\
165      data2 = *src2++;							\
166      tmp2 ^= data1;							\
167      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))	\
168	{								\
169	  tmp2 = data1 >> (32 - shift);					\
170	  data2 = (data2 << (32 - shift)) RSHIFT (32 - shift);		\
171	  break;							\
172	}								\
173      data1 = *src1++;							\
174    } while (1)
175
176  const unsigned* src1;
177  const unsigned* src2;
178  unsigned data1, data2;
179  unsigned mask;
180  unsigned shift;
181  unsigned b1 = 0x01010101;
182  char c1, c2;
183  unsigned tmp2;
184
185  while (((unsigned) s1) & 3)
186    {
187      c1 = *s1++;
188      c2 = *s2++;
189      if (c1 == 0 || c1 != c2)
190	return c1 - (int)c2;
191    }
192  src1 = (unsigned*) (((unsigned)s1) & ~3);
193  src2 = (unsigned*) (((unsigned)s2) & ~3);
194  tmp2 = ((unsigned) s2) & 3;
195  if (tmp2 == 1)
196    {
197      body(8);
198    }
199  else if (tmp2 == 2)
200    {
201      body(16);
202    }
203  else
204    {
205      body (24);
206    }
207
208  do
209    {
210#ifdef __ARM_BIG_ENDIAN
211      c1 = (char) tmp2 >> 24;
212      c2 = (char) data2 >> 24;
213#else /* not  __ARM_BIG_ENDIAN */
214      c1 = (char) tmp2;
215      c2 = (char) data2;
216#endif /* not  __ARM_BIG_ENDIAN */
217      tmp2 RSHIFT= 8;
218      data2 RSHIFT= 8;
219    } while (c1 != 0 && c1 == c2);
220  return c1 - c2;
221#endif /* 0 */
222
223
224	/* First of all, compare bytes until src1(sp1) is word-aligned. */
225.Lstrcmp_unaligned:
226	tst	src1, #3
227	beq	2f
228	ldrb	data1, [src1], #1
229	ldrb	data2, [src2], #1
230	cmp	data1, #1
231	cmpcs	data1, data2
232	beq	.Lstrcmp_unaligned
233	sub	result, data1, data2
234	RETURN
235
2362:
237	stmfd	sp!, {r4, r5}
238	.cfi_def_cfa_offset 8
239	.cfi_offset 4, -8
240	.cfi_offset 5, -4
241	mov	magic1, #1
242	orr	magic1, magic1, magic1, lsl #8
243	orr	magic1, magic1, magic1, lsl #16
244
245	ldr	data1, [src1], #4
246	and	tmp2, src2, #3
247	bic	src2, src2, #3
248	ldr	data2, [src2], #4
249	cmp	tmp2, #2
250	beq	.Loverlap2
251	bhi	.Loverlap1
252
253	/* Critical inner Loop: Block with 3 bytes initial overlap */
254	.p2align	2
255.Loverlap3:
256	bic	tmp2, data1, #MSB
257	cmp	tmp2, data2, S2LO #8
258	sub	syndrome, data1, magic1
259	bic	syndrome, syndrome, data1
260	bne	4f
261	ands	syndrome, syndrome, magic1, lsl #7
262	ldreq	data2, [src2], #4
263	bne	5f
264	eor	tmp2, tmp2, data1
265	cmp	tmp2, data2, S2HI #24
266	bne	6f
267	ldr	data1, [src1], #4
268	b	.Loverlap3
2694:
270	S2LO	data2, data2, #8
271	b	.Lstrcmp_tail
272
2735:
274#ifdef __ARM_BIG_ENDIAN
275	/* The syndrome value may contain false ones if the string ends
276	with the bytes 0x01 0x00.  */
277	tst	data1, #0xff000000
278	tstne	data1, #0x00ff0000
279	tstne	data1, #0x0000ff00
280	beq	.Lstrcmp_done_equal
281#else
282	bics	syndrome, syndrome, #0xff000000
283	bne	.Lstrcmp_done_equal
284#endif
285	ldrb	data2, [src2]
286	S2LO	tmp2, data1, #24
287#ifdef __ARM_BIG_ENDIAN
288	lsl	data2, data2, #24
289#endif
290	b	.Lstrcmp_tail
291
2926:
293	S2LO	tmp2, data1, #24
294	and	data2, data2, #LSB
295	b	.Lstrcmp_tail
296
297	/* Critical inner Loop: Block with 2 bytes initial overlap.  */
298	.p2align	2
299.Loverlap2:
300	S2HI	tmp2, data1, #16
301	sub	syndrome, data1, magic1
302	S2LO	tmp2, tmp2, #16
303	bic	syndrome, syndrome, data1
304	cmp	tmp2, data2, S2LO #16
305	bne	4f
306	ands	syndrome, syndrome, magic1, lsl #7
307	ldreq	data2, [src2], #4
308	bne	5f
309	eor	tmp2, tmp2, data1
310	cmp	tmp2, data2, S2HI #16
311	bne	6f
312	ldr	data1, [src1], #4
313	b	.Loverlap2
314
3155:
316#ifdef __ARM_BIG_ENDIAN
317	/* The syndrome value may contain false ones if the string ends
318	with the bytes 0x01 0x00 */
319	tst	data1, #0xff000000
320	tstne	data1, #0x00ff0000
321	beq	.Lstrcmp_done_equal
322#else
323	lsls	syndrome, syndrome, #16
324	bne	.Lstrcmp_done_equal
325#endif
326	ldrh	data2, [src2]
327	S2LO	tmp2, data1, #16
328#ifdef __ARM_BIG_ENDIAN
329	lsl	data2, data2, #16
330#endif
331	b	.Lstrcmp_tail
332
3336:
334	S2HI	data2, data2, #16
335	S2LO	tmp2, data1, #16
3364:
337	S2LO	data2, data2, #16
338	b	.Lstrcmp_tail
339
340	/* Critical inner Loop: Block with 1 byte initial overlap.  */
341	.p2align	2
342.Loverlap1:
343	and	tmp2, data1, #LSB
344	cmp	tmp2, data2, S2LO #24
345	sub	syndrome, data1, magic1
346	bic	syndrome, syndrome, data1
347	bne	4f
348	ands	syndrome, syndrome, magic1, lsl #7
349	ldreq	data2, [src2], #4
350	bne	5f
351	eor	tmp2, tmp2, data1
352	cmp	tmp2, data2, S2HI #8
353	bne	6f
354	ldr	data1, [src1], #4
355	b	.Loverlap1
3564:
357	S2LO	data2, data2, #24
358	b	.Lstrcmp_tail
3595:
360	/* The syndrome value may contain false ones if the string ends
361	   with the bytes 0x01 0x00.  */
362	tst	data1, #LSB
363	beq	.Lstrcmp_done_equal
364	ldr	data2, [src2], #4
3656:
366	S2LO	tmp2, data1, #8
367	bic	data2, data2, #MSB
368	b	.Lstrcmp_tail
369.Lstrcmp_done_equal:
370	mov	result, #0
371	.cfi_remember_state
372	ldmfd	sp!, {r4, r5}
373	.cfi_restore 4
374	.cfi_restore 5
375	.cfi_def_cfa_offset 0
376	RETURN
377
378.Lstrcmp_tail:
379	.cfi_restore_state
380	and	r2, tmp2, #LSB
381	and	result, data2, #LSB
382	cmp	result, #1
383	cmpcs	result, r2
384	S2LOEQ	tmp2, tmp2, #8
385	S2LOEQ	data2, data2, #8
386	beq	.Lstrcmp_tail
387	sub	result, r2, result
388	ldmfd	sp!, {r4, r5}
389	.cfi_restore 4
390	.cfi_restore 5
391	.cfi_def_cfa_offset 0
392	RETURN
393	.cfi_endproc
394	.size strcmp, . - strcmp
395