1/*
2 * Copyright (c) 2012-2014 ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29	/* Basic ARM implementation.  This should run on anything except
30	   for ARMv6-M, but there are better implementations for later
31	   revisions of the architecture.  This version can support ARMv4T
32	   ARM/Thumb interworking.  */
33/* Parameters and result.  */
34#include <picolibc.h>
35
36#define src1		r0
37#define src2		r1
38#define result		r0	/* Overlaps src1.  */
39
40/* Internal variables.  */
41#define data1		r2
42#define data2		r3
43#define magic1		r4
44#define tmp2		r5
45#define tmp1		r12
46#define syndrome	r12	/* Overlaps tmp1 */
47
48/* For armv4t and newer, toolchains will transparently convert
49   'bx lr' to 'mov pc, lr' if needed. GCC has deprecated support
50   for anything older than armv4t, but this should handle that
51   corner case in case anyone needs it anyway */
52.macro  RETURN
53#if __ARM_ARCH <= 4 && __ARM_ARCH_ISA_THUMB == 0
54	mov	pc, lr
55#else
56	bx	lr
57#endif
58.endm
59
60	.arm
61def_fn strcmp
62	.cfi_sections .debug_frame
63	.cfi_startproc
64	eor	tmp1, src1, src2
65	tst	tmp1, #3
66	/* Strings not at same byte offset from a word boundary.  */
67	bne	.Lstrcmp_unaligned
68	ands	tmp1, src1, #3
69	bic	src1, src1, #3
70	bic	src2, src2, #3
71	ldr	data1, [src1], #4
72	ldreq	data2, [src2], #4
73	beq	1f
74	/* Although s1 and s2 have identical initial alignment, they are
75	   not currently word aligned.	Rather than comparing bytes,
76	   make sure that any bytes fetched from before the addressed
77	   bytes are forced to 0xff.  Then they will always compare
78	   equal.  */
79	eor	tmp1, tmp1, #3
80	mvn	data2, #MSB
81	lsl	tmp1, tmp1, #3
82	S2LO	tmp1, data2, tmp1
83	ldr	data2, [src2], #4
84	orr	data1, data1, tmp1
85	orr	data2, data2, tmp1
861:
87	/* Load the 'magic' constant 0x01010101.	*/
88	str	r4, [sp, #-4]!
89	.cfi_def_cfa_offset 4
90	.cfi_offset 4, -4
91	mov	magic1, #1
92	orr	magic1, magic1, magic1, lsl #8
93	orr	magic1, magic1, magic1, lsl #16
94	.p2align	2
954:
96	sub	syndrome, data1, magic1
97	cmp	data1, data2
98	/* check for any zero bytes in first word */
99	biceq	syndrome, syndrome, data1
100	tsteq	syndrome, magic1, lsl #7
101	ldreq	data1, [src1], #4
102	ldreq	data2, [src2], #4
103	beq	4b
1042:
105	/* There's a zero or a different byte in the word */
106	S2HI	result, data1, #24
107	S2LO	data1, data1, #8
108	cmp	result, #1
109	cmpcs	result, data2, S2HI #24
110	S2LOEQ	data2, data2, #8
111	beq	2b
112	/* On a big-endian machine, RESULT contains the desired byte in bits
113	   0-7; on a little-endian machine they are in bits 24-31.  In
114	   both cases the other bits in RESULT are all zero.  For DATA2 the
115	   interesting byte is at the other end of the word, but the
116	   other bits are not necessarily zero.	 We need a signed result
117	   representing the differnece in the unsigned bytes, so for the
118	   little-endian case we can't just shift the interesting bits
119	   up.	*/
120#ifdef __ARM_BIG_ENDIAN
121	sub	result, result, data2, lsr #24
122#else
123	and	data2, data2, #255
124	rsb	result, data2, result, lsr #24
125#endif
126	ldr	r4, [sp], #4
127	.cfi_restore 4
128	.cfi_def_cfa_offset 0
129	RETURN
130
131
132#if 0
133	/* The assembly code below is based on the following alogrithm.	 */
134#ifdef __ARM_BIG_ENDIAN
135#define RSHIFT <<
136#define LSHIFT >>
137#else
138#define RSHIFT >>
139#define LSHIFT <<
140#endif
141
142#define body(shift)							\
143  mask = 0xffffffffU RSHIFT shift;					\
144  data1 = *src1++;							\
145  data2 = *src2++;							\
146  do									\
147    {									\
148      tmp2 = data1 & mask;						\
149      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))		\
150	{								\
151	  data2 RSHIFT= shift;						\
152	  break;							\
153	}								\
154      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))	\
155	{								\
156	  /* See comment in assembler below re syndrome on big-endian */\
157	  if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)		\
158	    data2 RSHIFT= shift;					\
159	  else								\
160	    {								\
161	      data2 = *src2;						\
162	      tmp2 = data1 RSHIFT (32 - shift);				\
163	      data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
164	    }								\
165	  break;							\
166	}								\
167      data2 = *src2++;							\
168      tmp2 ^= data1;							\
169      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))	\
170	{								\
171	  tmp2 = data1 >> (32 - shift);					\
172	  data2 = (data2 << (32 - shift)) RSHIFT (32 - shift);		\
173	  break;							\
174	}								\
175      data1 = *src1++;							\
176    } while (1)
177
178  const unsigned* src1;
179  const unsigned* src2;
180  unsigned data1, data2;
181  unsigned mask;
182  unsigned shift;
183  unsigned b1 = 0x01010101;
184  char c1, c2;
185  unsigned tmp2;
186
187  while (((unsigned) s1) & 3)
188    {
189      c1 = *s1++;
190      c2 = *s2++;
191      if (c1 == 0 || c1 != c2)
192	return c1 - (int)c2;
193    }
194  src1 = (unsigned*) (((unsigned)s1) & ~3);
195  src2 = (unsigned*) (((unsigned)s2) & ~3);
196  tmp2 = ((unsigned) s2) & 3;
197  if (tmp2 == 1)
198    {
199      body(8);
200    }
201  else if (tmp2 == 2)
202    {
203      body(16);
204    }
205  else
206    {
207      body (24);
208    }
209
210  do
211    {
212#ifdef __ARM_BIG_ENDIAN
213      c1 = (char) tmp2 >> 24;
214      c2 = (char) data2 >> 24;
215#else /* not  __ARM_BIG_ENDIAN */
216      c1 = (char) tmp2;
217      c2 = (char) data2;
218#endif /* not  __ARM_BIG_ENDIAN */
219      tmp2 RSHIFT= 8;
220      data2 RSHIFT= 8;
221    } while (c1 != 0 && c1 == c2);
222  return c1 - c2;
223#endif /* 0 */
224
225
226	/* First of all, compare bytes until src1(sp1) is word-aligned. */
227.Lstrcmp_unaligned:
228	tst	src1, #3
229	beq	2f
230	ldrb	data1, [src1], #1
231	ldrb	data2, [src2], #1
232	cmp	data1, #1
233	cmpcs	data1, data2
234	beq	.Lstrcmp_unaligned
235	sub	result, data1, data2
236	RETURN
237
2382:
239	stmfd	sp!, {r4, r5}
240	.cfi_def_cfa_offset 8
241	.cfi_offset 4, -8
242	.cfi_offset 5, -4
243	mov	magic1, #1
244	orr	magic1, magic1, magic1, lsl #8
245	orr	magic1, magic1, magic1, lsl #16
246
247	ldr	data1, [src1], #4
248	and	tmp2, src2, #3
249	bic	src2, src2, #3
250	ldr	data2, [src2], #4
251	cmp	tmp2, #2
252	beq	.Loverlap2
253	bhi	.Loverlap1
254
255	/* Critical inner Loop: Block with 3 bytes initial overlap */
256	.p2align	2
257.Loverlap3:
258	bic	tmp2, data1, #MSB
259	cmp	tmp2, data2, S2LO #8
260	sub	syndrome, data1, magic1
261	bic	syndrome, syndrome, data1
262	bne	4f
263	ands	syndrome, syndrome, magic1, lsl #7
264	ldreq	data2, [src2], #4
265	bne	5f
266	eor	tmp2, tmp2, data1
267	cmp	tmp2, data2, S2HI #24
268	bne	6f
269	ldr	data1, [src1], #4
270	b	.Loverlap3
2714:
272	S2LO	data2, data2, #8
273	b	.Lstrcmp_tail
274
2755:
276#ifdef __ARM_BIG_ENDIAN
277	/* The syndrome value may contain false ones if the string ends
278	with the bytes 0x01 0x00.  */
279	tst	data1, #0xff000000
280	tstne	data1, #0x00ff0000
281	tstne	data1, #0x0000ff00
282	beq	.Lstrcmp_done_equal
283#else
284	bics	syndrome, syndrome, #0xff000000
285	bne	.Lstrcmp_done_equal
286#endif
287	ldrb	data2, [src2]
288	S2LO	tmp2, data1, #24
289#ifdef __ARM_BIG_ENDIAN
290	lsl	data2, data2, #24
291#endif
292	b	.Lstrcmp_tail
293
2946:
295	S2LO	tmp2, data1, #24
296	and	data2, data2, #LSB
297	b	.Lstrcmp_tail
298
299	/* Critical inner Loop: Block with 2 bytes initial overlap.  */
300	.p2align	2
301.Loverlap2:
302	S2HI	tmp2, data1, #16
303	sub	syndrome, data1, magic1
304	S2LO	tmp2, tmp2, #16
305	bic	syndrome, syndrome, data1
306	cmp	tmp2, data2, S2LO #16
307	bne	4f
308	ands	syndrome, syndrome, magic1, lsl #7
309	ldreq	data2, [src2], #4
310	bne	5f
311	eor	tmp2, tmp2, data1
312	cmp	tmp2, data2, S2HI #16
313	bne	6f
314	ldr	data1, [src1], #4
315	b	.Loverlap2
316
3175:
318#ifdef __ARM_BIG_ENDIAN
319	/* The syndrome value may contain false ones if the string ends
320	with the bytes 0x01 0x00 */
321	tst	data1, #0xff000000
322	tstne	data1, #0x00ff0000
323	beq	.Lstrcmp_done_equal
324#else
325	lsls	syndrome, syndrome, #16
326	bne	.Lstrcmp_done_equal
327#endif
328	ldrh	data2, [src2]
329	S2LO	tmp2, data1, #16
330#ifdef __ARM_BIG_ENDIAN
331	lsl	data2, data2, #16
332#endif
333	b	.Lstrcmp_tail
334
3356:
336	S2HI	data2, data2, #16
337	S2LO	tmp2, data1, #16
3384:
339	S2LO	data2, data2, #16
340	b	.Lstrcmp_tail
341
342	/* Critical inner Loop: Block with 1 byte initial overlap.  */
343	.p2align	2
344.Loverlap1:
345	and	tmp2, data1, #LSB
346	cmp	tmp2, data2, S2LO #24
347	sub	syndrome, data1, magic1
348	bic	syndrome, syndrome, data1
349	bne	4f
350	ands	syndrome, syndrome, magic1, lsl #7
351	ldreq	data2, [src2], #4
352	bne	5f
353	eor	tmp2, tmp2, data1
354	cmp	tmp2, data2, S2HI #8
355	bne	6f
356	ldr	data1, [src1], #4
357	b	.Loverlap1
3584:
359	S2LO	data2, data2, #24
360	b	.Lstrcmp_tail
3615:
362	/* The syndrome value may contain false ones if the string ends
363	   with the bytes 0x01 0x00.  */
364	tst	data1, #LSB
365	beq	.Lstrcmp_done_equal
366	ldr	data2, [src2], #4
3676:
368	S2LO	tmp2, data1, #8
369	bic	data2, data2, #MSB
370	b	.Lstrcmp_tail
371.Lstrcmp_done_equal:
372	mov	result, #0
373	.cfi_remember_state
374	ldmfd	sp!, {r4, r5}
375	.cfi_restore 4
376	.cfi_restore 5
377	.cfi_def_cfa_offset 0
378	RETURN
379
380.Lstrcmp_tail:
381	.cfi_restore_state
382	and	r2, tmp2, #LSB
383	and	result, data2, #LSB
384	cmp	result, #1
385	cmpcs	result, r2
386	S2LOEQ	tmp2, tmp2, #8
387	S2LOEQ	data2, data2, #8
388	beq	.Lstrcmp_tail
389	sub	result, r2, result
390	ldmfd	sp!, {r4, r5}
391	.cfi_restore 4
392	.cfi_restore 5
393	.cfi_def_cfa_offset 0
394	RETURN
395	.cfi_endproc
396	.size strcmp, . - strcmp
397