1/*
2 * Copyright (c) 2012-2014 ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/* Very similar to the generic code, but uses Thumb2 as implemented
30   in ARMv7-M.  */
31
32#include <picolibc.h>
33
34#include "arm_asm.h"
35
36/* Parameters and result.  */
37#define src1		r0
38#define src2		r1
39#define result		r0	/* Overlaps src1.  */
40
41/* Internal variables.  */
42#define data1		r2
43#define data2		r3
44#define tmp2		r5
45#define tmp1		r12
46#define syndrome	r12	/* Overlaps tmp1 */
47
48	.thumb
49	.syntax unified
50def_fn strcmp
51	.fnstart
52	.cfi_sections .debug_frame
53	.cfi_startproc
54	prologue push_ip=HAVE_PAC_LEAF
55	eor	tmp1, src1, src2
56	tst	tmp1, #3
57	/* Strings not at same byte offset from a word boundary.  */
58	bne	.Lstrcmp_unaligned
59	ands	tmp1, src1, #3
60	bic	src1, src1, #3
61	bic	src2, src2, #3
62	ldr	data1, [src1], #4
63	it	eq
64	ldreq	data2, [src2], #4
65	beq	4f
66	/* Although s1 and s2 have identical initial alignment, they are
67	   not currently word aligned.	Rather than comparing bytes,
68	   make sure that any bytes fetched from before the addressed
69	   bytes are forced to 0xff.  Then they will always compare
70	   equal.  */
71	eor	tmp1, tmp1, #3
72	mvn	data2, #MSB
73	lsl	tmp1, tmp1, #3
74	S2LO	tmp1, data2, tmp1
75	ldr	data2, [src2], #4
76	orr	data1, data1, tmp1
77	orr	data2, data2, tmp1
78	.p2align	2
79	/* Critical loop.  */
804:
81	sub	syndrome, data1, #0x01010101
82	cmp	data1, data2
83	/* check for any zero bytes in first word */
84	itttt	eq
85	biceq	syndrome, syndrome, data1
86	tsteq	syndrome, #0x80808080
87	ldreq	data1, [src1], #4
88	ldreq	data2, [src2], #4
89	beq	4b
902:
91	.cfi_remember_state
92	/* There's a zero or a different byte in the word */
93	S2HI	result, data1, #24
94	S2LO	data1, data1, #8
95	cmp	result, #1
96	it	cs
97	cmpcs	result, data2, S2HI #24
98	it	eq
99	S2LOEQ	data2, data2, #8
100	beq	2b
101	/* On a big-endian machine, RESULT contains the desired byte in bits
102	   0-7; on a little-endian machine they are in bits 24-31.  In
103	   both cases the other bits in RESULT are all zero.  For DATA2 the
104	   interesting byte is at the other end of the word, but the
105	   other bits are not necessarily zero.	 We need a signed result
106	   representing the differnece in the unsigned bytes, so for the
107	   little-endian case we can't just shift the interesting bits
108	   up.	*/
109#ifdef __ARM_BIG_ENDIAN
110	sub	result, result, data2, lsr #24
111#else
112	and	data2, data2, #255
113	lsrs	result, result, #24
114	subs	result, result, data2
115#endif
116	epilogue push_ip=HAVE_PAC_LEAF
117
118
119#if 0
120	/* The assembly code below is based on the following alogrithm.	 */
121#ifdef __ARM_BIG_ENDIAN
122#define RSHIFT <<
123#define LSHIFT >>
124#else
125#define RSHIFT >>
126#define LSHIFT <<
127#endif
128
129#define body(shift)							\
130  mask = 0xffffffffU RSHIFT shift;					\
131  data1 = *src1++;							\
132  data2 = *src2++;							\
133  do									\
134    {									\
135      tmp2 = data1 & mask;						\
136      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))		\
137	{								\
138	  data2 RSHIFT= shift;						\
139	  break;							\
140	}								\
141      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))	\
142	{								\
143	  /* See comment in assembler below re syndrome on big-endian */\
144	  if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)		\
145	    data2 RSHIFT= shift;					\
146	  else								\
147	    {								\
148	      data2 = *src2;						\
149	      tmp2 = data1 RSHIFT (32 - shift);				\
150	      data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
151	    }								\
152	  break;							\
153	}								\
154      data2 = *src2++;							\
155      tmp2 ^= data1;							\
156      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))	\
157	{								\
158	  tmp2 = data1 >> (32 - shift);					\
159	  data2 = (data2 << (32 - shift)) RSHIFT (32 - shift);		\
160	  break;							\
161	}								\
162      data1 = *src1++;							\
163    } while (1)
164
165  const unsigned* src1;
166  const unsigned* src2;
167  unsigned data1, data2;
168  unsigned mask;
169  unsigned shift;
170  unsigned b1 = 0x01010101;
171  char c1, c2;
172  unsigned tmp2;
173
174  while (((unsigned) s1) & 3)
175    {
176      c1 = *s1++;
177      c2 = *s2++;
178      if (c1 == 0 || c1 != c2)
179	return c1 - (int)c2;
180    }
181  src1 = (unsigned*) (((unsigned)s1) & ~3);
182  src2 = (unsigned*) (((unsigned)s2) & ~3);
183  tmp2 = ((unsigned) s2) & 3;
184  if (tmp2 == 1)
185    {
186      body(8);
187    }
188  else if (tmp2 == 2)
189    {
190      body(16);
191    }
192  else
193    {
194      body (24);
195    }
196
197  do
198    {
199#ifdef __ARM_BIG_ENDIAN
200      c1 = (char) tmp2 >> 24;
201      c2 = (char) data2 >> 24;
202#else /* not  __ARM_BIG_ENDIAN */
203      c1 = (char) tmp2;
204      c2 = (char) data2;
205#endif /* not  __ARM_BIG_ENDIAN */
206      tmp2 RSHIFT= 8;
207      data2 RSHIFT= 8;
208    } while (c1 != 0 && c1 == c2);
209  return c1 - c2;
210#endif /* 0 */
211
212
213	/* First of all, compare bytes until src1(sp1) is word-aligned. */
214.Lstrcmp_unaligned:
215	.cfi_restore_state
216	tst	src1, #3
217	beq	2f
218	.cfi_remember_state
219	ldrb	data1, [src1], #1
220	ldrb	data2, [src2], #1
221	cmp	data1, #1
222	it	cs
223	cmpcs	data1, data2
224	beq	.Lstrcmp_unaligned
225	sub	result, data1, data2
226	epilogue push_ip=HAVE_PAC_LEAF
227
2282:
229	.cfi_restore_state
230	stmfd	sp!, {r5}
231	.cfi_adjust_cfa_offset 4
232	.cfi_rel_offset 5, 0
233
234	ldr	data1, [src1], #4
235	and	tmp2, src2, #3
236	bic	src2, src2, #3
237	ldr	data2, [src2], #4
238	cmp	tmp2, #2
239	beq	.Loverlap2
240	bhi	.Loverlap1
241
242	/* Critical inner Loop: Block with 3 bytes initial overlap */
243	.p2align	2
244.Loverlap3:
245	bic	tmp2, data1, #MSB
246	cmp	tmp2, data2, S2LO #8
247	sub	syndrome, data1, #0x01010101
248	bic	syndrome, syndrome, data1
249	bne	4f
250	ands	syndrome, syndrome, #0x80808080
251	it	eq
252	ldreq	data2, [src2], #4
253	bne	5f
254	eor	tmp2, tmp2, data1
255	cmp	tmp2, data2, S2HI #24
256	bne	6f
257	ldr	data1, [src1], #4
258	b	.Loverlap3
2594:
260	S2LO	data2, data2, #8
261	b	.Lstrcmp_tail
262
2635:
264#ifdef __ARM_BIG_ENDIAN
265	/* The syndrome value may contain false ones if the string ends
266	with the bytes 0x01 0x00.  */
267	tst	data1, #0xff000000
268	itt	ne
269	tstne	data1, #0x00ff0000
270	tstne	data1, #0x0000ff00
271	beq	.Lstrcmp_done_equal
272#else
273	bics	syndrome, syndrome, #0xff000000
274	bne	.Lstrcmp_done_equal
275#endif
276	ldrb	data2, [src2]
277	S2LO	tmp2, data1, #24
278#ifdef __ARM_BIG_ENDIAN
279	lsl	data2, data2, #24
280#endif
281	b	.Lstrcmp_tail
282
2836:
284	S2LO	tmp2, data1, #24
285	and	data2, data2, #LSB
286	b	.Lstrcmp_tail
287
288	/* Critical inner Loop: Block with 2 bytes initial overlap.  */
289	.p2align	2
290.Loverlap2:
291	S2HI	tmp2, data1, #16
292	sub	syndrome, data1, #0x01010101
293	S2LO	tmp2, tmp2, #16
294	bic	syndrome, syndrome, data1
295	cmp	tmp2, data2, S2LO #16
296	bne	4f
297	ands	syndrome, syndrome, #0x80808080
298	it	eq
299	ldreq	data2, [src2], #4
300	bne	5f
301	eor	tmp2, tmp2, data1
302	cmp	tmp2, data2, S2HI #16
303	bne	6f
304	ldr	data1, [src1], #4
305	b	.Loverlap2
306
3075:
308#ifdef __ARM_BIG_ENDIAN
309	/* The syndrome value may contain false ones if the string ends
310	with the bytes 0x01 0x00 */
311	tst	data1, #0xff000000
312	it	ne
313	tstne	data1, #0x00ff0000
314	beq	.Lstrcmp_done_equal
315#else
316	lsls	syndrome, syndrome, #16
317	bne	.Lstrcmp_done_equal
318#endif
319	ldrh	data2, [src2]
320	S2LO	tmp2, data1, #16
321#ifdef __ARM_BIG_ENDIAN
322	lsl	data2, data2, #16
323#endif
324	b	.Lstrcmp_tail
325
3266:
327	S2HI	data2, data2, #16
328	S2LO	tmp2, data1, #16
3294:
330	S2LO	data2, data2, #16
331	b	.Lstrcmp_tail
332
333	/* Critical inner Loop: Block with 1 byte initial overlap.  */
334	.p2align	2
335.Loverlap1:
336	and	tmp2, data1, #LSB
337	cmp	tmp2, data2, S2LO #24
338	sub	syndrome, data1, #0x01010101
339	bic	syndrome, syndrome, data1
340	bne	4f
341	ands	syndrome, syndrome, #0x80808080
342	it	eq
343	ldreq	data2, [src2], #4
344	bne	5f
345	eor	tmp2, tmp2, data1
346	cmp	tmp2, data2, S2HI #8
347	bne	6f
348	ldr	data1, [src1], #4
349	b	.Loverlap1
3504:
351	S2LO	data2, data2, #24
352	b	.Lstrcmp_tail
3535:
354	/* The syndrome value may contain false ones if the string ends
355	   with the bytes 0x01 0x00.  */
356	tst	data1, #LSB
357	beq	.Lstrcmp_done_equal
358	ldr	data2, [src2], #4
3596:
360	S2LO	tmp2, data1, #8
361	bic	data2, data2, #MSB
362	b	.Lstrcmp_tail
363.Lstrcmp_done_equal:
364	mov	result, #0
365	.cfi_remember_state
366	ldmfd	sp!, {r5}
367	.cfi_restore 5
368	.cfi_adjust_cfa_offset -4
369	epilogue push_ip=HAVE_PAC_LEAF
370
371.Lstrcmp_tail:
372	.cfi_restore_state
373	and	r2, tmp2, #LSB
374	and	result, data2, #LSB
375	cmp	result, #1
376	it	cs
377	cmpcs	result, r2
378	itt	eq
379	S2LOEQ	tmp2, tmp2, #8
380	S2LOEQ	data2, data2, #8
381	beq	.Lstrcmp_tail
382	sub	result, r2, result
383	ldmfd	sp!, {r5}
384	.cfi_restore 5
385	.cfi_adjust_cfa_offset -4
386	epilogue push_ip=HAVE_PAC_LEAF
387	.cfi_endproc
388	.cantunwind
389	.fnend
390	.size strcmp, . - strcmp
391