1/*
2 * Copyright (c) 2012-2014 ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 *    products derived from this software without specific prior written
15 *    permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/* Very similar to the generic code, but uses Thumb2 as implemented
30   in ARMv7-M.  */
31
32#include "arm_asm.h"
33
34/* Parameters and result.  */
35#define src1		r0
36#define src2		r1
37#define result		r0	/* Overlaps src1.  */
38
39/* Internal variables.  */
40#define data1		r2
41#define data2		r3
42#define tmp2		r5
43#define tmp1		r12
44#define syndrome	r12	/* Overlaps tmp1 */
45
46	.thumb
47	.syntax unified
48def_fn strcmp
49	.fnstart
50	.cfi_sections .debug_frame
51	.cfi_startproc
52	prologue push_ip=HAVE_PAC_LEAF
53	eor	tmp1, src1, src2
54	tst	tmp1, #3
55	/* Strings not at same byte offset from a word boundary.  */
56	bne	.Lstrcmp_unaligned
57	ands	tmp1, src1, #3
58	bic	src1, src1, #3
59	bic	src2, src2, #3
60	ldr	data1, [src1], #4
61	it	eq
62	ldreq	data2, [src2], #4
63	beq	4f
64	/* Although s1 and s2 have identical initial alignment, they are
65	   not currently word aligned.	Rather than comparing bytes,
66	   make sure that any bytes fetched from before the addressed
67	   bytes are forced to 0xff.  Then they will always compare
68	   equal.  */
69	eor	tmp1, tmp1, #3
70	mvn	data2, #MSB
71	lsl	tmp1, tmp1, #3
72	S2LO	tmp1, data2, tmp1
73	ldr	data2, [src2], #4
74	orr	data1, data1, tmp1
75	orr	data2, data2, tmp1
76	.p2align	2
77	/* Critical loop.  */
784:
79	sub	syndrome, data1, #0x01010101
80	cmp	data1, data2
81	/* check for any zero bytes in first word */
82	itttt	eq
83	biceq	syndrome, syndrome, data1
84	tsteq	syndrome, #0x80808080
85	ldreq	data1, [src1], #4
86	ldreq	data2, [src2], #4
87	beq	4b
882:
89	.cfi_remember_state
90	/* There's a zero or a different byte in the word */
91	S2HI	result, data1, #24
92	S2LO	data1, data1, #8
93	cmp	result, #1
94	it	cs
95	cmpcs	result, data2, S2HI #24
96	it	eq
97	S2LOEQ	data2, data2, #8
98	beq	2b
99	/* On a big-endian machine, RESULT contains the desired byte in bits
100	   0-7; on a little-endian machine they are in bits 24-31.  In
101	   both cases the other bits in RESULT are all zero.  For DATA2 the
102	   interesting byte is at the other end of the word, but the
103	   other bits are not necessarily zero.	 We need a signed result
104	   representing the differnece in the unsigned bytes, so for the
105	   little-endian case we can't just shift the interesting bits
106	   up.	*/
107#ifdef __ARM_BIG_ENDIAN
108	sub	result, result, data2, lsr #24
109#else
110	and	data2, data2, #255
111	lsrs	result, result, #24
112	subs	result, result, data2
113#endif
114	epilogue push_ip=HAVE_PAC_LEAF
115
116
117#if 0
118	/* The assembly code below is based on the following alogrithm.	 */
119#ifdef __ARM_BIG_ENDIAN
120#define RSHIFT <<
121#define LSHIFT >>
122#else
123#define RSHIFT >>
124#define LSHIFT <<
125#endif
126
127#define body(shift)							\
128  mask = 0xffffffffU RSHIFT shift;					\
129  data1 = *src1++;							\
130  data2 = *src2++;							\
131  do									\
132    {									\
133      tmp2 = data1 & mask;						\
134      if (__builtin_expect(tmp2 != data2 RSHIFT shift, 0))		\
135	{								\
136	  data2 RSHIFT= shift;						\
137	  break;							\
138	}								\
139      if (__builtin_expect(((data1 - b1) & ~data1) & (b1 << 7), 0))	\
140	{								\
141	  /* See comment in assembler below re syndrome on big-endian */\
142	  if ((((data1 - b1) & ~data1) & (b1 << 7)) & mask)		\
143	    data2 RSHIFT= shift;					\
144	  else								\
145	    {								\
146	      data2 = *src2;						\
147	      tmp2 = data1 RSHIFT (32 - shift);				\
148	      data2 = (data2 LSHIFT (32 - shift)) RSHIFT (32 - shift);	\
149	    }								\
150	  break;							\
151	}								\
152      data2 = *src2++;							\
153      tmp2 ^= data1;							\
154      if (__builtin_expect(tmp2 != data2 LSHIFT (32 - shift), 0))	\
155	{								\
156	  tmp2 = data1 >> (32 - shift);					\
157	  data2 = (data2 << (32 - shift)) RSHIFT (32 - shift);		\
158	  break;							\
159	}								\
160      data1 = *src1++;							\
161    } while (1)
162
163  const unsigned* src1;
164  const unsigned* src2;
165  unsigned data1, data2;
166  unsigned mask;
167  unsigned shift;
168  unsigned b1 = 0x01010101;
169  char c1, c2;
170  unsigned tmp2;
171
172  while (((unsigned) s1) & 3)
173    {
174      c1 = *s1++;
175      c2 = *s2++;
176      if (c1 == 0 || c1 != c2)
177	return c1 - (int)c2;
178    }
179  src1 = (unsigned*) (((unsigned)s1) & ~3);
180  src2 = (unsigned*) (((unsigned)s2) & ~3);
181  tmp2 = ((unsigned) s2) & 3;
182  if (tmp2 == 1)
183    {
184      body(8);
185    }
186  else if (tmp2 == 2)
187    {
188      body(16);
189    }
190  else
191    {
192      body (24);
193    }
194
195  do
196    {
197#ifdef __ARM_BIG_ENDIAN
198      c1 = (char) tmp2 >> 24;
199      c2 = (char) data2 >> 24;
200#else /* not  __ARM_BIG_ENDIAN */
201      c1 = (char) tmp2;
202      c2 = (char) data2;
203#endif /* not  __ARM_BIG_ENDIAN */
204      tmp2 RSHIFT= 8;
205      data2 RSHIFT= 8;
206    } while (c1 != 0 && c1 == c2);
207  return c1 - c2;
208#endif /* 0 */
209
210
211	/* First of all, compare bytes until src1(sp1) is word-aligned. */
212.Lstrcmp_unaligned:
213	.cfi_restore_state
214	tst	src1, #3
215	beq	2f
216	.cfi_remember_state
217	ldrb	data1, [src1], #1
218	ldrb	data2, [src2], #1
219	cmp	data1, #1
220	it	cs
221	cmpcs	data1, data2
222	beq	.Lstrcmp_unaligned
223	sub	result, data1, data2
224	epilogue push_ip=HAVE_PAC_LEAF
225
2262:
227	.cfi_restore_state
228	stmfd	sp!, {r5}
229	.cfi_adjust_cfa_offset 4
230	.cfi_rel_offset 5, 0
231
232	ldr	data1, [src1], #4
233	and	tmp2, src2, #3
234	bic	src2, src2, #3
235	ldr	data2, [src2], #4
236	cmp	tmp2, #2
237	beq	.Loverlap2
238	bhi	.Loverlap1
239
240	/* Critical inner Loop: Block with 3 bytes initial overlap */
241	.p2align	2
242.Loverlap3:
243	bic	tmp2, data1, #MSB
244	cmp	tmp2, data2, S2LO #8
245	sub	syndrome, data1, #0x01010101
246	bic	syndrome, syndrome, data1
247	bne	4f
248	ands	syndrome, syndrome, #0x80808080
249	it	eq
250	ldreq	data2, [src2], #4
251	bne	5f
252	eor	tmp2, tmp2, data1
253	cmp	tmp2, data2, S2HI #24
254	bne	6f
255	ldr	data1, [src1], #4
256	b	.Loverlap3
2574:
258	S2LO	data2, data2, #8
259	b	.Lstrcmp_tail
260
2615:
262#ifdef __ARM_BIG_ENDIAN
263	/* The syndrome value may contain false ones if the string ends
264	with the bytes 0x01 0x00.  */
265	tst	data1, #0xff000000
266	itt	ne
267	tstne	data1, #0x00ff0000
268	tstne	data1, #0x0000ff00
269	beq	.Lstrcmp_done_equal
270#else
271	bics	syndrome, syndrome, #0xff000000
272	bne	.Lstrcmp_done_equal
273#endif
274	ldrb	data2, [src2]
275	S2LO	tmp2, data1, #24
276#ifdef __ARM_BIG_ENDIAN
277	lsl	data2, data2, #24
278#endif
279	b	.Lstrcmp_tail
280
2816:
282	S2LO	tmp2, data1, #24
283	and	data2, data2, #LSB
284	b	.Lstrcmp_tail
285
286	/* Critical inner Loop: Block with 2 bytes initial overlap.  */
287	.p2align	2
288.Loverlap2:
289	S2HI	tmp2, data1, #16
290	sub	syndrome, data1, #0x01010101
291	S2LO	tmp2, tmp2, #16
292	bic	syndrome, syndrome, data1
293	cmp	tmp2, data2, S2LO #16
294	bne	4f
295	ands	syndrome, syndrome, #0x80808080
296	it	eq
297	ldreq	data2, [src2], #4
298	bne	5f
299	eor	tmp2, tmp2, data1
300	cmp	tmp2, data2, S2HI #16
301	bne	6f
302	ldr	data1, [src1], #4
303	b	.Loverlap2
304
3055:
306#ifdef __ARM_BIG_ENDIAN
307	/* The syndrome value may contain false ones if the string ends
308	with the bytes 0x01 0x00 */
309	tst	data1, #0xff000000
310	it	ne
311	tstne	data1, #0x00ff0000
312	beq	.Lstrcmp_done_equal
313#else
314	lsls	syndrome, syndrome, #16
315	bne	.Lstrcmp_done_equal
316#endif
317	ldrh	data2, [src2]
318	S2LO	tmp2, data1, #16
319#ifdef __ARM_BIG_ENDIAN
320	lsl	data2, data2, #16
321#endif
322	b	.Lstrcmp_tail
323
3246:
325	S2HI	data2, data2, #16
326	S2LO	tmp2, data1, #16
3274:
328	S2LO	data2, data2, #16
329	b	.Lstrcmp_tail
330
331	/* Critical inner Loop: Block with 1 byte initial overlap.  */
332	.p2align	2
333.Loverlap1:
334	and	tmp2, data1, #LSB
335	cmp	tmp2, data2, S2LO #24
336	sub	syndrome, data1, #0x01010101
337	bic	syndrome, syndrome, data1
338	bne	4f
339	ands	syndrome, syndrome, #0x80808080
340	it	eq
341	ldreq	data2, [src2], #4
342	bne	5f
343	eor	tmp2, tmp2, data1
344	cmp	tmp2, data2, S2HI #8
345	bne	6f
346	ldr	data1, [src1], #4
347	b	.Loverlap1
3484:
349	S2LO	data2, data2, #24
350	b	.Lstrcmp_tail
3515:
352	/* The syndrome value may contain false ones if the string ends
353	   with the bytes 0x01 0x00.  */
354	tst	data1, #LSB
355	beq	.Lstrcmp_done_equal
356	ldr	data2, [src2], #4
3576:
358	S2LO	tmp2, data1, #8
359	bic	data2, data2, #MSB
360	b	.Lstrcmp_tail
361.Lstrcmp_done_equal:
362	mov	result, #0
363	.cfi_remember_state
364	ldmfd	sp!, {r5}
365	.cfi_restore 5
366	.cfi_adjust_cfa_offset -4
367	epilogue push_ip=HAVE_PAC_LEAF
368
369.Lstrcmp_tail:
370	.cfi_restore_state
371	and	r2, tmp2, #LSB
372	and	result, data2, #LSB
373	cmp	result, #1
374	it	cs
375	cmpcs	result, r2
376	itt	eq
377	S2LOEQ	tmp2, tmp2, #8
378	S2LOEQ	data2, data2, #8
379	beq	.Lstrcmp_tail
380	sub	result, r2, result
381	ldmfd	sp!, {r5}
382	.cfi_restore 5
383	.cfi_adjust_cfa_offset -4
384	epilogue push_ip=HAVE_PAC_LEAF
385	.cfi_endproc
386	.cantunwind
387	.fnend
388	.size strcmp, . - strcmp
389