/*
   Copyright (c) 2015, Synopsys, Inc. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   1) Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

   2) Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

/* This implementation is optimized for performance.  When optimizing for
   code size, the generic implementation from newlib/libc/string/memcmp.c
   is used instead.  */
#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED)

#include "asm.h"

#if !defined (__ARC601__) && defined (__ARC_NORM__) \
    && defined (__ARC_BARREL_SHIFTER__)

#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif
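
/* WORD2 is clobbered by the loop loads of the second word of each pair;
   SHIFT is the register that survives the loop and is later scaled to a
   bit count for the final partial-word compare.  The little- and
   big-endian code paths below use r2 and r3 for opposite roles, hence
   the swap.  */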

ENTRY (memcmp)
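	/* Collect the low two address bits of both buffers in bits 30-31
	   of r12.  r12 is zero only when both pointers are word aligned;
	   otherwise it is at least 0x40000000, so the unsigned brls below
	   routes unaligned (and trivially short) calls to the bytewise
	   loop.  r3 becomes length - 1 for the loop counts.  */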
	or	r12,r0,r1
	asl_s	r12,r12,30
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	sub_l	r3,r2,1
	brls	r2,r12,.Lbytewise
#else
	brls.d	r2,r12,.Lbytewise
	sub_s	r3,r2,1
#endif
	ld	r4,[r0,0]
	ld	r5,[r1,0]
	lsr.f	lp_count,r3,3
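	/* Each zero-overhead-loop iteration compares two words, so the
	   trip count is (length-1)/8.  The carry (bit 2 of length-1) is
	   consumed by the bcc_s after the loop: it decides whether one
	   more word pair is examined before the final partial compare.  */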
#ifdef __ARCEM__
	/* A branch can't be the last instruction in a zero overhead loop.
	   So we move the branch to the start of the loop, duplicate it
	   after the end, and set up r12 so that the branch isn't taken
	   initially.  */
	mov_s	r12,WORD2
	lpne	.Loop_end
	brne	WORD2,r12,.Lodd
	ld	WORD2,[r0,4]
#else
	lpne	.Loop_end
	ld_s	WORD2,[r0,4]
#endif
	ld_s	r12,[r1,4]
	brne	r4,r5,.Leven
	ld.a	r4,[r0,8]
	ld.a	r5,[r1,8]
#ifdef __ARCEM__
.Loop_end:
	brne	WORD2,r12,.Lodd
#else
	brne	WORD2,r12,.Lodd
#ifdef __ARCHS__
	nop
#endif
.Loop_end:
#endif
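	/* SHIFT still holds a byte count (length-1 little-endian, length
	   big-endian); scale it to a bit count for the tail compare.  The
	   bcc_s consumes the carry produced by the lsr.f above.  */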
	asl_s	SHIFT,SHIFT,3
	bcc_s	.Last_cmp
	brne	r4,r5,.Leven
	ld	r4,[r0,4]
	ld	r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	nop_s
	; one more load latency cycle
.Last_cmp:
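	/* Find the first (lowest-addressed, least significant) differing
	   byte.  bset plants a guard bit at the lowest bit of the last
	   valid byte (bit positions wrap mod 32), so a difference past the
	   requested length can never be selected.  (x-1) & ~x masks the
	   bits below the lowest set bit of x; norm of that mask, rounded
	   down to a multiple of 8 by the AND with 24, is the left shift
	   that brings the deciding byte to the top of the word.  */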
	xor	r0,r4,r5
	bset	r0,r0,SHIFT
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	b.d	.Leven_cmp
	and	r1,r1,24
.Leven:
	xor	r0,r4,r5
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
.Leven_cmp:
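	/* Shift the deciding byte and everything below it to the top,
	   drop one bit so the 32-bit subtraction cannot overflow, and
	   return the signed difference.  */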
	asl	r2,r4,r1
	asl	r12,r5,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12
	.balign	4
.Lodd:
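	; same first-difference computation for the second word of the
	; pair, held in WORD2 and r12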
	xor	r0,WORD2,r12
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
	asl_s	r2,r2,r1
	asl_s	r12,r12,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12
#else /* !(__ARC700__ || __ARCEM__ || __ARCHS__) */
	.balign	4
.Last_cmp:
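	; the guard bit at SHIFT keeps differences past the requested
	; length from deciding the result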
	xor	r0,r4,r5
	b.d	.Leven_cmp
	bset	r0,r0,SHIFT
.Lodd:
	mov_s	r4,WORD2
	mov_s	r5,r12
.Leven:
	xor	r0,r4,r5
.Leven_cmp:
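	/* Carry-trick variant of the first-difference search: isolate the
	   lowest set bit of the XOR, then 0x80808080 minus that bit, XORed
	   with 0x80808080, masks the first differing byte from its lowest
	   differing bit upward.  Comparing the masked words orders the
	   buffers by that byte; when the two words differ at bit 31, the
	   mov.mi takes the sign from the masked second word.  */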
	mov_s	r1,0x80808080
	; uses long immediate
	sub_s	r12,r0,1
	bic_s	r0,r0,r12
	sub	r0,r1,r0
	xor_s	r0,r0,r1
	and	r1,r5,r0
	and	r0,r4,r0
	xor.f	0,r0,r1
	sub_s	r0,r0,r1
	j_s.d	[blink]
	mov.mi	r0,r1
#endif /* !(__ARC700__ || __ARCEM__ || __ARCHS__) */
#else /* BIG ENDIAN */
.Last_cmp:
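	/* Big-endian: the first byte in memory is the most significant,
	   so shifting both words right by (32 - 8*length) mod 32 bits
	   discards the bytes past the requested length before the
	   compare.  */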
	neg_s	SHIFT,SHIFT
	lsr	r4,r4,SHIFT
	lsr	r5,r5,SHIFT
	; slow track insn
.Leven:
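	; r0 = 1 when the words differ, with bit 31 set (negative result)
	; when r4 < r5 unsigned; r0 = 0 when they are equal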
	sub.f	r0,r4,r5
	mov.ne	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31
.Lodd:
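	; the words already differ; derive the sign of the result from
	; the unsigned compare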
	cmp_s	WORD2,r12
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	mov_s	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31
#else
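	; rrc rotates the constant 2 right through the carry from the
	; cmp_s: the result is 1, with bit 31 set when WORD2 < r12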
	j_s.d	[blink]
	rrc	r0,2
#endif /* __ARC700__ || __ARCEM__ || __ARCHS__ */
#endif /* ENDIAN */
	.balign	4
.Lbytewise:
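	/* Fallback for unaligned or very short buffers: same structure as
	   the word loop, but comparing two bytes per iteration with
	   lp_count = (length-1)/2.  */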
	breq	r2,0,.Lnil
	ldb	r4,[r0,0]
	ldb	r5,[r1,0]
	lsr.f	lp_count,r3
#ifdef __ARCEM__
	mov	r12,r3
	lpne	.Lbyte_end
	brne	r3,r12,.Lbyte_odd
#else
	lpne	.Lbyte_end
#endif
	ldb_s	r3,[r0,1]
	ldb_l	r12,[r1,1]
	brne	r4,r5,.Lbyte_even
	ldb.a	r4,[r0,2]
	ldb.a	r5,[r1,2]
#ifdef __ARCEM__
.Lbyte_end:
	brne	r3,r12,.Lbyte_odd
#else
	brne	r3,r12,.Lbyte_odd
#ifdef __ARCHS__
	nop
#endif
.Lbyte_end:
#endif
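	; carry saved from the lsr.f above: set means one more byte pair
	; must still be loaded and compared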
	bcc_l	.Lbyte_even
	brne	r4,r5,.Lbyte_even
	ldb_s	r3,[r0,1]
	ldb_s	r12,[r1,1]
.Lbyte_odd:
	j_s.d	[blink]
	sub	r0,r3,r12
.Lbyte_even:
	j_s.d	[blink]
	sub	r0,r4,r5
.Lnil:
	j_s.d	[blink]
	mov_l	r0,0
ENDFUNC (memcmp)
#endif /* !__ARC601__ && __ARC_NORM__ && __ARC_BARREL_SHIFTER__ */

#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */