/*
   Copyright (c) 2015-2024, Synopsys, Inc. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   1) Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

   2) Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

/* This implementation is optimized for performance.  For code size a generic
   implementation of this function from newlib/libc/string/memcmp.c will be
   used.  */
#include <picolibc.h>

#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED) \
    && !defined (__ARC_RF16__)

#include "asm.h"

#if !defined (__ARC601__) && defined (__ARC_NORM__) \
    && defined (__ARC_BARREL_SHIFTER__)

#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif

/* int memcmp (const void *s1, const void *s2, size_t n)

   In:    r0 = s1, r1 = s2, r2 = n (byte count)
   Out:   r0 = 0 if the first n bytes are equal; otherwise the sign of r0
          gives the ordering of the first differing byte pair.
   Uses:  r0-r5, r12, LP_COUNT and the status flags; leaf routine, no stack.

   Strategy: when s1 and s2 are both 32-bit aligned and n is large enough,
   compare two words (8 bytes) per zero-overhead-loop iteration and fix up
   the 1..8 byte tail; otherwise fall back to a two-bytes-per-iteration
   byte loop.  */
ENTRY (memcmp)
	or	r12,r0,r1		; gather the alignment bits of both pointers
	asl_s	r12,r12,30		; r12 >= 2^30 (unsigned) iff either pointer is unaligned
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	sub_l	r3,r2,1			; r3 = n-1: loop count / tail size below
	brls	r2,r12,.Lbytewise	; unaligned, or n == 0 -> byte loop
#else
	brls.d	r2,r12,.Lbytewise	; same test; r3 = n-1 filled in the delay slot
	sub_s	r3,r2,1
#endif
	ld	r4,[r0,0]		; first (even) word pair
	ld	r5,[r1,0]
	lsr.f	lp_count,r3,3		; (n-1)/8 loop iterations; C = bit 2 of n-1,
					; consumed by the bcc after the loop
#ifdef __ARCEM__
	/* A branch can't be the last instruction in a zero overhead loop.
	   So we move the branch to the start of the loop, duplicate it
	   after the end, and set up r12 so that the branch isn't taken
	   initially.  */
	mov_s	r12,WORD2		; seed so the loop-head branch falls through once
	lpne	.Loop_end		; skip the body entirely when (n-1)/8 == 0
	brne	WORD2,r12,.Lodd		; odd words of the previous group differed?
	ld	WORD2,[r0,4]
#else
	lpne	.Loop_end		; skip the body entirely when (n-1)/8 == 0
	ld_s	WORD2,[r0,4]		; odd (second) word of this group from s1
#endif
	ld_s	r12,[r1,4]		; ... and from s2
	brne	r4,r5,.Leven		; even words differ
	ld.a	r4,[r0,8]		; advance both pointers, fetch next even words
	ld.a	r5,[r1,8]
#ifdef __ARCEM__
.Loop_end:
	brne	WORD2,r12,.Lodd		; duplicate of the in-loop branch for the last group
#else
	brne	WORD2,r12,.Lodd		; odd words differ
#ifdef __ARCHS__
	nop				; NOTE(review): keeps the branch off the loop-end
					; slot on HS (cf. the EM comment above) -- confirm
#endif
.Loop_end:
#endif
	asl_s	SHIFT,SHIFT,3		; tail-length info, scaled from bytes to bits
	bcc_s	.Last_cmp		; C from the lsr.f above survives the loop (loads
					; and compare-and-branch insns leave flags intact):
					; clear -> at most 4 tail bytes, already in r4/r5
	brne	r4,r5,.Leven		; 5..8 tail bytes: check the full word pair first,
	ld	r4,[r0,4]		; then fetch the final (partial) word pair
	ld	r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	nop_s
	; one more load latency cycle
.Last_cmp:
	xor	r0,r4,r5		; bit difference of the tail words
	bset	r0,r0,SHIFT		; sentinel bit in the last valid byte, so bytes
					; past n can never win the scan below
	sub_s	r1,r0,1
	bic_s	r1,r1,r0		; r1 = ones strictly below the lowest diff bit
	norm	r1,r1			; normalisation count (31 - diff-bit index)
	b.d	.Leven_cmp
	and	r1,r1,24		; -> left shift putting the first differing
					;    byte (LE: the lowest one) at the top
.Leven:
	xor	r0,r4,r5		; full-word difference (no sentinel needed)
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
.Leven_cmp:
	asl	r2,r4,r1		; discard bytes above the first difference
	asl	r12,r5,r1
	lsr_s	r2,r2,1			; headroom so the subtraction below cannot
	lsr_s	r12,r12,1		; wrap into the sign bit
	j_s.d	[blink]
	sub	r0,r2,r12		; sign = ordering of the first differing byte
	.balign	4
.Lodd:
	xor	r0,WORD2,r12		; same scheme for an odd-word mismatch
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1
	; slow track insn
	and	r1,r1,24
	asl_s	r2,r2,r1
	asl_s	r12,r12,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12
#else /* !__ARC700__ */
	.balign	4
.Last_cmp:
	xor	r0,r4,r5		; tail difference bits
	b.d	.Leven_cmp
	bset	r0,r0,SHIFT		; sentinel in the last valid byte (delay slot)
.Lodd:
	mov_s	r4,WORD2		; fold the odd-word mismatch into the common path
	mov_s	r5,r12
.Leven:
	xor	r0,r4,r5
.Leven_cmp:
	mov_s	r1,0x80808080
	; uses long immediate
	sub_s	r12,r0,1
	bic_s	r0,r0,r12		; isolate the lowest difference bit
	sub	r0,r1,r0		; expand it into a mask covering the first
	xor_s	r0,r0,r1		; differing byte
	and	r1,r5,r0		; mask both words down to that byte
	and	r0,r4,r0
	xor.f	0,r0,r1			; N = masked words differ in bit 31
	sub_s	r0,r0,r1		; (16-bit sub_s does not update the flags)
	j_s.d	[blink]
	mov.mi	r0,r1			; bit-31 difference: plain subtraction's sign is
					; wrong, use r1's own sign instead.
					; NOTE(review): if s1's word alone has bit 31 set
					; and r5's masked byte is 0, this returns 0 for
					; unequal buffers -- verify against upstream
#endif /* !__ARC700__ */
#else /* BIG ENDIAN */
.Last_cmp:
	neg_s	SHIFT,SHIFT		; right-shift count discarding bytes past n
	lsr	r4,r4,SHIFT		; (only the low 5 bits of the count are used)
	lsr	r5,r5,SHIFT
	; slow track insn
.Leven:
	sub.f	r0,r4,r5		; BE: word order equals byte order, compare directly
	mov.ne	r0,1			; unequal -> provisional positive result
	j_s.d	[blink]
	bset.cs	r0,r0,31		; carry (r4 < r5 unsigned) -> make it negative
.Lodd:
	cmp_s	WORD2,r12		; carry = WORD2 < r12 (unsigned)
#if defined (__ARC700__) || defined (__ARCEM__) || defined (__ARCHS__)
	mov_s	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31		; 1 or 0x80000001, sign per the compare
#else
	j_s.d	[blink]
	rrc	r0,2			; r0 = (2 >> 1) | (C << 31): 1 or 0x80000001
#endif /* __ARC700__ || __ARCEM__ || __ARCHS__ */
#endif /* ENDIAN */
	.balign	4
.Lbytewise:
	breq	r2,0,.Lnil		; n == 0 -> buffers compare equal
	ldb	r4,[r0,0]		; first (even) byte pair
	ldb	r5,[r1,0]
	lsr.f	lp_count,r3		; (n-1)/2 iterations; C = bit 0 of n-1
#ifdef __ARCEM__
	mov	r12,r3			; seed so the loop-head branch falls through
	lpne	.Lbyte_end		; (same ZOL trick as in the word loop above)
	brne	r3,r12,.Lbyte_odd
#else
	lpne	.Lbyte_end		; skip the body when (n-1)/2 == 0
#endif
	ldb_s	r3,[r0,1]		; odd byte of this pair
	ldb_l	r12,[r1,1]
	brne	r4,r5,.Lbyte_even	; even bytes differ
	ldb.a	r4,[r0,2]		; advance pointers, fetch next even bytes
	ldb.a	r5,[r1,2]
#ifdef __ARCEM__
.Lbyte_end:
	brne	r3,r12,.Lbyte_odd	; duplicate of the in-loop branch
#else
	brne	r3,r12,.Lbyte_odd	; odd bytes differ
#ifdef __ARCHS__
	nop				; keep the branch out of the loop-end slot
#endif
.Lbyte_end:
#endif
	bcc_l	.Lbyte_even		; C clear: single tail byte, already in r4/r5
	brne	r4,r5,.Lbyte_even	; two tail bytes: even pair first
	ldb_s	r3,[r0,1]		; then the final odd pair
	ldb_s	r12,[r1,1]
.Lbyte_odd:
	j_s.d	[blink]
	sub	r0,r3,r12		; difference of the mismatching bytes
.Lbyte_even:
	j_s.d	[blink]
	sub	r0,r4,r5
.Lnil:
	j_s.d	[blink]
	mov_l	r0,0			; equal (n == 0)
ENDFUNC (memcmp)
#endif /* !__ARC601__ && __ARC_NORM__ && __ARC_BARREL_SHIFTER__ */

#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */
