1/* SH5 code Copyright 2002 SuperH Ltd. */
2
3#include <picolibc.h>
4
5#include "asm.h"
6
/* strcmp: compare the NUL-terminated strings s1 and s2.  Returns a
   negative, zero or positive value as s1 sorts before, equal to, or
   after s2.  */
ENTRY(strcmp)

#if __SHMEDIA__
/* SHmedia version.

   Register usage, as read off the code below:
     r2        in: s1; later the 8-byte-aligned s1 cursor; out: result
     r3        in: s2; later an offset such that r2+r3 addresses s2 data
     r18       return address, moved into tr2 by ptabs
     r4/r5     s1/s2 value under comparison (also r6/r7 in the prologue)
     r6        after the prologue: 0x01 replicated into every byte
     r8, r9    per-byte "NUL found" markers (0x01 or 0xff per byte)
     r19-r23   state for re-aligning s2 data in the unaligned loop
     tr0-tr3   branch targets (tr2 = return)
     r63       used wherever a zero operand or a discarded destination
               is needed (presumably the always-zero register)

   Structure:
     1. Compare the first eight bytes one at a time; quickret0 and
        quickret1 return the difference of the two deciding bytes.
     2. Round s1 down to an 8-byte boundary and continue a quadword at
        a time: al_loop when s2 has the same alignment as s1, loop
        otherwise.
     3. On a difference, or a NUL in either quadword, order the two
        quadwords as unsigned integers (cmp_quad / al_found_zero) and
        return the difference of the two cmpgtu results.

   SHLO and SHHI are macros expected from asm.h (not visible here);
   presumably they select the endian-dependent shift direction --
   confirm there before touching any shift.

   The instruction order is deliberate: loads and pt set-ups are placed
   ahead of the branches that consume them.  Do not reorder casually.  */

	/* Bytes 0..7.  Even bytes use r4/r5 and leave through tr0
	   (quickret0); odd bytes use r6/r7 and tr1 (quickret1).  Each step
	   leaves when the s1 byte is 0 or the two bytes differ.  */
	ld.ub	r2,0,r4
	pt/l	quickret0,tr0
	ld.ub	r3,0,r5
	ptabs	r18,tr2		// tr2 = return target for every exit path
	beqi/u	r4,0,tr0
	ld.ub	r2,1,r6
	bne/u	r4,r5,tr0
	pt/l	quickret1,tr1
	ld.ub	r3,1,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,2,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,2,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,3,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,3,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,4,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,4,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,5,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,5,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,6,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,6,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,7,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,7,r7
	beqi/u	r6,0,tr1
	sub	r3,r2,r3	// r3 = s2 - s1 (set up early for the code below)
	bne/u	r6,r7,tr1

	/* The first eight bytes are equal and contain no NUL.  Switch to
	   8-byte accesses aligned relative to s1.  */
	andi	r2,-8,r2	// r2 = s1 rounded down to a multiple of 8
	add	r3,r2,r3	// r3 = s2-side address that lines up with r2
	ldlo.q	r3,8,r23	// r23 = s2 bytes starting at r3+8 (a partial quadword if misaligned -- presumably)
	pt	r23_zero,tr0
	shlli	r3,3,r22	// r22 = 8 * r3, the bit-shift count used to realign s2 data
	sub	r63,r22,r20	// r20 = -r22, the complementary shift count
	movi	0x101,r6
	mperm.w	r6,r63,r6	// presumably spreads 0x0101 over the whole register
	SHLO	r6,r22,r7	// r7 = that pattern shifted like the s2 data
	msubs.ub r7,r23,r8	// r8 = NUL markers for r23 (see the comment at al_found_zero)
	pt	loop,tr1
	bnei/u	r8,0,tr0 // r23_zero
	pt	found_zero,tr0
	addi	r3,15,r3
	andi	r3,-8,r3
	sub	r3,r2,r3	// r3 = ((r3 + 15) & -8) - r2, the offset ldx.q adds to r2
	bne/l	r7,r6,tr1 // loop
	/* The strings are aligned to each other.  */
	/* It is possible to have a loop with six cycles / iteration
	   by re-ordering the exit conditions, but then it needs extra
	   time and/or code to sort out the r4 != r5 case.  */
	pt	al_loop,tr1
	pt	al_found_zero,tr0
al_loop:
	ld.q	r2,8,r4		// next s1 quadword
	ldx.q	r2,r3,r5	// matching s2 quadword
	addi	r2,8,r2
	mcmpeq.b r63,r4,r8	// r8 = NUL markers for r4
	pt	cmp_quad,tr3
	bnei/u	r8,0,tr0  // al_found_zero
	beq/l	r4,r5,tr1 // al_loop
	blink	tr3,r63   // cmp_quad

	.balign 8
/* Exits of the byte-wise prologue: result = s1 byte - s2 byte.  */
quickret0:
	sub	r4,r5,r2
	blink	tr2,r63
quickret1:
	sub	r6,r7,r2
	blink	tr2,r63

/* Quadword loop for s2 misaligned relative to s1.  r23 carries the s2
   bytes left over from the previously loaded aligned s2 quadword.  */
loop:
	ld.q	r2,8,r4		// next s1 quadword
	ldx.q	r2,r3,r19	// next aligned s2 quadword
	addi	r2,8,r2
	msubs.ub r6,r4,r8	// r8 = NUL markers for r4
	mcmpeq.b r63,r19,r9	// r9 = NUL markers for r19
	SHHI	r19,r20,r21
	or	r21,r23,r5	// r5 = s2 data lined up with r4
	SHLO	r19,r22,r23	// remainder of r19, carried to the next iteration
	bne/u	r8,r9,tr0 // found_zero
	beq/l	r4,r5,tr1 // loop
/* Order r4 (s1 data) against r5 (s2 data) as unsigned quadwords and
   return (r4 > r5) - (r5 > r4).  Little-endian targets reverse the
   bytes first.  */
cmp_quad:
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63
/* Reached from loop when the NUL markers r8 (for r4) and r9 (for r19)
   are not equal.  */
found_zero:
	pt	zero_now,tr0
	pt	cmp_quad,tr1
	SHHI	r9,r20,r7	// markers for the part of r19 that was merged into r5
	bne/u	r8,r7,tr0 // zero_now
	bne/u	r4,r5,tr1 // cmp_quad
	SHLO	r9,r22,r8	// markers for the part of r19 now carried in r23
/* Compare the next s1 quadword against the carried s2 bytes in r23,
   with r8 describing where r23 holds a NUL.  Also entered directly
   from the set-up code above.  */
r23_zero:
	ld.q	r2,8,r4
	add	r23,r63,r5	// r5 = r23
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
   a 0x01 or 0xff mask for every zero found in one of the operands.
   If both operands have the first zero in the same place, this mask
   allows us to truncate the comparison to the valid bytes in the
   strings.  If the first zero is in different places, it doesn't
   matter if some invalid bytes are included, since the comparison
   of the zero with the non-zero will determine the outcome.  */
#ifdef __LITTLE_ENDIAN__
	/* Build a bit mask from r8 and AND it into both operands; the
	   intent is to keep the bytes up to and including the first
	   marked one.  */
	shlli	r8,8,r8
	addi	r8,-1,r9
	andc	r9,r8,r8
	and	r8,r4,r4
	and	r8,r5,r5
#else
	/* Derive a multiple-of-8 shift count from the position of the
	   first marker in r8 and shift both operands right by it, which
	   drops the bytes that follow the first marked one.  */
	shlri r8,1,r8
	nsb	r8,r8
	addi	r8,8,r8
	andi	r8,56,r8
	sub	r63,r8,r8
	shlrd	r4,r8,r4
	shlrd	r5,r8,r5
#endif
	/* Same final ordering step as cmp_quad.  */
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63
150
#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */

/* SHcompact version.

   If both pointers are 4-byte aligned, compare one longword at a time
   until a longword of string1 contains a zero byte or the two
   longwords differ; then step both pointers back to that longword and
   finish byte by byte.  Otherwise compare byte by byte from the start.
   The result is (string1 byte) - (string2 byte) for the first position
   that is zero in string1 or differs, both bytes zero-extended.

   All pointer and scratch accesses go through STR1 / STR2 / TMP /
   RESULT so that both register mappings below work in both the
   DELAYED_BRANCHES and the non-DELAYED_BRANCHES arm.  r0 and r1 are
   used directly as scratch in either mapping.  */

#ifdef __SH5__
/* Register mapping used when __SH5__ is defined.  */
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
! Entry: r4: string1
!        r5: string2
! Exit:  r0: result
!        r1-r2,r4-r5: clobbered
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */

	mov     STR1,r0
	or      STR2,r0
	tst	#3,r0			/* T = both pointers 4-byte aligned */
	bf	L_setup_char_loop	/* not aligned: byte loop only */
	mov	#0,r0			/* r0 = 0, the byte pattern cmp/str looks for */
#ifdef DELAYED_BRANCHES
	mov.l	@STR1+,r1		/* preload the first string1 longword */
	.align  2
Longword_loop:
	mov.l	@STR2+,TMP
	cmp/str	r0,r1			/* T = some byte of r1 equals the r0 byte, i.e. is zero */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt.s	Longword_loop
	mov.l	@STR1+,r1		/* delay slot: fetch the next string1 longword */
	add #-4, STR1			/* longwords differ: undo the delay-slot fetch */
Longword_loop_end:
	add #-4, STR1			/* back to the longword that ended the loop */
	add #-4, STR2
L_setup_char_loop:
	mov.b	@STR1+,r0		/* preload the first string1 byte */
	.align  2
L_char_loop:
	mov.b	@STR2+,r1
	tst	r0,r0
	bt	L_return		/* string1 byte is zero */
	cmp/eq	r0,r1
	bt.s L_char_loop
	mov.b	@STR1+,r0		/* delay slot: fetch the next string1 byte */
	add	#-2,STR1		/* bytes differ: point back at the differing byte ... */
	mov.b	@STR1,r0		/* ... and reload it, the delay slot overwrote r0 */
#else /* ! DELAYED_BRANCHES */
	.align  2
Longword_loop:
	mov.l	@STR1+,r1
	mov.l	@STR2+,TMP
	cmp/str	r0,r1			/* T = some byte of r1 equals the r0 byte, i.e. is zero */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt	Longword_loop
Longword_loop_end:
	add #-4, STR1			/* back to the longword that ended the loop */
	add #-4, STR2
	.align  2
L_setup_char_loop:
L_char_loop:
	mov.b	@STR1+,r0
	mov.b	@STR2+,r1
	tst	r0,r0
	bt	L_return		/* string1 byte is zero */
	cmp/eq	r0,r1
	bt L_char_loop
#endif
/* r0 = deciding string1 byte, r1 = deciding string2 byte.  */
L_return:
	extu.b	r0,RESULT		/* zero-extend both bytes before subtracting */
	extu.b	r1,r1
	rts
	sub	r1,RESULT		/* delay slot: RESULT = RESULT - r1 */
#endif /* ! __SHMEDIA__ */
228