1/* SH5 code Copyright 2002 SuperH Ltd. */
2
3#include "asm.h"
4
/* int strcmp (const char *s1, const char *s2)
   Entry point shared by the SHmedia and SHcompact implementations below.
   ENTRY is not defined in this file; it is expected to come from asm.h.  */
ENTRY(strcmp)

#if __SHMEDIA__
/* SHmedia implementation.
   In:    r2 = s1, r3 = s2, r18 = return address (loaded into tr2).
   Out:   r2 = s1[i] - s2[i] (bytes zero-extended) when the outcome is
	  decided within the first eight bytes, otherwise -1, 0 or 1.
   Uses:  r4-r9, r19-r23 and tr0-tr3 as scratch; r2 and r3 are overwritten.
   SHLO and SHHI are macros from outside this file (presumably asm.h);
   they are used here as a pair of endian-dependent shifts with the
   shift counts r22 and r20 = -r22.  */

	/* Byte-wise prologue: compare the first eight bytes one pair at a
	   time.  Even offsets use r4/r5 and leave through tr0 (quickret0),
	   odd offsets use r6/r7 and leave through tr1 (quickret1).  A pair
	   ends the comparison when the s1 byte is zero or the two bytes
	   differ.  The loads for the next pair are interleaved with the
	   branches for the current one.  */
	ld.ub	r2,0,r4
	pt/l	quickret0,tr0
	ld.ub	r3,0,r5
	ptabs	r18,tr2		/* tr2 = return address */
	beqi/u	r4,0,tr0
	ld.ub	r2,1,r6
	bne/u	r4,r5,tr0
	pt/l	quickret1,tr1
	ld.ub	r3,1,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,2,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,2,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,3,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,3,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,4,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,4,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,5,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,5,r7
	beqi/u	r6,0,tr1
	ld.ub	r2,6,r4
	bne/u	r6,r7,tr1
	ld.ub	r3,6,r5
	beqi/u	r4,0,tr0
	ld.ub	r2,7,r6
	bne/u	r4,r5,tr0
	ld.ub	r3,7,r7
	beqi/u	r6,0,tr1
	sub	r3,r2,r3	/* r3 = s2 - s1 */
	bne/u	r6,r7,tr1

	/* Nothing was decided in eight bytes: continue one quadword at a
	   time, with s1 rounded down to an 8-byte boundary.  */
	andi	r2,-8,r2	/* r2 = s1 & -8 */
	add	r3,r2,r3	/* r3 = s2 - (s1 & 7), the s2 address matching r2 */
	ldlo.q	r3,8,r23	/* r23 = s2 data loaded from r3 + 8 */
	pt	r23_zero,tr0
	shlli	r3,3,r22	/* r22 = r3 * 8, shift count used with SHLO */
	sub	r63,r22,r20	/* r20 = -r22, shift count used with SHHI */
	movi	0x101,r6
	mperm.w	r6,r63,r6	/* r6 = 0x01 in every byte */
	SHLO	r6,r22,r7	/* r7 = r6 shifted by r22 */
	msubs.ub r7,r23,r8	/* r8 = 0x01 where r7 is 0x01 and r23 is zero */
	pt	loop,tr1
	bnei/u	r8,0,tr0 // r23_zero
	pt	found_zero,tr0
	addi	r3,15,r3
	andi	r3,-8,r3
	sub	r3,r2,r3	/* r3 = ((r3 + 15) & -8) - r2, index for ldx.q */
	/* r7 != r6 means the shift changed the mask, i.e. the strings are
	   not aligned to each other.  */
	bne/l	r7,r6,tr1 // loop
	/* The strings are aligned to each other.  */
	/* It is possible to have a loop with six cycles / iteration
	   by re-ordering the exit conditions, but then it needs extra
	   time and/or code to sort out the r4 != r5 case.  */
	pt	al_loop,tr1
	pt	al_found_zero,tr0
al_loop:
	ld.q	r2,8,r4		/* r4 = next s1 quadword */
	ldx.q	r2,r3,r5	/* r5 = s2 quadword at r2 + r3 */
	addi	r2,8,r2
	mcmpeq.b r63,r4,r8	/* r8 = 0xff for every zero byte in r4 */
	pt	cmp_quad,tr3
	bnei/u	r8,0,tr0  // al_found_zero
	beq/l	r4,r5,tr1 // al_loop
	blink	tr3,r63   // cmp_quad

	.balign 8
/* Exits of the byte-wise prologue: return the difference of the last
   byte pair that was loaded.  */
quickret0:
	sub	r4,r5,r2
	blink	tr2,r63
quickret1:
	sub	r6,r7,r2
	blink	tr2,r63

/* The strings are not aligned to each other.  Each s2 quadword in r5 is
   assembled from r23, which was derived from the previous s2 load, and
   from the next aligned s2 quadword in r19.  */
loop:
	ld.q	r2,8,r4		/* r4 = next s1 quadword */
	ldx.q	r2,r3,r19	/* r19 = next s2 quadword at r2 + r3 */
	addi	r2,8,r2
	msubs.ub r6,r4,r8	/* r8 = 0x01 for every zero byte in r4 */
	mcmpeq.b r63,r19,r9	/* r9 = 0xff for every zero byte in r19 */
	SHHI	r19,r20,r21
	or	r21,r23,r5	/* r5 = s2 quadword to compare against r4 */
	SHLO	r19,r22,r23	/* r23 = part of r19 kept for the next round */
	/* r8 marks zeros with 0x01 and r9 with 0xff, so the two can only
	   be equal when neither quadword contains a zero byte.  */
	bne/u	r8,r9,tr0 // found_zero
	beq/l	r4,r5,tr1 // loop
/* Reached with r4 != r5 and no zero byte in r4.  Order the quadwords as
   unsigned values with the first string byte most significant (hence the
   byte reversal on little-endian) and return 1 or -1.  */
cmp_quad:
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6	/* r6 = (r4 > r5) */
	cmpgtu	r5,r4,r7	/* r7 = (r5 > r4) */
	sub	r6,r7,r2
	blink tr2,r63
/* A zero byte was seen in r4 (flagged in r8) or in r19 (flagged in r9).  */
found_zero:
	pt	zero_now,tr0
	pt	cmp_quad,tr1
	SHHI	r9,r20,r7	/* r7 = r9 shifted like the r19 part of r5 */
	bne/u	r8,r7,tr0 // zero_now
	/* r8 == r7 can only hold when both are zero: no terminator in the
	   quadwords currently in r4/r5.  */
	bne/u	r4,r5,tr1 // cmp_quad
	SHLO	r9,r22,r8	/* r8 = zero mask shifted like r23 */
/* The terminator lies in the s2 data held in r23: compare it against the
   next s1 quadword.  */
r23_zero:
	ld.q	r2,8,r4
	add	r23,r63,r5	/* r5 = r23 */
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
   an 0x01 or 0xff mask for every zero found in one of the operands.
   If both operands have the first zero in the same place, this mask
   allows us to truncate the comparison to the valid bytes in the
   strings.  If the first zero is in different places, it doesn't
   matter if some invalid bytes are included, since the comparison
   of the zero with the non-zero will determine the outcome.  */
#ifdef __LITTLE_ENDIAN__
	/* Turn r8 into a mask that is meant to keep the bytes up to and
	   including the first flagged one, then apply it to both values.  */
	shlli	r8,8,r8
	addi	r8,-1,r9
	andc	r9,r8,r8
	and	r8,r4,r4
	and	r8,r5,r5
#else
	/* Derive a shift count (a multiple of 8) from the position of the
	   first flagged byte and shift both values right by it; this is
	   meant to drop the bytes that follow the first flagged one.  */
	shlri r8,1,r8
	nsb	r8,r8
	addi	r8,8,r8
	andi	r8,56,r8
	sub	r63,r8,r8
	shlrd	r4,r8,r4
	shlrd	r5,r8,r5
#endif
	/* Same unsigned ordering as cmp_quad; here the result may be 0.  */
#ifdef __LITTLE_ENDIAN__
	byterev r4,r4
	byterev r5,r5
#endif
	cmpgtu	r4,r5,r6
	cmpgtu	r5,r4,r7
	sub	r6,r7,r2
	blink tr2,r63
148
#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */

/* SHcompact implementation.  All argument and result registers are named
   through macros because the SH5 and SH 1..4 assignments differ:
     STR1, STR2   the two string pointers (both are advanced)
     RESULT       register that receives the return value
     TMP          scratch longword loaded from STR2
   r0 and r1 are used as scratch in both cases.  The return value is the
   difference of the zero-extended bytes at the first position where the
   strings differ or where string1 ends.  */
#ifdef __SH5__
/* Entry: r2: string1
	  r3: string2
   Exit:  r2: result  */
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
/* Entry: r4: string1
	  r5: string2
   Exit:  r0: result
	  r1-r2,r4-r5: clobbered  */
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */

	/* The longword loop is only used when both pointers are 4-byte
	   aligned; otherwise go straight to the byte loop.  */
	mov     STR1,r0
	or      STR2,r0
	tst	#3,r0
	bf	L_setup_char_loop
	mov	#0,r0		/* r0 = 0, reference value for cmp/str */
#ifdef DELAYED_BRANCHES
	/* Software-pipelined: the STR1 load for the next round sits in the
	   delay slot of the loop branch, so STR1 runs one load ahead.  */
	mov.l	@STR1+,r1
	.align  2
Longword_loop:
	mov.l	@STR2+,TMP
	cmp/str	r0,r1		/* T = some byte of r1 is zero */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt.s	Longword_loop
	mov.l	@STR1+,r1
	add #-4, STR1		/* undo the delay-slot load after a mismatch */
Longword_loop_end:
	/* Step both pointers back to the longword that ended the loop and
	   let the byte loop find the exact position.  */
	add #-4, STR1
	add #-4, STR2
L_setup_char_loop:
	mov.b	@STR1+,r0
	.align  2
L_char_loop:
	mov.b	@STR2+,r1
	tst	r0,r0
	bt	L_return	/* end of string1 */
	cmp/eq	r0,r1
	bt.s L_char_loop
	mov.b	@STR1+,r0
	/* Mismatch: the delay slot has already loaded the following byte,
	   so reload the string1 byte that was actually compared.  */
	add	#-2,STR1
	mov.b	@STR1,r0
#else /* ! DELAYED_BRANCHES */
	/* This path uses the STR1/STR2/TMP names as well, so that it
	   follows the same register assignment as the alignment check
	   above and the delayed-branch path.  */
	.align  2
Longword_loop:
	mov.l	@STR1+,r1
	mov.l	@STR2+,TMP
	cmp/str	r0,r1		/* T = some byte of r1 is zero */
	bt	Longword_loop_end
	cmp/eq	r1,TMP
	bt	Longword_loop
Longword_loop_end:
	/* Step both pointers back to the longword that ended the loop and
	   let the byte loop find the exact position.  */
	add #-4, STR1
	add #-4, STR2
	.align  2
L_setup_char_loop:
L_char_loop:
	mov.b	@STR1+,r0
	mov.b	@STR2+,r1
	tst	r0,r0
	bt	L_return	/* end of string1 */
	cmp/eq	r0,r1
	bt L_char_loop
#endif
	/* r0 and r1 hold the deciding bytes; mov.b sign-extends, so both
	   are zero-extended before the subtraction.  */
L_return:
	extu.b	r0,RESULT
	extu.b	r1,r1
	rts
	sub	r1,RESULT	/* executed in the rts delay slot */
#endif /* ! __SHMEDIA__ */
226