1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
31#include <picolibc.h>
32
33	.file "strcm_ca.s"
34#ifdef	__PIC
35	.pic
36#endif
37#ifdef	__PID
38	.pid
39#endif
40/*
41 * (c) copyright 1988,1993 Intel Corp., all rights reserved
42 */
43
44/*
45	procedure strcmp  (optimized assembler version for the CA)
46
47	result = strcmp (src1_addr, src2_addr)
48
49	compare the null terminated string pointed to by src1_addr to
50	the string space pointed to by src2_addr.  Return 0 iff the strings
51	are equal, -1 if src1_addr is lexically less than src2_addr, and 1
52	if it is lexically greater.
53
54	Undefined behavior will occur if the end of either source string
55	(i.e. the terminating null byte) is in the last word of the program's
56	allocated memory space.  This is so because, in several cases, strcmp
57	will fetch ahead one word.  Disallowing the fetch ahead would impose
58	a severe performance penalty.
59
60	This program handles five cases:
61
62	1) both arguments start on a word boundary
63	2) neither are word aligned, but they are offset by the same amount
64	3) source1 is word aligned, source2 is not
65	4) source2 is word aligned, source1 is not
66	5) neither is word aligned, and they are offset by differing amounts
67
68	At the time of this writing, only g0 thru g7 and g14 are available
69	for use in this leafproc;  other registers would have to be saved and
70	restored.  These nine registers are sufficient to implement the routine.
71	The registers are used as follows:
72
73	g0  original src1 ptr;  return result
74	g1  src2 ptr;  0xff  --  byte extraction mask
75	g2  src1 word ptr
76	g3  src2 word ptr
77	Little endian:
78		g4  lsw of src1
79		g5  msw of src1
80		g6  src2 word
81		g7  extracted src1
82	Big endian:
83		g4  msw of src1
84		g5  lsw of src1
85		g6  extracted src1
86		g7  src2 word
87	g13 return address
88	g14 shift count
89*/
90
91#if __i960_BIG_ENDIAN__
92#define MSW g4
93#define LSW g5
94#define SRC1 g6
95#define SRC2 g7
96#else
97#define LSW g4
98#define MSW g5
99#define SRC2 g6
100#define SRC1 g7
101#endif
102
103	.globl	_strcmp
104	.globl	__strcmp
105	.leafproc	_strcmp, __strcmp
106	.align	2
107_strcmp:
108#ifndef __PIC
109	lda 	Lrett,g14
110#else
111	lda 	Lrett-(.+8)(ip),g14
112#endif
113
114__strcmp:
115Lrestart:
116	notand	g0,3,g2		# extract word addr of start of src1
117	 lda	(g14),g13	# preserve return address
118#if __i960_BIG_ENDIAN__
119	cmpo	g0,g2		# check alignment of src1
120#endif
121	 ld	(g2),LSW	# fetch word with at least first byte of src1
122	notand	g1,3,g3		# extract word addr of start of src2
123	 ld	4(g2),MSW	# fetch second word of src1
124#if __i960_BIG_ENDIAN__
125	 bne	Lsrc1_unaligned	# branch if src1 is unaligned
126	cmpo	g3,g1		# check alignment of src2
127	 ld	(g3),SRC2	# fetch word with at least first byte of src2
128	mov	LSW,SRC1	# extract word of src1
129	 lda	8(g2),g2	# advance src1 word addr
130	 bne.f	Lsrc2_unaligned	# branch if src2 is NOT word aligned
131
132				/* src2 is word aligned */
133
134Lwloop2:				# word comparing loop
135	cmpo	SRC2,SRC1	# compare src1 and src2 words
136	 lda	0xff000000,g1	# byte extraction mask
137	mov	MSW,LSW		# move msw of src1 to lsw
138	 ld	(g2),MSW	# pre-fetch next msw of src1
139	addo	4,g2,g2		# post-increment src1 addr
140	 lda	4(g3),g3	# pre-increment src2 addr
141	 bne.f	Lcloop		# branch if src1 and src2 unequal
142	scanbyte 0,SRC1		# check for null byte in src1 word
143	 ld	(g3),SRC2	# pre-fetch next word of src2
144	mov	LSW,SRC1	# extract word of src1
145	 lda	0,g0		# prepare to return zero, indicating equality
146	bno.t	Lwloop2		# branch if null byte not encountered
147
148				/* words were equal and contained null byte */
149
150	mov	0,g14		# conform to register conventions
151	 bx	(g13)		# return
152
153
154Lsrc1_unaligned:
155#endif
156	cmpo	g3,g1		# check alignment of src2
157	 ld	(g3),SRC2	# fetch word with at least first byte of src2
158	shlo	3,g0,g14	# compute shift count for src1
159#if __i960_BIG_ENDIAN__
160	subo	g14,0,g14	# 32 - shift count for big endian.
161#endif
162	eshro	g14,g4,SRC1	# extract word of src1
163	 lda	8(g2),g2	# advance src1 word addr
164	 bne.f	Lsrc2_unaligned	# branch if src2 is NOT word aligned
165
166				/* at least src2 is word aligned */
167
168Lwloop:				# word comparing loop
169	cmpo	SRC2,SRC1	# compare src1 and src2 words
170#if __i960_BIG_ENDIAN__
171	 lda	0xff000000,g1	# byte extraction mask
172#else
173	 lda	0xff,g1		# byte extraction mask
174#endif
175	mov	MSW,LSW		# move msw of src1 to lsw
176	 ld	(g2),MSW	# pre-fetch next msw of src1
177	addo	4,g2,g2		# post-increment src1 addr
178	 lda	4(g3),g3	# pre-increment src2 addr
179	 bne.f	Lcloop		# branch if src1 and src2 unequal
180	scanbyte 0,SRC1		# check for null byte in src1 word
181	 ld	(g3),SRC2	# pre-fetch next word of src2
182	eshro	g14,g4,SRC1	# extract word of src1
183	 lda	0,g0		# prepare to return zero, indicating equality
184	bno.t	Lwloop		# branch if null byte not encountered
185
186				/* words were equal and contained null byte */
187
188	mov	0,g14		# conform to register conventions
189	 bx	(g13)		# return
190
191Lcloop_setup:			# setup for coming from Lsrc2_unaligned
192	mov	LSW,SRC1	# restore extracted src1 word
193#if __i960_BIG_ENDIAN__
194	 lda	0xff000000,g1	# byte extraction mask
195#else
196	 lda	0xff,g1		# byte extraction mask
197#endif
198
199Lcloop:				# character comparing loop
200	and	SRC2,g1,g3	# extract next char of src2
201	and	SRC1,g1,g0	# extract next char of src1
202	cmpobne.f g0,g3,.diff	# check for equality
203	cmpo	0,g0		# check for null byte
204#if __i960_BIG_ENDIAN__
205	shro	8,g1,g1		# shift mask for next byte
206#else
207	shlo	8,g1,g1		# shift mask for next byte
208#endif
209	 bne.t	Lcloop		# branch if null not reached
210
211				/* words are equal up thru null byte */
212
213	mov	0,g14
214	 bx	(g13)		# g0 = 0 (src1 == src2)
215Lrett:
216	ret
217
218.diff:
219	mov	0,g14
220	 bl	Lless_than_exit
221Lgreater_than_exit:
222	mov	1,g0
223	 bx	(g13)		# g0 = 1 (src1 > src2)
224Lless_than_exit:
225	subi	1,0,g0
226	 bx	(g13)		# g0 = -1 (src1 < src2)
227
228Lsrc2_unaligned:
229	mov	SRC1,LSW	# retain src1 extracted word
230	 ld	4(g3),SRC1	# fetch second word of src2
231	shlo	3,g1,MSW	# compute shift count for src2
232#if __i960_BIG_ENDIAN__
233	subo	MSW,0,MSW	# 32 - shift count for big endian.
234#endif
235	eshro	MSW,g6,SRC2	# extract word of src2
236	cmpo	LSW,SRC2	# compare src1 and src2 words
237	notor	g1,3,MSW	# first step in computing new src1 ptr
238	 lda	4(g3),g1	# set new src2 ptr
239	 bne.f	Lcloop_setup	# first four bytes differ
240	scanbyte 0,LSW		# check for null byte
241	 lda	(g13),g14	# prepare return pointer for Lrestart
242	subo	MSW,g0,g0	# second (final) step in computing new src1 ptr
243	 bno.t	Lrestart		# if null byte not encountered, continue
244				/* with both string fetches shifted such that */
245				/* src2 is now word aligned. */
246	mov	0,g14		# conform to register conventions.
247	 lda	0,g0		# return indicator of equality.
248	bx	(g13)
249