1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
31#include <picolibc.h>
32
/*
 * Assembler bookkeeping: record the source file name, and emit the .pic /
 * .pid directives only when the build defines __PIC / __PID (presumably the
 * position-independent code / data modes -- confirm against the i960
 * assembler documentation).
 */
33	.file "memcm_ca.s"
34#ifdef	__PIC
35	.pic
36#endif
37#ifdef	__PID
38	.pid
39#endif
40/*
41 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
42 */
43
44/*
45	procedure memcmp  (optimized assembler version for the CA)
46
47	result = memcmp (src1_addr, src2_addr, max_bytes)
48
49	compare the byte array pointed to by src1_addr to the byte array
50	pointed to by src2_addr.  Return 0 iff the arrays are equal, -1 if
51	src1_addr is lexically less than src2_addr, and 1 if it is lexically
52	greater.  Do not compare more than max_bytes bytes.
53
54	Undefined behavior will occur if the end of either source array
55	is in the last word of the program's allocated memory space.  This
56	is so because, in several cases, memcmp will fetch ahead one word.
57	Disallowing the fetch ahead would impose a severe performance penalty.
58
59	This program handles five cases:
60
61	1) both arguments start on a word boundary
62	2) neither are word aligned, but they are offset by the same amount
63	3) source1 is word aligned, source2 is not
64	4) source2 is word aligned, source1 is not
65	5) neither is word aligned, and they are offset by differing amounts
66
67	At the time of this writing, only g0 thru g7 and g14 are available
68	for use in this leafproc;  other registers would have to be saved and
69	restored.  These nine registers are sufficient to implement the routine.
70	The registers are used as follows:
71
72	g0  original src1 ptr;  extracted word;  return result
73	g1  src2 ptr; byte extraction mask
74	g2  maximum number of bytes to compare
75	g3  src2 word ptr
76	Little endian
77		g4  lsw of src1
78		g5  msw of src1
79		g6  src2 word
80		g7  src1 word ptr
81	Big endian
82		g4  msw of src1
83		g5  lsw of src1
84		g6  src1 word ptr
85		g7  src2 word
86	g13 return address
87	g14 shift count
88*/
89
/*
 * Endian-dependent register aliases.  MSW and LSW name the two src1 words
 * held in g4/g5, SRC1 is the src1 word pointer and SRC2 holds the current
 * src2 word.  Both the g4/g5 roles and the g6/g7 roles swap between the two
 * byte orders.
 */
90#if __i960_BIG_ENDIAN__
91#define MSW g4
92#define LSW g5
93#define SRC1 g6
94#define SRC2 g7
95#else
96#define LSW g4
97#define MSW g5
98#define SRC2 g6
99#define SRC1 g7
100#endif
101
/*
 * memcmp entry and setup.
 *
 * _memcmp loads the address of Lrett into g14 and falls into __memcmp, the
 * second entry point named in the .leafproc directive, which expects g14 to
 * already hold the return address (it is copied to g13 below).
 *
 * In:   g0 = src1 address, g1 = src2 address, g2 = max_bytes
 * Out:  g0 = 0, 1 or -1;  g14 is cleared on every return path
 *
 * Lrestart is also re-entered from Lsrc2_unaligned after src2 has been
 * moved up to a word boundary.  This section fetches the first two src1
 * words and the first src2 word, extracts the first src1 word with the
 * shift count in g14, and dispatches on the alignment of src2.
 */
102	.globl	_memcmp
103	.globl	__memcmp
104	.leafproc	_memcmp, __memcmp
105	.align	2
106_memcmp:
107#ifndef __PIC
108	lda 	Lrett,g14	# g14 = address of Lrett (the ret below)
109#else
110	lda 	Lrett-(.+8)(ip),g14	# same address, formed ip-relative
111#endif
112__memcmp:
113Lrestart:
114#if __i960_BIG_ENDIAN__
115	subo	1,g0,SRC1	# g0 - 1, masked to a word address below
116	notand	SRC1,3,SRC1	# extract word addr of start of src1
117#else
118	notand	g0,3,SRC1	# extract word addr of start of src1
119#endif
120	 lda	(g14),g13	# preserve return address
121	cmpibge.f 0,g2,Lequal_exit	# return equality if byte count is <= 0
122	notand	g1,3,g3		# extract word addr of start of src2
123	 ld	(SRC1),LSW	# fetch word with at least first byte of src1
124	cmpo	g3,g1		# check alignment of src2 (tested by bne.f below)
125	 ld	4(SRC1),MSW	# fetch second word of src1
126	shlo	3,g0,g14	# compute shift count for src1 (byte offset * 8)
127#if __i960_BIG_ENDIAN__
128	subo	g14,0,g14	# adjust shift count for big endian.
129#endif
130	 ld	(g3),SRC2	# fetch word with at least first byte of src2
131	eshro	g14,g4,LSW	# extract word of src1 from the g4/g5 pair
132	 lda	8(SRC1),SRC1	# advance src1 word addr past both fetched words
133	 bne.f	Lsrc2_unaligned	# branch if src2 is NOT word aligned
134
135	mov	LSW,g0		# at least src2 is word aligned
136
137	 lda	0xff,g1		# byte extraction mask, used by Lcloop
138
/*
 * Word loop, used once src2 is word aligned.  Each pass compares the
 * extracted src1 word in g0 with the src2 word in SRC2 while pre-fetching
 * the next word of each array.  It leaves through Lcloop on the first
 * unequal word (with g2 already reduced by 4), or through Lequal_exit once
 * the remaining count in g2 is no longer positive.
 */
139Lwloop:				# word comparing loop
140	cmpo	SRC2,g0		# compare src1 and src2 words
141	 lda	4(g3),g3	# pre-increment src2 addr
142	mov	MSW,LSW		# move msw of src1 to lsw
143	 ld	(SRC1),MSW	# pre-fetch next msw of src1
144	subi	4,g2,g2		# decrement maximum byte count
145	 bne.f	Lcloop		# branch if src1 and src2 unequal
146	cmpi	0,g2		# any bytes left to compare?
147	 ld	(g3),SRC2	# pre-fetch next word of src2
148	eshro	g14,g4,g0	# extract word of src1
149	 lda	4(SRC1),SRC1	# post-increment src1 addr
150	bl.t	Lwloop		# branch if max_bytes not reached yet
151
152	b	Lequal_exit	# arrays were equal up through max_bytes
153
/*
 * Byte loop.  Entered with a src1 word in g0 and a src2 word in SRC2 that
 * have just compared unequal, and walks them one byte at a time using the
 * mask in g1.  The only way out is the cmpobne to .diff.
 *
 * g2 arrives 4 too small: Lwloop has already subtracted the whole word, and
 * Lcloop_setup subtracts 4 so that the path from Lsrc2_unaligned matches.
 * .diff adds the 4 back.  g3 and LSW are used as scratch from here on.
 */
154Lcloop_setup:			# setup for coming from Lsrc2_unaligned
155	mov	LSW,g0		# restore extracted src1 word
156	subo	4,g2,g2		# make up for later re-incrementing
157	 lda	0xff,g1		# byte extraction mask
158
159Lcloop:				# character comparing loop
160#if __i960_BIG_ENDIAN__
161	rotate	24,g1,g1	# shift mask for next byte
162#endif
163	and	SRC2,g1,g3	# extract next char of src2
164	and	g0,g1,LSW	# extract next char of src1
165	cmpobne.f LSW,g3,.diff	# check for equality
166#if ! __i960_BIG_ENDIAN__
167	shlo	8,g1,g1		# shift mask for next byte
168#endif
169	subi	1,g2,g2		# decrement character counter
170	 b	Lcloop		# next byte; the loop exits only through .diff
171
172
/*
 * Equal result: clear g14, return 0 in g0 and branch to the return address
 * saved in g13.  Lrett is the address that the _memcmp entry point loads
 * into g14, so a return through it executes the ret.
 */
173Lequal_exit: 			# arrays are equal up through max_bytes
174	mov	0,g14		# conform to register conventions
175	 lda	0,g0		# return zero, indicating equality
176	bx	(g13)		# return
177Lrett:
178	ret
179
/*
 * A differing byte was found in Lcloop.  Restore the 4 that was subtracted
 * from g2 before the byte loop, so that g2 is the count remaining at the
 * differing byte, then pick the result.  If that count is not positive the
 * difference lies beyond max_bytes and the arrays are reported equal.
 */
180.diff:
181	addo	4,g2,g2		# to make up for extra decrement in loop
182	 lda	0,g14		# conform to register conventions
183	 bl	Lless_than_exit	# src1 byte below src2 byte (compare in Lcloop)
184Lgreater_than_exit:
185	cmpibge.f 0,g2,Lequal_exit  # branch if difference is beyond max_bytes
186	mov	1,g0
187	 bx	(g13)		# g0 = 1 (src1 > src2)
188Lless_than_exit:
189	cmpibge.f 0,g2,Lequal_exit  # branch if difference is beyond max_bytes
190	subi	1,0,g0
191	 bx	(g13)		# g0 = -1 (src1 < src2)
192
/*
 * src2 is not word aligned.  Extract the first four src2 bytes from two
 * fetched words and compare them with the extracted src1 word in LSW.  If
 * they differ, finish in the byte loop via Lcloop_setup.  Otherwise point g1
 * at the next src2 word boundary, adjust the src1 pointer (g0) and the count
 * (g2) by the value in g14, and start over at Lrestart with src2 aligned.
 * NOTE(review): g14 appears to hold minus the number of bytes up to that
 * boundary -- confirm against the notor operand semantics.
 */
193Lsrc2_unaligned:
194	notor	g1,3,g14	# first step in computing new src1 ptr
195	 ld	4(g3),SRC1	# fetch second word of src2 (SRC1 used as scratch)
196	shlo	3,g1,MSW	# compute shift count for src2
197#if __i960_BIG_ENDIAN__
198	subo	MSW,0,MSW	# adjust shift count for big endian
199#endif
200	eshro	MSW,g6,SRC2	# extract word of src2 from the g6/g7 pair
201	cmpo	LSW,SRC2	# compare src1 and src2 words
202	 lda	4(g3),g1	# set new src2 ptr
203	 bne.f	Lcloop_setup	# first four bytes differ
204	subo	g14,g0,g0	# second (final) step in computing new src1 ptr
205	addi	g14,g2,g2	# compute new max_bytes too
206	 lda	(g13),g14	# prepare return pointer for Lrestart
207	 b	Lrestart		# continue with both array fetches shifted
208