1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
31#include <picolibc.h>
32
/*
 * Assembler preamble: name the source file, and emit the .pic / .pid
 * directive only when the build defines __PIC / __PID respectively.
 * NOTE(review): presumably these select position-independent code and
 * position-independent data; confirm against the i960 assembler docs.
 */
33	.file "sncat_ca.s"
34#ifdef	__PIC
35	.pic
36#endif
37#ifdef	__PID
38	.pid
39#endif
40/*
41 * (c) copyright 1988,1993 Intel Corp., all rights reserved
42 */
43
44/*
45	procedure strncat  (optimized assembler version for the CA)
46
47	dest_addr = strncat (dest_addr, src_addr, max_bytes)
48
49	append the null terminated string pointed to by src_addr to the null
50	terminated string pointed to by dest_addr.  Return the original
51	dest_addr.  If the source string is longer than max_bytes, then
52	append only max_bytes bytes, and tack on a null byte on the end
53
54	This routine will fail if the source and destination strings
55	overlap (in particular, if the end of the source is overlapped
56	by the beginning of the destination).  The behavior is undefined.
57	This is acceptable according to the draft C standard.
58
59	Undefined behavior will also occur if the end of the source string
60	(i.e. the terminating null byte) is in the last word of the program's
61	allocated memory space.  This is so because, in several cases, strncat
62	will fetch ahead one word.  Disallowing the fetch ahead would impose
63	a severe performance penalty.
64
65	This program handles five cases:
66
67	1) both arguments start on a word boundary
68	2) neither are word aligned, but they are offset by the same amount
69	3) source is word aligned, destination is not
70	4) destination is word aligned, source is not
71	5) neither is word aligned, and they are offset by differing amounts
72
73	At the time of this writing, only g0 thru g7 and g13 are available
74	for use in this leafproc;  other registers would have to be saved and
75	restored.  These nine registers, plus tricky use of g14, are sufficient
76	to implement the routine.  The registers are used as follows:
77
78	g0  original dest ptr;  not modified, so that it may be returned.
79	g1  src ptr;  shift count
80	g2  max_bytes
81	g3  src ptr (word aligned)
82	g4  dest ptr (word aligned)
83	g5  0xff  --  byte extraction mask
84	Little endian:
85		g6  lsw of double word for extraction of 4 bytes
86		g7  msw of double word for extraction of 4 bytes
87	Big endian:
88		g6  msw of double word for extraction of 4 bytes
89		g7  lsw of double word for extraction of 4 bytes
90	g13 return address
91	g14 byte extracted.
92*/
93
/*
 * Endian-neutral names for the g6/g7 register pair that holds the double
 * word from which four source bytes are extracted: LSW is the least
 * significant word and MSW the most significant word.  Little endian maps
 * LSW to g6 and MSW to g7; big endian swaps the two.
 */
94#if __i960_BIG_ENDIAN__
95#define MSW g6
96#define LSW g7
97#else
98#define LSW g6
99#define MSW g7
100#endif
101
/*
 * strncat entry points and body (i960 CA leafproc).
 *
 * NOTE: the ordinal (unsigned) handling of max_bytes below is NOT PART OF
 * THE ORIGINAL Intel source.  max_bytes (g2) is a size_t, so it is now
 * tested with cmpobe/cmpo, counted down with cmpdeco and reduced with
 * subo.  The original integer (signed) forms cmpibge/cmpi/cmpdeci/subi
 * treated every count above 0x7fffffff as "no bytes to move" and returned
 * without appending anything.  Counts up to 0x7fffffff behave as before.
 *
 * In:   g0 = dest, g1 = src, g2 = max_bytes.  On the __strncat entry g14
 *       already holds the return address; the _strncat entry loads the
 *       address of Lrett into g14 first.
 * Out:  g0 = dest (g0 is never written); g14 = 0 on every exit path.
 * Uses: g1-g7, g13, g14.
 */
102	.globl	_strncat
103	.globl	__strncat
104	.leafproc	_strncat, __strncat
105	.align	2
106_strncat:
107#ifndef __PIC
108	lda 	Lrett,g14	# return through the ret at Lrett
109#else
110	lda 	Lrett-(.+8)(ip),g14
111#endif
112__strncat:
113	notand	g0,3,g4		# extract word addr of start of dest
114	 lda	(g14),g13	# preserve return address
115	cmpobe.f 0,g2,Lexit_code # exit if max_bytes is zero (unsigned count)
116	and	g0,3,LSW	# extract byte offset of dest
117	 ld	(g4),MSW	# fetch word containing at least first byte
118	shlo	3,LSW,g14	# get shift count for making mask for first word
119	subi	1,0,LSW		# mask initially all ones
120#if __i960_BIG_ENDIAN__
121	shro	g14,LSW,LSW	# get mask for bytes needed from first word
122#else
123	shlo	g14,LSW,LSW	# get mask for bytes needed from first word
124#endif
125	notor	MSW,LSW,MSW	# set unneeded bytes to all ones
126	 lda	0xff,g5		# byte extraction mask
/* Scan dest a word at a time until a word containing a null byte is seen. */
127Lsearch_for_word_with_null:
128	scanbyte 0,MSW		# check for null byte
129	 lda	4(g4),g4	# post-increment dest word pointer
130	mov	MSW,LSW		# keep a copy of current word
131	 ld	(g4),MSW	# fetch next word of dest
132	 bno.t	Lsearch_for_word_with_null	# branch if null not found yet
133#if __i960_BIG_ENDIAN__
134	shro	24,LSW,g14	# extract byte
135#else
136	and	g5,LSW,g14	# extract byte
137#endif
138	cmpo	0,g14		# is the null the first byte of the word?
139	subo	4,g4,g4		# move dest word ptr to word with null
140	notand	g1,3,g3		# extract word addr of start of src
141	 bne.t	Lsearch_for_null	# no: find the null byte inside the word
142
/* The end of dest is word aligned (cases 1 and 4). */
143Lcase_14:
144	cmpo	g1,g3		# check alignment of source
145	 ld	(g3),LSW	# fetch first word of source
146	shlo	3,g1,g14	# compute shift count
147	 lda	4(g3),g3	# post-increment src addr
148	 bne.f	Lcase_4		# branch if source is unaligned
149Lcase_1:
150Lcase_1_wloop:			# word copying loop
151	cmpo	g2,4		# check for fewer than four bytes to move (unsigned)
152	 lda	(LSW),g1	# keep a copy of the src word
153	 bl.f	Lcase_1_cloop	# branch if fewer than four bytes to copy
154	scanbyte 0,g1		# check for null byte in src word
155	 ld	(g3),LSW	# pre-fetch next word of src
156	addo	4,g3,g3		# post-increment src addr
157	 bo.f	Lcase_1_cloop	# branch if word contains null byte
158	subo	4,g2,g2		# decrease max_byte count by the 4 bytes moved
159	 st	g1,(g4)		# store word in dest string
160	addo	4,g4,g4		# post-increment dest addr
161	 b	Lcase_1_wloop
162
/*
 * Byte-at-a-time tail: entered with the remaining source bytes in g1 when
 * fewer than four bytes may still be moved or the word holds the null.
 */
163Lcase_3_cloop:
164Lcase_1_cloop:			# character copying loop
165	cmpdeco	0,g2,g2		# is max_bytes exhausted?  (then count down)
166#if __i960_BIG_ENDIAN__
167	rotate	8,g1,g1		# move next byte into position for extraction
168#endif
169	and	g5,g1,g14	# extract next char
170	be.f	Lstore_null	# if max_bytes is exhausted, store null and quit
171	cmpo	0,g14		# check for null byte
172	 stob	g14,(g4)	# store the byte in dest
173#if ! __i960_BIG_ENDIAN__
174	shro	8,g1,g1		# move next byte into position for extraction
175#endif
176	 lda	1(g4),g4	# post-increment dest byte addr
177	 bne.t	Lcase_1_cloop	# branch if null not reached
178	bx	(g13)		# Lexit (g14 == 0)
179
180Lstore_null:
181	mov	0,g14		# store null, and set g14 to zero
182	stob	g14,(g4)
183	bx	(g13)
184
185
/* Step g4 byte by byte through the dest word until it addresses the null. */
186Lsearch_for_null:
187#if __i960_BIG_ENDIAN__
188	shlo	8,LSW,LSW	# check next byte
189	shro	24,LSW,g14
190#else
191	shlo	8,g5,g5		# move mask up to next byte
192	and	g5,LSW,g14	# extract byte
193#endif
194	 lda	1(g4),g4	# move dest byte ptr to next byte
195	cmpobne.t 0,g14,Lsearch_for_null	# branch if null is not yet found
196
/* The end of dest is not word aligned (cases 2, 3 and 5). */
197Lcase_235:
198	cmpo	g1,g3		# check alignment of src
199	 ld	(g3),LSW	# pre-fetch word with start of src
200	and	3,g1,g1		# compute shift count
201	 lda	0xff,g5		# load mask for byte extraction
202	shlo	3,g1,g14
203	 lda	4(g3),g3	# post-increment src word counter
204	 be.t	Lcase_3		# branch if src is word aligned
205	and	g4,3,MSW	# extract byte offset for dest string
206	cmpo    MSW,g1		# < indicates first word of dest has more bytes
207				/* than first word of source. */
208	 ld	(g3),MSW	# fetch second word of src
209#if __i960_BIG_ENDIAN__
210	subo	g14,0,g14	# adjust shift count for big endian
211#endif
212	eshro	g14,g6,g5	# extract four bytes
213#if __i960_BIG_ENDIAN__
214	 bge.f	1f
215#else
216	 bg.f	1f
217#endif
218	mov	MSW,LSW
219	 lda	4(g3),g3	# move src word addr to second word boundary
2201:
221	mov	g5,MSW		# the four extracted bytes feed the byte loop
222	 lda	0xff,g5
223	 b	Lcase_25
224
225Lcase_3:				# src is word aligned; dest is not
226	mov	LSW,MSW		# make copy of first word of src
227	 lda	32,g14		# initialize shift count to zero (mod 32)
228Lcase_25:
229
230Lcase_3_cloop_at_start:		# character copying loop for start of dest str
231	cmpdeco	0,g2,g2		# is max_bytes exhausted?  (then count down)
232#if __i960_BIG_ENDIAN__
233	shro	24,MSW,g5	# extract next char
234#else
235	and	g5,MSW,g5	# extract next char
236#endif
237	 be.f	Lstore_null	# Lexit if max_bytes is exhausted
238	cmpo	0,g5		# check for null byte
239	 stob	g5,(g4)		# store the byte in dest
240	addo	1,g4,g4		# post-increment dest ptr
241	 lda	0xff,g5		# re-initialize byte extraction mask
242	notand	g4,3,g1		# extract word address
243	 be.t	Lexit_code	# Lexit if null byte reached
244	cmpo	g1,g4		# have we reached word boundary in dest yet?
245#if __i960_BIG_ENDIAN__
246	 lda	-8(g14),g14	# augment the shift counter
247	rotate	8,MSW,MSW	# move next byte into position for extraction
248#else
249	 lda	8(g14),g14	# augment the shift counter
250	shro	8,MSW,MSW	# move next byte into position for extraction
251#endif
252	 bne.t	Lcase_3_cloop_at_start	# loop until dest is at a word boundary
253
254#if __i960_BIG_ENDIAN__
255	cmpo	0,g14
256	 ld	(g3),MSW	# fetch msw of operand for double shift
257	bne	Lcase_3_wloop	# branch if src is still unaligned.
258
259Lcase_3_wloop2:
260	cmpo	g2,4		# less than four bytes to move? (unsigned)
261	mov	LSW,g1		# extract 4 bytes of src
262	 lda	4(g3),g3	# post-increment src word addr
263	 bl.f	Lcase_3_cloop	# branch if < four bytes left to move
264	scanbyte 0,g1		# check for null byte
265	mov	MSW,LSW		# move msw to lsw
266	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
267	 bo.f	Lcase_3_cloop	# branch if word contains null byte
268	subo	4,g2,g2		# decrease max_byte count by the 4 bytes moved
269	 st	g1,(g4)		# store 4 bytes to dest
270	addo	4,g4,g4		# post-increment dest ptr
271	 b	Lcase_3_wloop2
272Lcase_4:
273	subo	g14,0,g14	# adjust shift count for big endian
274#else
275Lcase_4:
276#endif
277
278	ld	(g3),MSW	# fetch msw of operand for double shift
279
/* Word loop for a source that is misaligned relative to the dest. */
280Lcase_3_wloop:
281	cmpo	g2,4		# less than four bytes to move? (unsigned)
282	eshro	g14,g6,g1	# extract 4 bytes of src
283	 lda	4(g3),g3	# post-increment src word addr
284	 bl.f	Lcase_3_cloop	# branch if < four bytes left to move
285	scanbyte 0,g1		# check for null byte
286	mov	MSW,LSW		# move msw to lsw
287	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
288	 bo.f	Lcase_3_cloop	# branch if word contains null byte
289	subo	4,g2,g2		# decrease max_byte count by the 4 bytes moved
290	 st	g1,(g4)		# store 4 bytes to dest
291	addo	4,g4,g4		# post-increment dest ptr
292	 b	Lcase_3_wloop
293
294
295Lexit_code:
296	mov	0,g14		# conform to register conventions
297	bx	(g13)		# g0 = addr of dest;  g14 = 0
298Lrett:
299	ret
300
301/* end of strncat */
302
303