1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
/*
 * Assembler bookkeeping: record the source file name, and emit the
 * .pic / .pid directives only when the build defines __PIC / __PID.
 */
31	.file "sncpy_ca.s"
32#ifdef	__PIC
33	.pic
34#endif
35#ifdef	__PID
36	.pid
37#endif
38/*
39 * (c) copyright 1988,1993 Intel Corp., all rights reserved
40 */
41
42/*
43	procedure strncpy  (optimized assembler version for the CA)
44
45	dest_addr = strncpy (dest_addr, src_addr, max_bytes)
46
47	copy the null terminated string pointed to by src_addr to
48	the string space pointed to by dest_addr.  Return the original
49	dest_addr.  If the source string is shorter than max_bytes,
50        then null-pad the destination string.
51
52	This routine will fail if the source and destination string
53	overlap (in particular, if the end of the source is overlapped
54	by the beginning of the destination).  The behavior is undefined.
55	This is acceptable according to the draft C standard.
56
57	Undefined behavior will also occur if the end of the source string
58	(i.e. the terminating null byte) is in the last word of the program's
59	allocated memory space.  This is so because, in several cases, strncpy
60	will fetch ahead one word.  Disallowing the fetch ahead would impose
61	a severe performance penalty.
62
63	This program handles five cases:
64
65	1) both arguments start on a word boundary
66	2) neither is word aligned, but they are offset by the same amount
67	3) source is word aligned, destination is not
68	4) destination is word aligned, source is not
69	5) neither is word aligned, and they are offset by differing amounts
70
71	At the time of this writing, only g0 thru g7 and g13 are available
72	for use in this leafproc;  other registers would have to be saved and
73	restored.  These nine registers, plus tricky use of g14 are sufficient
74	to implement the routine.  The registers are used as follows:
75
76	g0  original dest ptr;  not modified, so that it may be returned.
77	g1  src ptr;  shift count
78	g2  max_bytes
79	g3  src ptr (word aligned)
80	g4  dest ptr (word aligned)
81	g5  0xff  --  byte extraction mask
82	Little endian:
83		g6  lsw of double word for extraction of 4 bytes
84		g7  msw of double word for extraction of 4 bytes
85	Big endian:
86		g6  msw of double word for extraction of 4 bytes
87		g7  lsw of double word for extraction of 4 bytes
88	g13 return address
89	g14 byte extracted.
90*/
91
/*
 * Map the symbolic names LSW / MSW onto g6 / g7 according to byte order.
 * Little endian: LSW = g6, MSW = g7.  Big endian: MSW = g6, LSW = g7.
 * Code that names g6 directly (eshro, movl) operates on the g6/g7 pair
 * regardless of this mapping.
 */
92#if __i960_BIG_ENDIAN__
93#define MSW g6
94#define LSW g7
95#else
96#define LSW g6
97#define MSW g7
98#endif
99
/*
 * Entry points and common prologue.
 *
 * _strncpy   loads the address of Lrett into g14 before falling into the
 *            shared body, so the common exit (bx through g13) lands on
 *            the ret at Lrett.
 * __strncpy  is the second name given to .leafproc.  Presumably it is the
 *            branch-and-link entry, reached with the return address
 *            already in g14 -- confirm against the i960 leafproc rules.
 *
 * In:  g0 = dest, g1 = src, g2 = max_bytes (see the register table in the
 *      header comment).
 * The prologue saves the return address in g13, leaves at once when
 * max_bytes <= 0, fetches the first source word into LSW, computes the
 * word-aligned src (g3) and dest (g4) addresses, and dispatches on the
 * alignment of src.
 */
100	.globl	_strncpy
101	.globl	__strncpy
102	.leafproc	_strncpy, __strncpy
103	.align	2
104_strncpy:
105#ifndef __PIC
106	lda 	Lrett,g14	# g14 = address of Lrett
107#else
108	lda 	Lrett-(.+8)(ip),g14	# same address, formed ip-relative for PIC
109#endif
110__strncpy:
111	notand	g1,3,g3		# extract word addr of start of src
112	 lda	(g14),g13	# preserve return address
/*
 * NOTE(review): cmpibge belongs to the cmpi (integer) compare family, so a
 * max_bytes value with the top bit set would presumably be seen as negative
 * and take this exit without copying anything -- confirm that is acceptable.
 */
113	 cmpibge.f 0,g2,Lexit_code # Lexit if number of bytes to move is <= zero.
114	cmpo	g3,g1		# check alignment of src
115	 ld	(g3),LSW	# fetch word containing at least first byte
116	notand	g0,3,g4		# extract word addr of start of dest
117	 lda	4(g3),g3	# advance src word addr
118	 bne.f	Lcase_245	# branch if src is NOT word aligned
119
/*
 * Cases 1 and 3: src is word aligned.  Load the byte mask, bias the dest
 * word pointer down by one word (the word loop pre-increments it), then
 * split on the alignment of dest.
 */
120Lcase_13:
121	cmpo	g0,g4		# check alignment of dest
122	 lda	0xff,g5		# load mask for byte extraction
123	subo	4,g4,g4		# store is pre-incrementing;  back up dest addr
124	 bne.f	Lcase_3		# branch if dest not word aligned
125
/*
 * Case 1 word loop.  On each pass:
 *   g1  = source word to copy (taken from LSW)
 *   LSW = next source word, fetched ahead from (g3)
 *   g4  = address of the dest word for g1 (already pre-incremented)
 *   g2  = bytes still allowed; it is reduced by 4 BEFORE the null test,
 *         so on entry to Lcase_1_cloop.c it no longer counts this word.
 * The loop is left for Lcase_1_cloop.a when fewer than four bytes remain
 * and for Lcase_1_cloop.c when the word holds a null byte.
 */
126Lcase_1:				# src and dest are word aligned
127Lcase_1_wloop:			# word copying loop
128	cmpi	g2,4		# check for fewer than four bytes to move
129	 lda	(LSW),g1	# keep a copy of the src word
130	addo	4,g4,g4		# pre-increment dest addr
131	 bl.f	Lcase_1_cloop.a	# branch if fewer than four bytes to copy
132	scanbyte 0,g1		# check for null byte in src word
133	 ld	(g3),LSW	# pre-fetch next word of src
134	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
135	 bo.f	Lcase_1_cloop.c	# branch if word contains null byte
136	addo	4,g3,g3		# post-increment src addr
137	 st	g1,(g4)		# store word in dest string
138	 b	Lcase_1_wloop
139
/*
 * Byte-at-a-time tail, shared by the aligned and the shifted word loops.
 *
 * In:  g1 = word holding the remaining source bytes
 *      g4 = dest address for the next byte
 *      g2 = bytes still allowed
 *      g5 = 0xff
 *
 * .a extracts the next byte of g1 into g14.
 * .b stores g14, provided max_bytes is not exhausted, and then either
 *    returns to .a (byte was non-zero) or loops on itself.  Once a null
 *    has been stored g14 stays zero, so looping on .b null-pads the
 *    destination until g2 runs out.
 * Lcase_1_zwloop also enters at .b, with g14 already zero.
 */
140Lcase_3_cloop.a:
141Lcase_1_cloop.a:			# character copying loop (max_bytes <= 3)
142#if __i960_BIG_ENDIAN__
143	rotate	8,g1,g1		# move next byte into position for extraction
144#endif
145	and	g5,g1,g14	# extract next char
146Lcase_1_cloop.b:
147	cmpdeci	0,g2,g2		# is max_bytes exhausted?
148	be.f	Lexit_code	# Lexit if max_bytes is exhausted
149	cmpo	0,g14		# check for null byte
150	 stob	g14,(g4)	# store the byte in dest (the null is stored too)
151#if ! __i960_BIG_ENDIAN__
152	shro	8,g1,g1		# move next byte into position for extraction
153#endif
154	 lda	1(g4),g4	# post-increment dest byte addr
155	 bne.t	Lcase_1_cloop.a	# branch if null not reached
156	 b	Lcase_1_cloop.b	# null reached: keep storing zero bytes (padding)
157
/*
 * Common exit.  g0 is never written by this routine, so it still holds the
 * original dest pointer, which is the return value.  g14 is cleared and
 * control leaves through g13, the copy of g14 made in the prologue.  For
 * the _strncpy entry that copy is the address of Lrett, so the bx lands on
 * the ret below.
 */
158Lexit_code:
159	mov	0,g14		# conform to register conventions
160	bx	(g13)		# g0 = addr of dest;  g14 = 0
161Lrett:
162	ret
163
/*
 * The word in g1 contains a null byte and at least four bytes may still be
 * written (g2 was already reduced by 4 for this word).  Find the null,
 * zero every byte of g1 that follows it, store the word at (g4), then fall
 * into the zero word loop to null-pad the rest of the destination.
 *
 * Little endian: g5 walks 0xff, 0xff00, ... while LSW keeps the mask just
 * tested.  When the null is found, (LSW - 1) keeps every byte below the
 * null and clears every byte above it; the bits it keeps inside the null
 * byte itself are already zero.  LSW is overwritten here, which is fine
 * because no further source data is consumed on this path.
 *
 * Big endian: the mask is rotated instead and the final word is formed
 * with subo / andnot, per the inline comments.
 *
 * g14 is zero on exit from the search (it holds the null byte extracted).
 */
164Lcase_1_cloop.c:
165Lcase_3_cloop.c:
166#if __i960_BIG_ENDIAN__
167	rotate	24,g5,g5	# move mask into position for testing next byte
168#endif
169	and	g5,g1,g14	# extract next char
170	cmpo	0,g14		# check for null byte
171#if ! __i960_BIG_ENDIAN__
172	 lda	(g5),LSW	# keep a copy of the current mask
173	shlo	8,g5,g5		# move mask into position for testing next byte
174#endif
175	 bne.t	Lcase_1_cloop.c	# branch if null not reached
176#if __i960_BIG_ENDIAN__
177	subo	1,g5,g5		# null pad.
178	andnot	g5,g1,g1	# last bytes to copy, and null pad rest of word
179#else
180	subo	1,LSW,g5	# mask to get last bytes to copy, and null pad
181	and	g5,g1,g1	# last bytes to copy, and null pad rest of word
182#endif
183	st	g1,(g4)		# store the final, null padded, source word
184
/*
 * Zero word loop: store whole words of zero (g14 == 0) while at least four
 * bytes remain, then finish the last 0..3 bytes in Lcase_1_cloop.b.
 */
185Lcase_1_zwloop:			# zero word loop
186	cmpi	g2,4		# check for fewer than four bytes to move
187	addo	4,g4,g4		# pre-increment dest addr
188	 bl.f	Lcase_1_cloop.b	# branch if fewer than four bytes to copy
189	subo	4,g2,g2		# decrease max_byte count by the 4 bytes moved
190	 st	g14,(g4)	# store a word of zeros in dest string
191	 b	Lcase_1_zwloop
192
/*
 * Case 3 setup: src is word aligned, dest is not.
 * g4 arrives here already backed up by 4 (done in Lcase_13), so adding 8
 * leaves it at the first word boundary above dest.  g1 becomes the dest
 * byte pointer, MSW becomes the queue of bytes to peel off, and the shift
 * count starts at 32, which is zero modulo 32.
 */
193Lcase_3:				# src is word aligned; dest is not
194	addo	8,g4,g4		# move dest word ptr to first word boundary
195	 lda	(g0),g1		# copy dest byte ptr
196	mov	LSW,MSW		# make copy of first word of src
197	 lda	32,g14		# initialize shift count to zero (mod 32)
198
/*
 * Start-of-destination byte loop, shared by cases 2, 3 and 5.
 * Copies one byte at a time from MSW to (g1) until the dest pointer reaches
 * the word boundary held in g4, adjusting the shift count in g14 by 8 for
 * every byte consumed.  If a null byte is copied, the g6/g7 pair is zeroed
 * so that the remaining bytes up to the boundary are written as nulls.
 * g5 is used as the scratch byte and is reset to 0xff on every pass.
 */
199Lcase_25:
200Lcase_3_cloop_at_start:		# character copying loop for start of dest str
201	cmpdeci	0,g2,g2		# is max_bytes exhausted?
202#if __i960_BIG_ENDIAN__
203	shro	24,MSW,g5	# extract next char
204#else
205	and	g5,MSW,g5	# extract next char
206#endif
207	 be.f	Lexit_code	# Lexit if max_bytes is exhausted
208	cmpo	0,g5		# check for null byte
209	 stob	g5,(g1)		# store the byte in dest
210	addo	1,g1,g1		# post-increment dest ptr
211	 lda	0xff,g5		# re-initialize byte extraction mask
212	 bne.t	1f		# drop thru if null byte reached (to pad)
213	movl	0,g6		# blank out remainder of input buffer
2141:
215	cmpo	g1,g4		# have we reached word boundary in dest yet?
216#if __i960_BIG_ENDIAN__
217	 lda	-8(g14),g14	# augment the shift counter
218	rotate	8,MSW,MSW	# move next byte into position for extraction
219#else
220	 lda	8(g14),g14	# augment the shift counter
221	shro	8,MSW,MSW	# move next byte into position for extraction
222#endif
223	 bne.t	Lcase_3_cloop_at_start	# loop until dest reaches the word boundary
224
/*
 * NOTE(review): when a null was seen above, this load puts live source data
 * back into MSW.  In the little endian build the word loop still appears to
 * pad correctly, because the zeroed LSW supplies the low byte(s) of the next
 * extracted word and the null is therefore found there first -- confirm,
 * and check the big endian build separately.
 */
225	ld	(g3),MSW	# fetch msw of operand for double shift
226
/*
 * Shifted word loop: dest is now word aligned, src is not (case 4 enters
 * here directly; cases 2, 3 and 5 fall in from the start-of-dest loop).
 *
 * In:  LSW/MSW = two consecutive source words
 *      g3      = address of the source word held in MSW
 *      g4      = dest word address (stored to first, then incremented)
 *      g14     = bit shift count used to pull four bytes out of g6/g7
 *      g2      = bytes still allowed
 *
 * Each pass extracts four bytes into g1, slides MSW down into LSW, fetches
 * the following source word into MSW, and stores g1.  The loop is left for
 * Lcase_3_cloop.a when fewer than four bytes remain and for Lcase_3_cloop.c
 * when the extracted word holds a null byte.
 */
227Lcase_4:
228
229#if __i960_BIG_ENDIAN__
230	cmpobne	0,g14,Lcase_3_wloop # branch if src is still unaligned.
231
/*
 * Big endian only: the shift count is zero, so the four bytes are taken
 * straight from LSW rather than through eshro.
 */
232Lcase_3_wloop2:
233	cmpi	g2,4		# less than four bytes to move?
234	 lda	(LSW),g1	# extract 4 bytes of src
235	lda	4(g3),g3	# post-increment src word addr
236	 bl.f	Lcase_3_cloop.a	# branch if < four bytes left to move
237	scanbyte 0,g1		# check for null byte
238	 lda	(MSW),LSW	# move msw to lsw
239	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
240	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
241	 bo.f	Lcase_3_cloop.c	# branch if word contains null byte
242	st	g1,(g4)		# store 4 bytes to dest
243	addo	4,g4,g4		# post-increment dest ptr
244	 b	Lcase_3_wloop2
245
246#endif
247
248Lcase_3_wloop:
249	cmpi	g2,4		# less than four bytes to move?
250	eshro	g14,g6,g1	# extract 4 bytes of src from the g6/g7 pair
251	 lda	4(g3),g3	# post-increment src word addr
252	 bl.f	Lcase_3_cloop.a	# branch if < four bytes left to move
253	scanbyte 0,g1		# check for null byte
254	 lda	(MSW),LSW	# move msw to lsw
255	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
256	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
257	 bo.f	Lcase_3_cloop.c	# branch if word contains null byte
258	st	g1,(g4)		# store 4 bytes to dest
259	addo	4,g4,g4		# post-increment dest ptr
260	 b	Lcase_3_wloop
261
/*
 * Cases 2, 4 and 5: src is NOT word aligned.
 *
 * In:  LSW = first source word, g3 = address of the second source word,
 *      g4  = word address of dest, g1 = src byte pointer.
 *
 * Fetch the second source word into MSW and turn the src byte offset into
 * a bit shift count in g14.  If dest is word aligned (case 4) go straight
 * to the shifted word loop.  Otherwise extract the first four source bytes,
 * hand them to the start-of-dest byte loop through MSW, and advance the
 * source window by one word unless dest sits later in its word than src
 * does in its own.
 *
 * The compare at the top is not consumed until the be.t further down; the
 * code relies on the instructions in between leaving the condition code
 * untouched.
 */
262Lcase_245:
263	cmpo	g0,g4		# check alignment of dest
264	 ld	(g3),MSW	# pre-fetch second half
265	and	3,g1,g1		# compute shift count (src byte offset, 0..3)
266	 lda	0xff,g5		# load mask for byte extraction
267#if __i960_BIG_ENDIAN__
268	subo	g1,4,g14	# adjust shift count for big endian.
269	shlo	3,g14,g14	# bytes to bits
270#else
271	shlo	3,g1,g14	# bytes to bits
272#endif
273	 be.t	Lcase_4		# branch if dest is word aligned
274	or	g4,g1,g1	# is src earlier in word, later, or sync w/ dst
275	cmpo    g0,g1		# < indicates first word of dest has more bytes
276				/* than first word of source. */
277	 lda	4(g4),g4	# move dest word addr to first word boundary
278	eshro	g14,g6,g5	# extract four bytes
279	 lda	(g0),g1		# g1 = dest byte ptr for the byte loop
280	 bg.f	1f		# dest offset > src offset: keep the current window
281	mov	MSW,LSW		# otherwise slide the window up one word
282	 lda	4(g3),g3	# move src word addr to second word boundary
2831:
284	mov	g5,MSW		# extracted bytes become the byte loop queue
285	 lda	0xff,g5		# restore the byte extraction mask
286	 b	Lcase_25
287
288/* end of strncpy */
289
290