1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
31#include <picolibc.h>
32
/* Assembler bookkeeping: name the source file and, only when the build
   defines __PIC / __PID, emit the matching .pic / .pid directive. */
33	.file "memcp_ca.s"
34#ifdef	__PIC
35	.pic
36#endif
37#ifdef	__PID
38	.pid
39#endif
40/*
41 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
42 */
43
44/*
45	procedure memmove  (optimized assembler version for the CA)
46	procedure memcpy   (optimized assembler version for the CA)
47
48	dest_addr = memmove (dest_addr, src_addr, len)
49	dest_addr = memcpy  (dest_addr, src_addr, len)
50
51	copy len bytes pointed to by src_addr to the space pointed to by
52	dest_addr.  Return the original dest_addr.
53
54	The C standard leaves memcpy undefined when the source and
55	destination overlap (in particular, when the end of the source is
56	overlapped by the beginning of the destination).  In this file,
57	however, memcpy and memmove share a single entry point, so both
58	take the same overlap-aware path (backwards copy when required).
59
60	Undefined behavior will also occur if the end of the source buffer
61	(i.e. the last byte to be copied) is in the last word of the program's
62	allocated memory space.  This is so because, in several cases, the
63	routine will fetch ahead one word.  Disallowing the fetch ahead would
64	impose a severe performance penalty.
65
66	This program handles five cases:
67
68	1) both arguments start on a word boundary
69	2) neither are word aligned, but they are offset by the same amount
70	3) source is word aligned, destination is not
71	4) destination is word aligned, source is not
72	5) neither is word aligned, and they are offset by differing amounts
73
74	At the time of this writing, only g0 thru g7 and g13 are available
75	for use in this leafproc;  other registers would have to be saved and
76	restored.  These nine registers, plus tricky use of g14 are sufficient
77	to implement the routine.  The registers are used as follows:
78
79	g0  dest ptr;  not modified, so that it may be returned
80	g1  src ptr on entry;  later dest byte ptr or extracted word to store
81	g2  len
82	g3  src ptr (word aligned)
83	g4  dest ptr (word aligned)
84	g5  addr of byte after end of src;  scratch for extracted word
85	Little endian
86		g6  lsw of double word for extraction of 4 bytes
87		g7  msw of double word for extraction of 4 bytes
88	Big endian
89		g6  msw of double word for extraction of 4 bytes
90		g7  lsw of double word for extraction of 4 bytes
91	g13 return address
92	g14 return address on entry;  then shift count (zeroed on exit)
93*/
94
/* MSW / LSW name the most- and least-significant word of the g6:g7 register
   pair used for 4-byte extraction.  The roles swap with endianness:
   g6 is the MSW on big-endian builds and the LSW otherwise. */
95#if __i960_BIG_ENDIAN__
96#define MSW g6
97#define LSW g7
98#else
99#define LSW g6
100#define MSW g7
101#endif
102
/*
 * memcpy / memmove entry and direction dispatch.
 *
 * _memcpy and _memmove label the same address (as do __memcpy and
 * __memmove), so both names run exactly the same code.
 *
 * In:   g0 = dest, g1 = src, g2 = len in bytes
 * Out:  g0 = dest (never modified), g14 = 0 (set at Lexit_code)
 *
 * The _name entries first load the address of Lrett (a ret instruction)
 * into g14 and fall into the __name entries.  The __name entries skip that
 * load, so g14 must already hold the return address when they are used.
 * g14 is copied to g13 early on because g14 is reused later.
 *
 * After the len <= 0 and src == dest early exits, the copy direction is
 * chosen:  backwards when src < dest < src+len (dest starts inside the
 * source range), forwards otherwise.  The forward path then splits on
 * whether src is word aligned.
 */
103	.globl	_memmove, _memcpy
104	.globl	__memmove, __memcpy
105	.leafproc	_memmove, __memmove
106	.leafproc	_memcpy,  __memcpy
107	.align	2
108_memcpy:
109_memmove:
110#ifndef __PIC
111	lda 	Lrett,g14	# g14 = address of Lrett (absolute form)
112#else
113	lda 	Lrett-(.+8)(ip),g14	# g14 = address of Lrett (ip-relative form)
114#endif
115__memcpy:
116__memmove:
117	cmpibge.f 0,g2,Lquick_exit # quick exit if number of bytes to move is <= zero
118	cmpo	g0,g1		# if dest starts earlier than src ...
119	 lda	(g14),g13	# preserve return address
120	addo	g2,g1,g5	# compute addr of byte after last byte of src
121	 be.f	Lexit_code	# no move necessary if src and dest are same
122	concmpo	g5,g0		# ... or if dest starts after end of src ...
123	notand	g1,3,g3		# extract word addr of start of src
124	 bg.f	Lbackwards	# ... then drop thru, else do move backwards
125	cmpo	g3,g1		# check alignment of src
126	 ld	(g3),LSW	# fetch word containing at least first byte
127	notand	g0,3,g4		# extract word addr of start of dest
128	 lda	4(g3),g3	# advance src word addr
129	 bne.f	Lcase_245	# branch if src is NOT word aligned
130
/*
 * Forward copy, src word aligned (cases 1 and 3).
 * On entry LSW holds the first src word, g3 is the address of the next src
 * word and g4 is the word address of dest.
 * If dest is aligned as well, go to the word loop at Lcase_1.  Otherwise
 * fall into Lcase_3, which sets up the byte loop that brings dest up to a
 * word boundary.
 */
131Lcase_13:
132	cmpo	g0,g4		# check alignment of dest
133	subo	4,g4,g4		# store is pre-incrementing;  back up dest addr
134	 be.t	Lcase_1		# branch if dest word aligned
135
136Lcase_3:				# src is word aligned; dest is not
137	addo	8,g4,g4		# undo the back-up; move dest word ptr to first word boundary
138	 lda	(g0),g1		# copy dest byte ptr
139	mov	LSW,MSW		# make copy of first word of src
140	 lda	32,g14		# initialize shift count to zero (mod 32)
141
/*
 * Forward byte loop (shared by cases 2, 3 and 5).
 * Stores bytes taken from MSW one at a time through the dest byte pointer
 * g1 until g1 reaches the word boundary held in g4.  The shift count in g14
 * is adjusted by 8 for every byte stored.  If the byte count in g2 runs out
 * first, control leaves through Lexit_code.  On fall-through MSW is
 * reloaded from (g3) for the double-word extraction done by the word loop.
 */
142Lcase_25:
143Lcase_3_cloop_at_start:		# character copying loop for start of dest str
144	cmpdeci	0,g2,g2		# is max_bytes exhausted?  (also decrements the count)
145	be.f	Lexit_code	# Lexit if max_bytes is exhausted
146#if __i960_BIG_ENDIAN__
147	rotate	8,MSW,MSW	# move next byte into position for extraction
148	subo	8,g14,g14	# augment the shift counter
149	 stob	MSW,(g1)	# store the byte in dest
150#else
151	addo	8,g14,g14	# augment the shift counter
152	 stob	MSW,(g1)	# store the byte in dest
153	shro	8,MSW,MSW	# move next byte into position for extraction
154#endif
155	 lda	1(g1),g1	# post-increment dest ptr
156	cmpobne.t g1,g4,Lcase_3_cloop_at_start # loop until dest ptr reaches word boundary
157
158	ld	(g3),MSW	# fetch msw of operand for double shift
159
/*
 * Forward word loop for the paths where src and dest are not mutually
 * aligned (dest is word aligned by the time this loop runs).
 * Each pass extracts four bytes from the g6:g7 pair with eshro, using the
 * shift count in g14, and stores them at (g4).  The pair is then moved
 * along by one word (MSW -> LSW) and the next src word is prefetched into
 * MSW.  When fewer than four bytes remain, the word already extracted into
 * g1 is handed to Lcase_3_cloop to be stored byte by byte.
 */
160Lcase_4:
161Lcase_3_wloop:
162	cmpi	g2,4		# less than four bytes to move?
163	 lda	4(g3),g3	# post-increment src word addr
164	eshro	g14,g6,g1	# extract 4 bytes of src
165	 bl.f	Lcase_3_cloop	# branch if < four bytes left to move
166	mov	MSW,LSW		# move msw to lsw
167	 ld	(g3),MSW	# pre-fetch msw of operand for double shift
168	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
169	 st	g1,(g4)		# store 4 bytes to dest
170	addo	4,g4,g4		# post-increment dest ptr
171	 b	Lcase_3_wloop
172
/*
 * Forward copy with src and dest both word aligned (case 1), plus the
 * shared forward byte tail.
 * Entry is at Lcase_1 with g4 backed up by one word, LSW holding the next
 * src word and g3 pointing at the word after it.  Whole words are copied
 * while at least four bytes remain.  The remaining bytes (fewer than four)
 * are stored one at a time from g1 in Lcase_1_cloop.
 * Lcase_3_cloop is the tail entry used by the realigning word loop; it
 * arrives with the last extracted word in g1 and the dest address in g4.
 */
173Lcase_1_wloop:			# word copying loop
174	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
175	 ld	(g3),LSW	# pre-fetch next word of src
176	addo	4,g3,g3		# post-increment src addr
177	 st	g1,(g4)		# store word in dest string
178Lcase_1:				# src and dest are word aligned
179	cmpi	g2,4		# check for fewer than four bytes to move
180	addo	4,g4,g4		# pre-increment dest addr
181	 lda	(LSW),g1	# keep a copy of the src word
182	 bge.t	Lcase_1_wloop	# branch if at least four bytes to copy
183Lcase_3_cloop:
184	cmpibe.f 0,g2,Lexit_code	# Lexit if max_bytes is exhausted
185
186Lcase_1_cloop:
187#if __i960_BIG_ENDIAN__
188	rotate	8,g1,g1		# move next byte into position for extraction
189#endif
190	subi	1,g2,g2		# one fewer byte left to move
191	 stob	g1,(g4)		# store the byte in dest
192	cmpi	0,g2		# any bytes left?
193	 lda	1(g4),g4	# post-increment dest byte addr
194#if ! __i960_BIG_ENDIAN__
195	shro	8,g1,g1		# move next byte into position for extraction
196#endif
197	 bne.t	Lcase_1_cloop	# loop until max_bytes is exhausted
198
/*
 * Common exit: clear g14 and branch to the return address saved in g13.
 * g0 still holds the original dest pointer.
 * Lrett is the ret instruction whose address the _memcpy / _memmove entry
 * loads into g14.
 */
199Lexit_code:
200	mov	0,g14		# conform to register conventions
201	bx	(g13)		# g0 = addr of dest;  g14 = 0
202Lrett:
203	ret
204
205
/*
 * Forward copy, src NOT word aligned (cases 2, 4 and 5).
 * On entry LSW holds the word containing the first src byte, g3 is the
 * address of the following src word, g4 is the word address of dest and
 * g1 is still the src byte pointer.
 *
 * The shift count in g14 is set to 8 * (src & 3), negated on big-endian
 * builds.  If dest is word aligned the word loop at Lcase_4 is entered
 * directly.  Otherwise the first four src bytes are extracted into g5 and
 * later moved to MSW for the byte loop at Lcase_25.  Depending on how the
 * byte offsets of src and dest within their words compare, the register
 * pair and g3 are first advanced by one src word.
 */
206Lcase_245:
207	cmpo	g0,g4		# check alignment of dest
208	 ld	(g3),MSW	# pre-fetch second half
209	and	3,g1,g1		# compute shift count
210	shlo	3,g1,g14	# g14 = 8 * byte offset of src within its word
211#if __i960_BIG_ENDIAN__
212	subo	g14,0,g14	# adjust shift count for big endian
213#endif
214	 be.t	Lcase_4		# branch if dest is word aligned
215	or	g4,g1,g1	# is src earlier in word, later, or sync w/ dst
216	cmpo    g0,g1		# < indicates first word of dest has more bytes
217	 lda	4(g4),g4	# move dest word addr to first word boundary
218	eshro	g14,g6,g5	# extract four bytes
219	 lda	(g0),g1		# g1 = dest byte ptr for the byte loop
220#if __i960_BIG_ENDIAN__
221	 bge.f	1f
222#else
223	 bg.f	1f
224#endif
225	mov	MSW,LSW		# advance the pair by one src word
226	 lda	4(g3),g3	# move src word addr to second word boundary
2271:
228	mov	g5,MSW		# byte loop stores from MSW
229	 b	Lcase_25
230
231
/*
 * Backward copy set-up, used when dest starts inside the source range.
 * On entry g5 is the address of the byte after the end of src.
 * Computes the word address of that end (MSW), the address of the src word
 * before it (g3), the address of the byte after the end of dest (g1) and
 * the word address of that end (g4).  It then splits on whether the end
 * of src is word aligned.
 */
232Lbackwards:
233	notand	g5,3,MSW	# extract word addr of byte after end of src
234	cmpo	MSW,g5		# check alignment of end of src
235	subo	4,MSW,g3	# retreat src word addr
236	addo	g2,g0,g1	# compute addr of byte after end of dest
237	notand	g1,3,g4		# extract word addr of byte after end of dest
238	 bne.f	Lcase.245	# branch if end of src is NOT word aligned
239
/*
 * Backward copy, end of src word aligned (cases 1 and 3).
 * On entry g3 is the address of the last src word, g1 is the address of
 * the byte after the end of dest and g4 is the word address of g1.
 * If the end of dest is aligned as well, go to the word loop at Lcase.1.
 * Otherwise fall into Lcase.3, which sets up the byte loop that brings
 * the dest pointer down to a word boundary.
 */
240Lcase.13:
241	cmpo	g1,g4		# check alignment of dest
242	 ld	(g3),MSW	# fetch last word of src
243	subo	4,g3,g3		# retreat src word addr
244	 be.t	Lcase.1		# branch if dest word aligned
245
246Lcase.3:			# src is word aligned; dest is not
247	mov	MSW,LSW		# make copy of first word of src
248	 lda	32,g14		# initialize shift count to zero (mod 32)
249
/*
 * Backward byte loop (shared by cases 2, 3 and 5).
 * Stores bytes taken from LSW one at a time, pre-decrementing the dest
 * byte pointer g1, until g1 reaches the word boundary held in g4.  The
 * shift count in g14 is adjusted by 8 for every byte stored.  If the byte
 * count in g2 runs out first, control leaves through Lexit_code.  On
 * fall-through LSW is reloaded from (g3) for the double-word extraction
 * done by the word loop.
 */
250Lcase.25:
251Lcase.3_cloop_at_start:		# character copying loop for start of dest str
252	cmpdeci	0,g2,g2		# is max.bytes exhausted?  (also decrements the count)
253	 be.f	Lexit_code	# Lexit if max_bytes is exhausted
254#if ! __i960_BIG_ENDIAN__
255	rotate	8,LSW,LSW	# move next byte into position for storing
256#endif
257	 lda	-1(g1),g1	# pre-decrement dest ptr
258	cmpo	g1,g4		# have we reached word boundary in dest yet?
259	 stob	LSW,(g1)	# store the byte in dest
260#if __i960_BIG_ENDIAN__
261	shro	8,LSW,LSW	# move next byte into position for storing
262	addo	8,g14,g14	# augment the shift counter
263#else
264	subo	8,g14,g14	# augment the shift counter
265#endif
266	 bne.t	Lcase.3_cloop_at_start	# loop until dest ptr reaches word boundary
267
268	ld	(g3),LSW	# fetch lsw of operand for double shift
269
/*
 * Big-endian builds only.  Reached by fall-through from the backward byte
 * loop: if the shift count in g14 is non-zero, control goes to the general
 * loop at Lcase.3_wloop; otherwise this loop runs.  It has the same
 * structure as Lcase.3_wloop, but the word to store is copied straight
 * from MSW instead of being extracted with eshro.
 */
270#if __i960_BIG_ENDIAN__
271	cmpobne	0,g14,Lcase.3_wloop	# non-zero shift count: use the eshro loop
272Lcase.3_wloop2:
273	cmpi	g2,4		# less than four bytes to move?
274	 lda	-4(g3),g3	# post-decrement src word addr
275	mov	MSW,g1 		# extract 4 bytes of src
276	 lda	(LSW),MSW	# move lsw to msw
277	subo	4,g4,g4		# pre-decrement dest ptr
278	 bl.f	Lcase.3_cloop	# branch if < four bytes left to move
279	 ld	(g3),LSW	# pre-fetch lsw of operand for double shift
280	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
281	 st	g1,(g4)		# store 4 bytes to dest
282	 b	Lcase.3_wloop2
283#endif
284
/*
 * Backward word loop for the paths where src and dest are not mutually
 * aligned (the dest end is word aligned by the time this loop runs).
 * Each pass extracts four bytes from the g6:g7 pair with eshro, using the
 * shift count in g14, pre-decrements the dest word pointer g4 and stores
 * the word there.  The pair is moved along by one word (LSW -> MSW) and
 * the next lower src word is prefetched into LSW.  When fewer than four
 * bytes remain, the word already extracted into g1 is handed to
 * Lcase.3_cloop to be stored byte by byte.
 */
285Lcase.4:
286Lcase.3_wloop:
287	cmpi	g2,4		# less than four bytes to move?
288	 lda	-4(g3),g3	# post-decrement src word addr
289	eshro	g14,g6,g1	# extract 4 bytes of src
290	 lda	(LSW),MSW	# move lsw to msw
291	subo	4,g4,g4		# pre-decrement dest ptr
292	 bl.f	Lcase.3_cloop	# branch if < four bytes left to move
293	ld	(g3),LSW	# pre-fetch lsw of operand for double shift
294	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
295	 st	g1,(g4)		# store 4 bytes to dest
296	 b	Lcase.3_wloop
297
/*
 * Backward copy with the ends of src and dest both word aligned (case 1),
 * plus the shared backward byte tail.
 * Entry is at Lcase.1 with MSW holding the next src word to store and g3
 * pointing at the word below it.  Whole words are copied, moving down in
 * memory, while at least four bytes remain.
 * Lcase.3_cloop is the tail entry (also used by the realigning word
 * loops).  It undoes the last pre-decrement of g4, then Lcase.1_cloop
 * stores bytes from g1 downwards until g4 equals g0, the start of dest.
 */
298Lcase.1_wloop:			# word copying loop
299	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
300	 ld	(g3),MSW	# pre-fetch next word of src
301	subo	4,g3,g3		# post-decrement src addr
302	 st	g1,(g4)		# store word in dest string
303Lcase.1:				# src and dest are word aligned
304	cmpi	g2,4		# check for fewer than four bytes to move
305	subo	4,g4,g4		# pre-decrement dest addr
306	 lda	(MSW),g1	# keep a copy of the src word
307	 bge.t	Lcase.1_wloop	# branch if at least four bytes to copy
308Lcase.3_cloop:
309	cmpibe.f 0,g2,Lexit_code	# Lexit if max_bytes is exhausted
310#if ! __i960_BIG_ENDIAN__
311	rotate	8,g1,g1		# move next byte into position for storing
312#endif
313	 lda	4(g4),g4	# pre-decremented dest addr 4 too much
314
315Lcase.1_cloop:
316	subi	1,g4,g4		# pre-decrement dest byte addr
317	cmpi	g4,g0		# has dest ptr reached beginning of dest?
318	 stob	g1,(g4)		# store the byte in dest
319#if __i960_BIG_ENDIAN__
320	shro	8,g1,g1		# move next byte into position for storing
321#else
322	rotate	8,g1,g1		# move next byte into position for storing
323#endif
324	 bne.t	Lcase.1_cloop	# loop until dest ptr reaches start of dest
325	b	Lexit_code
326
/*
 * Backward copy, end of src NOT word aligned (cases 2, 4 and 5).
 * On entry MSW holds the word address of the byte after the end of src,
 * g3 is the address of the src word below it, g5 is the address of the
 * byte after the end of src, g1 is the address of the byte after the end
 * of dest and g4 is the word address of g1.
 *
 * The shift count in g14 is set to 8 * (src_end & 3), negated on
 * big-endian builds.  If the end of dest is word aligned the word loop at
 * Lcase.4 is entered directly.  Otherwise four bytes are extracted into g5
 * and later moved to LSW for the byte loop at Lcase.25.  Depending on how
 * the byte offsets of the two ends compare, MSW and g3 are first stepped
 * down by one src word.
 */
327Lcase.245:
328	cmpo	g1,g4		# check alignment of dest
329	 ld	(MSW),MSW	# pre-fetch word with at least last byte
330	and	3,g5,g5		# compute shift count
331	 ld	(g3),LSW	# pre-fetch second to last word
332	shlo	3,g5,g14	# g14 = 8 * byte offset of src end within its word
333#if __i960_BIG_ENDIAN__
334	subo	g14,0,g14	# adjust shift count for big endian
335#endif
336	 be.t	Lcase.4		# branch if dest is word aligned
337	or	g4,g5,g5	# is src earlier in word, later, or sync w/ dst
338	cmpo    g1,g5		# < indicates last word of dest has less bytes
339	eshro	g14,g6,g5	# extract four bytes
340	 bl.t	1f
341	mov	LSW,MSW		# step the pair down by one src word
342#if ! __i960_BIG_ENDIAN__
343	 be.t	1f		# equal offsets: leave src word addr alone
344#endif
345	subo	4,g3,g3		# move src word addr to second word boundary
3461:
347	mov	g5,LSW		# byte loop stores from LSW
348	 b	Lcase.25
349
350
/*
 * Exit taken when len <= 0.  That test runs before g14 has been copied to
 * g13, so the return address is moved into g13 here before joining the
 * common exit code.
 */
351Lquick_exit:
352	mov	g14,g13		# g13 = return address expected by Lexit_code
353	 b	Lexit_code
354
355/* end of memmove */
356