1/*******************************************************************************
2 *
3 * Copyright (c) 1993 Intel Corporation
4 *
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation.  Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation.  In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
15 *
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE.  Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
23 *
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND.  IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
28 *
29 ******************************************************************************/
30
31#include <picolibc.h>
32
/*
 * File-level assembler setup: name the logical source file, then emit the
 * .pic and .pid directives only when the preprocessor symbols __PIC and
 * __PID, respectively, are defined.
 * NOTE(review): .pic / .pid presumably mark the object as position-
 * independent code / data -- confirm against the i960 assembler docs.
 */
33	.file "memcpy.s"
34#ifdef	__PIC
35	.pic
36#endif
37#ifdef	__PID
38	.pid
39#endif
40/*
41 * (c) copyright 1988,1993 Intel Corp., all rights reserved
42 */
43/*
44	procedure memmove  (optimized assembler version for the 80960K series)
45	procedure memcpy   (optimized assembler version for the 80960K series)
46
47	dest_addr = memmove (dest_addr, src_addr, len)
48	dest_addr = memcpy  (dest_addr, src_addr, len)
49
50	copy len bytes pointed to by src_addr to the space pointed to by
51	dest_addr.  Return the original dest_addr.
52
53	These routines will work even if the arrays overlap.  The standard
54	requires this of memmove, but memcpy is allowed to fail if overlap
55	is present.  Nevertheless, it is implemented the same as memmove
56	because the overhead is trifling.
57
58	Undefined behavior will occur if the end of the source array is in
59	the last two words of the program's allocated memory space.  This
60	is so because the routine fetches ahead.  Disallowing the fetch
61	ahead would impose a severe performance penalty.
62
63	Strategy:
64
65	Fetch the source array by words and store them by words to the
66	destination array, until there are fewer than four bytes left
67	to copy.  Then, using the last word of the source (the one that
68	contains the remaining 0, 1, 2, or 3 bytes to be copied), store
69	a byte at a time until done.
70
71	Tactics:
72
73	1) Do NOT try to fetch and store the words in a word aligned manner
74	because, in my judgement, the performance degradation experienced due
75	to non-aligned accesses does NOT outweigh the time and complexity added
76	by the preamble and convoluted body that would be necessary to assure
77	alignment.  This is supported by the intuition that most source and
78	destination arrays (even more true of most big source arrays) will
79	be word aligned to begin with.
80
81	2) For non-overlapping arrays, rather than decrementing len to zero,
82	I calculate the address of the byte after the last byte of the
83	destination array, and quit when the destination byte pointer passes
84	that.
85
86	3) For overlapping arrays where the source starts at a lower address
87	than the destination the move is performed in reverse order.
88
89	4) Overlapping arrays where the source starts at a higher address
90	are treated like the non-overlapping case.  Where the two arrays exactly
91	coincide, the routine is short-circuited;  no move is done at all.
92	This costs only one cycle.
93*/
94
95	.globl _memcpy, _memmove
96	.globl __memcpy, __memmove
97	.leafproc _memmove, __memmove
98	.leafproc _memcpy, __memcpy
99	.align    2
/*
 * memmove / memcpy: copy g2 bytes from the address in g1 to the address
 * in g0.  g0 is never written, so it still holds the destination address
 * when the routine exits.  Both names share one body.
 *
 * Entry points:
 *   _memmove / _memcpy    load the address of Lrett into g14, then fall
 *                         through into the second entry.
 *   __memmove / __memcpy  expect g14 to be set up by the caller already.
 *   NOTE(review): these look like the call and bal entries of an i960
 *   leaf procedure, as declared by .leafproc -- confirm with the
 *   assembler documentation.
 *
 * Register roles in this routine:
 *   g0   destination start (read only)
 *   g1   source start; advanced by 4 per iteration in the forward loop
 *   g2   byte count; tested as a signed integer, <= 0 copies nothing
 *   g3   source start + count; walked downward by the backward loop
 *   g4   word currently being stored by the forward loop
 *   g5   forward destination pointer
 *   g6   destination start + count: end marker for the forward loop,
 *        destination pointer for the backward loop
 *   g7   most recently fetched source word
 *   g13  copy of g14 taken on entry; branch target used at Lexit
 *   g14  cleared to 0 just before leaving
 *
 * Direction choice: "cmpo g0,g1" orders dest against src, "be" leaves
 * when they are equal, and "concmpo g3,g0" then orders (src + count)
 * against dest.  "bg" selects the backward copy.
 * NOTE(review): this relies on concmpo leaving the condition code alone
 * when the earlier cmpo reported dest < src, so that only the case
 * src < dest < src + count goes backward -- confirm with the i960 manual.
 *
 * Memory touched outside the arrays: each loop loads the next source word
 * before testing whether it is needed, so the forward path may read past
 * the end of the source and the backward path may read a word that begins
 * up to 4 bytes below the source start.
 */
100_memmove:
101_memcpy:
102#ifndef __PIC
103 	lda	Lrett,g14	# g14 = address of Lrett
104#else
105 	lda	Lrett-(.+8)(ip),g14	# g14 = address of Lrett, formed ip-relative
106#endif
107__memmove:
108__memcpy:
109	mov	g14, g13	# keep the return address in g13; g14 is cleared at Lexit
110	cmpibge	0,g2,Lexit	# signed test: nothing to copy when len <= 0
111	cmpo	g0,g1		# order dest against src (first half of overlap test)
112	addo	g2,g1,g3	# g3 = src + len, one past the last source byte
113	be	Lexit		# dest == src: no move necessary
114	concmpo	g3,g0		# order src end against dest (see header note)
115	addo	g2, g0, g6	# g6 = dest + len, one past the last dest byte
116	bg	Lbackwards	# dest starts inside the source: copy from the top down
117
118	ld	(g1), g7	# fetch first word of source
119	mov	g0, g5		# g5 = running dest pointer; g0 is left untouched
120	b	Lwloop_b	# enter the loop at its test
121
122Lwloop_a:
123	ld	(g1), g7	# fetch ahead next word of source
124	st	g4, (g5)	# store the previously fetched word to dest
125	addo	4, g5, g5	# post-increment dest pointer
126Lwloop_b:			# forward word copying loop (entry at the test)
127	addo	4, g1, g1	# pre-increment src pointer
128	cmpo	g3, g1		# order src end against the advanced src pointer
129	mov	g7, g4		# keep a copy of the current word
130	bge	Lwloop_a		# loop while 4 or more bytes remain
131	cmpobe	g6, g5, Lexit    # quit if no more bytes to move
132
133Lcloop_a:			# forward byte copying loop (1 to 3 bytes left)
134	stob	g4, (g5)	# store the low-order byte of the word
135	shro	8, g4, g4	# shift the next byte down into the low-order position
136	addo	1, g5, g5	# advance dest pointer
137	cmpobne	g6, g5, Lcloop_a	# repeat until dest pointer reaches dest + len
138
139Lexit:
140	mov	0, g14		# clear g14 before leaving
141	bx	(g13)		# g0 = dest array address; g14 = 0
142Lrett:
143	ret			# reached through g13 after entry via _memmove/_memcpy
144
145Lwloop.a:
146	subo	4, g6, g6	# pre-decrement dest pointer
147	st	g7, (g6)	# store word to dest
148Lbackwards:			# backward word copying loop (entry at the test)
149	subo	4, g3, g3	# pre-decrement src pointer
150	cmpo	g1, g3		# order src start against the decremented src pointer
151	ld	(g3), g7	# fetch ahead next (lower) word of source
152	ble	Lwloop.a		# loop while 4 or more bytes remain
153	cmpobe	g6, g0, Lexit	# quit if no more bytes to move
154
155Lcloop.a:			# backward byte copying loop (1 to 3 bytes left)
156	subo	1, g6, g6	# pre-decrement dest pointer
157	rotate	8, g7, g7	# rotate the next byte into the low-order position
158	stob	g7, (g6)	# store byte
159	cmpobne	g6, g0, Lcloop.a	# repeat until dest pointer is back at dest start
160	b	Lexit
161
162/* end of memmove */
163