1/*
2   Copyright (c) 2015, Synopsys, Inc. All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6
7   1) Redistributions of source code must retain the above copyright notice,
8   this list of conditions and the following disclaimer.
9
10   2) Redistributions in binary form must reproduce the above copyright notice,
11   this list of conditions and the following disclaimer in the documentation
12   and/or other materials provided with the distribution.
13
14   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
15   may be used to endorse or promote products derived from this software
16   without specific prior written permission.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28   POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/* This implementation is optimized for performance.  For code size a generic
32   implementation of this function from newlib/libc/string/memcpy.c will be
33   used.  */
34#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED)
35
36#include "asm.h"
37
38#if defined (__ARCHS__)
39
/* Endian-specific building blocks for the source-unaligned copy loops.
   - SHIFT_1 and SHIFT_2 shift in opposite directions.  In the loops, the
     SHIFT_1 result of a freshly loaded word is ORed with the carry kept in
     r5, and SHIFT_2 of that same word becomes the next carry.
   - MERGE_1 and MERGE_2 position the first halfword and the first byte so
     that they can be ORed into the initial carry of the off-by-1 case.
   - EXTRACT_1 and EXTRACT_2 pull the trailing halfword and byte back out of
     the carry once that loop has finished.  */
40#ifdef __LITTLE_ENDIAN__
41# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
42# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
/* Little endian: MERGE_2 expands to nothing (the byte stays in the low bits),
   and EXTRACT_1 ignores IMM, masking with 0xFFFF instead of shifting.  */
43# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
44# define MERGE_2(RX,RY,IMM)
45# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
46# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
47#else
48# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
49# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
/* Big endian: both MERGE macros shift left, and EXTRACT_2 ignores IMM, always
   shifting right by 8.  */
50# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
51# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
52# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
53# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
54#endif
55
/* Access width of the unrolled main copy loop.
   LOADX and STOREX move one unit with post-increment of the pointer; the main
   loop issues four of each per iteration.  ZOLSHFT is log2 of the bytes moved
   per iteration and ZOLAND is the matching remainder mask, so these values
   must be kept in step with the unit size.  PREFETCH_READ and PREFETCH_WRITE
   touch a fixed offset ahead of the running pointer.  */
56#ifdef __ARC_LL64__
/* 8-byte ldd/std units: 4 x 8 = 32 bytes per iteration.  */
57# define PREFETCH_READ(RX)	prefetch	[RX, 56]
58# define PREFETCH_WRITE(RX)	prefetchw	[RX, 64]
59# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
60# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
61# define ZOLSHFT		5
62# define ZOLAND			0x1F
63#else
/* 4-byte ld/st units: 4 x 4 = 16 bytes per iteration.  */
64# define PREFETCH_READ(RX)	prefetch	[RX, 28]
65# define PREFETCH_WRITE(RX)	prefetchw	[RX, 32]
66# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
67# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
68# define ZOLSHFT		4
69# define ZOLAND			0xF
70#endif
71
72#ifdef __ARC_ALIGNED_ACCESS__
; memcpy, variant assembled when __ARC_ALIGNED_ACCESS__ is defined.
;
; Register roles as used below:
;   r0      destination; never written, so it is still intact on return
;   r1      source pointer, advanced by every load
;   r2      count of bytes still to be copied
;   r3      working copy of the destination pointer, advanced by every store
;   r4      alignment remainder (dst & 3, then src & 3); reused as scratch
;   r5-r10  data being moved and the carry between words
;
; Outline: sizes up to 8 are copied bytewise.  Larger sizes first copy bytes
; until the destination is 4-byte aligned, then take one of four paths chosen
; by src & 3, so that every word and halfword access is naturally aligned.
;
; NOTE: instructions following a ".d" branch or jump sit in its delay slot
; and execute whether or not the branch is taken; several paths rely on it.
73ENTRY (memcpy)
74	prefetch  [r1]		; Prefetch the read location
75	prefetchw [r0]		; Prefetch the write location
76	mov.f	0, r2
77; if size is zero
78	jz.d	[blink]
79	mov	r3, r0		; don't clobber ret val
80
81; if size <= 8
; The delay-slot mov.f loads lp_count with the size; r2 is non-zero here, so Z
; is clear and the lpnz at .Lsmallchunk runs the byte loop r2 times.
82	cmp	r2, 8
83	bls.d	@.Lsmallchunk
84	mov.f	lp_count, r2
85
; Align the destination: copy 4 - (dst & 3) bytes one at a time.
; and.f sets Z when the destination is already aligned; rsub does not touch
; the flags, so lpnz then skips the loop entirely.
86	and.f	r4, r0, 0x03
87	rsub	lp_count, r4, 4
88	lpnz	@.Laligndestination
89	; LOOP BEGIN
90	ldb.ab	r5, [r1,1]
91	sub	r2, r2, 1
92	stb.ab	r5, [r3,1]
93.Laligndestination:
94
95; Check the alignment of the source
; r4 = src & 3.  The lsr.f further down is the delay slot of bnz.d, so it also
; runs when the branch is taken; each unaligned path sets lp_count again
; before starting its own loop.
96	and.f	r4, r1, 0x03
97	bnz.d	@.Lsourceunaligned
98
99; CASE 0: Both source and destination are 32bit aligned
100; Convert len to Dwords, unfold x4
; lp_count = r2 >> ZOLSHFT iterations, each moving four LOADX/STOREX units.
; All four loads are issued before the four stores.
101	lsr.f	lp_count, r2, ZOLSHFT
102	lpnz	@.Lcopy32_64bytes
103	; LOOP START
104	LOADX (r6, r1)
105	PREFETCH_READ (r1)
106	PREFETCH_WRITE (r3)
107	LOADX (r8, r1)
108	LOADX (r10, r1)
109	LOADX (r4, r1)
110	STOREX (r6, r3)
111	STOREX (r8, r3)
112	STOREX (r10, r3)
113	STOREX (r4, r3)
114.Lcopy32_64bytes:
115
; Whatever the unrolled loop left over (r2 & ZOLAND bytes) is copied bytewise.
; .Lsmallchunk is also entered directly for sizes <= 8, with lp_count and the
; flags already set up by the delay slot of the bls.d above.
116	and.f	lp_count, r2, ZOLAND ;Last remaining 31 bytes
117.Lsmallchunk:
118	lpnz	@.Lcopyremainingbytes
119	; LOOP START
120	ldb.ab	r5, [r1,1]
121	stb.ab	r5, [r3,1]
122.Lcopyremainingbytes:
123
124	j	[blink]
125; END CASE 0
126
; Source is not 4-byte aligned; r4 = src & 3 is 1, 2 or 3.
;   r4 == 2 -> .LunalignedOffby2
;   r4 == 3 -> .LunalignedOffby3 (bhi reuses the flags of the cmp, since the
;              delay-slot sub has no .f and leaves them alone)
;   r4 == 1 -> falls through to CASE 1
; Delay slots: "sub r2, r2, 1" runs on all three paths; "ldb.ab r5" runs for
; the off-by-1 and off-by-3 paths and fetches the first source byte.
127.Lsourceunaligned:
128	cmp	r4, 2
129	beq.d	@.LunalignedOffby2
130	sub	r2, r2, 1
131
132	bhi.d	@.LunalignedOffby3
133	ldb.ab	r5, [r1, 1]
134
135; CASE 1: The source is unaligned, off by 1
136	; Hence I need to read 1 byte for a 16bit alignment
137	; and 2bytes to reach 32bit alignment
; After the ldh below, three source bytes are held in registers (r5 = byte,
; r6 = halfword), r2 has been lowered by 3 in total and r1 is 4-byte aligned.
; The MERGE macros and the or combine the three bytes into the carry r5.
138	ldh.ab	r6, [r1, 2]
139	sub	r2, r2, 2
140	; Convert to words, unfold x2
141	lsr.f	lp_count, r2, 3
142	MERGE_1 (r6, r6, 8)
143	MERGE_2 (r5, r5, 24)
144	or	r5, r5, r6
145
146	; Both src and dst are aligned
; Each pass loads two words and stores two words.  A stored word is made of
; the three carried bytes plus one byte of the new word; the other three
; bytes of the new word become the next carry in r5.
147	lpnz	@.Lcopy8bytes_1
148	; LOOP START
149	ld.ab	r6, [r1, 4]
150	prefetch [r1, 28]	;Prefetch the next read location
151	ld.ab	r8, [r1,4]
152	prefetchw [r3, 32]	;Prefetch the next write location
153
154	SHIFT_1	(r7, r6, 24)
155	or	r7, r7, r5
156	SHIFT_2	(r5, r6, 8)
157
158	SHIFT_1	(r9, r8, 24)
159	or	r9, r9, r5
160	SHIFT_2	(r5, r8, 8)
161
162	st.ab	r7, [r3, 4]
163	st.ab	r9, [r3, 4]
164.Lcopy8bytes_1:
165
; Flush the three bytes still held in the carry: a halfword, then a byte.
166	; Write back the remaining 16bits
167	EXTRACT_1 (r6, r5, 16)
168	sth.ab	r6, [r3, 2]
169	; Write back the remaining 8bits
170	EXTRACT_2 (r5, r5, 16)
171	stb.ab	r5, [r3, 1]
172
; r2 & 7 bytes (0 to 7) were not covered by the 8-byte loop; copy them bytewise.
173	and.f	lp_count, r2, 0x07 ;Last 8bytes
174	lpnz	@.Lcopybytewise_1
175	; LOOP START
176	ldb.ab	r6, [r1,1]
177	stb.ab	r6, [r3,1]
178.Lcopybytewise_1:
179	j	[blink]
180
181.LunalignedOffby2:
182; CASE 2: The source is unaligned, off by 2
; The halfword load makes r1 4-byte aligned.  Together with the delay-slot
; sub at .Lsourceunaligned, r2 is lowered by 2 for the two bytes now in r5.
183	ldh.ab	r5, [r1, 2]
184	sub	r2, r2, 1
185
186	; Both src and dst are aligned
187	; Convert to words, unfold x2
188	lsr.f	lp_count, r2, 3
; Big endian only: the loop expects the carry in the upper half of r5, so it
; is moved up front, but only when the loop will actually run (NZ from the
; lsr.f above).
189#ifdef __BIG_ENDIAN__
190	asl.nz	r5, r5, 16
191#endif
192	lpnz	@.Lcopy8bytes_2
193	; LOOP START
194	ld.ab	r6, [r1, 4]
195	prefetch [r1, 28]	;Prefetch the next read location
196	ld.ab	r8, [r1,4]
197	prefetchw [r3, 32]	;Prefetch the next write location
198
199	SHIFT_1	(r7, r6, 16)
200	or	r7, r7, r5
201	SHIFT_2	(r5, r6, 16)
202
203	SHIFT_1	(r9, r8, 16)
204	or	r9, r9, r5
205	SHIFT_2	(r5, r8, 16)
206
207	st.ab	r7, [r3, 4]
208	st.ab	r9, [r3, 4]
209.Lcopy8bytes_2:
210
; No instruction in the loop body sets flags, so NZ here still means that the
; loop ran and the carry sits in the upper half of r5; move it back down
; before storing the pending halfword.
211#ifdef __BIG_ENDIAN__
212	lsr.nz	r5, r5, 16
213#endif
214	sth.ab	r5, [r3, 2]
215
; r2 & 7 bytes (0 to 7) remain; copy them bytewise.
216	and.f	lp_count, r2, 0x07 ;Last 8bytes
217	lpnz	@.Lcopybytewise_2
218	; LOOP START
219	ldb.ab	r6, [r1,1]
220	stb.ab	r6, [r3,1]
221.Lcopybytewise_2:
222	j	[blink]
223
224.LunalignedOffby3:
225; CASE 3: The source is unaligned, off by 3
226; Hence, I need to read 1byte for achieve the 32bit alignment
; That byte was already loaded into r5, and r2 lowered by 1, in the delay
; slots at .Lsourceunaligned; r1 is 4-byte aligned on entry here.
227
228	; Both src and dst are aligned
229	; Convert to words, unfold x2
230	lsr.f	lp_count, r2, 3
; Big endian only: place the carried byte in the top byte of r5 when the loop
; is going to run (ne is the same condition as nz).
231#ifdef __BIG_ENDIAN__
232	asl.ne	r5, r5, 24
233#endif
234	lpnz	@.Lcopy8bytes_3
235	; LOOP START
236	ld.ab	r6, [r1, 4]
237	prefetch [r1, 28]	;Prefetch the next read location
238	ld.ab	r8, [r1,4]
239	prefetchw [r3, 32]	;Prefetch the next write location
240
241	SHIFT_1	(r7, r6, 8)
242	or	r7, r7, r5
243	SHIFT_2	(r5, r6, 24)
244
245	SHIFT_1	(r9, r8, 8)
246	or	r9, r9, r5
247	SHIFT_2	(r5, r8, 24)
248
249	st.ab	r7, [r3, 4]
250	st.ab	r9, [r3, 4]
251.Lcopy8bytes_3:
252
; Flags are unchanged since the lsr.f before the loop: NZ means the loop ran
; and the pending byte is in the top byte of r5, so bring it back down.
253#ifdef __BIG_ENDIAN__
254	lsr.nz	r5, r5, 24
255#endif
256	stb.ab	r5, [r3, 1]
257
; r2 & 7 bytes (0 to 7) remain; copy them bytewise.
258	and.f	lp_count, r2, 0x07 ;Last 8bytes
259	lpnz	@.Lcopybytewise_3
260	; LOOP START
261	ldb.ab	r6, [r1,1]
262	stb.ab	r6, [r3,1]
263.Lcopybytewise_3:
264	j	[blink]
265
266ENDFUNC (memcpy)
267
268#else
269
;;; memcpy, variant assembled when __ARC_ALIGNED_ACCESS__ is not defined.
;;;
;;; Register roles as used below:
;;;   r0      destination; never written, so it is still intact on return
;;;   r1      source pointer, advanced by every load
;;;   r2      count of bytes still to be copied
;;;   r3      working copy of the destination pointer, advanced by every store
;;;   r4-r10  data being moved
;;;
;;; No alignment fix-up is done here: the unrolled loop runs directly on the
;;; incoming pointers.  NOTE(review): this presumably relies on the target
;;; tolerating unaligned word and double-word accesses; confirm against the
;;; build configuration.
;;;
;;; Instructions following a ".d" branch or jump sit in its delay slot and
;;; execute whether or not the branch is taken.
270ENTRY(memcpy)
271	prefetch  [r1]		; Prefetch the read location
272	prefetchw [r0]		; Prefetch the write location
273	mov.f	0, r2
274;;; if size is zero
275	jz.d	[blink]
276	mov	r3, r0		; don't clobber ret val
277
278;;; if size <= 8
;;; The delay-slot mov.f loads lp_count with the size; r2 is non-zero here, so
;;; Z is clear and the lpnz at .Lsmallchunk runs the byte loop r2 times.
279	cmp	r2, 8
280	bls.d	@.Lsmallchunk
281	mov.f	lp_count, r2
282
283;;; Convert len to Dwords, unfold x4
;;; lp_count = r2 >> ZOLSHFT iterations, each moving four LOADX/STOREX units.
;;; All four loads are issued before the four stores.
284	lsr.f	lp_count, r2, ZOLSHFT
285	lpnz	@.Lcopyfast
286	;; LOOP START
287	LOADX (r6, r1)
288	PREFETCH_READ (r1)
289	PREFETCH_WRITE (r3)
290	LOADX (r8, r1)
291	LOADX (r10, r1)
292	LOADX (r4, r1)
293	STOREX (r6, r3)
294	STOREX (r8, r3)
295	STOREX (r10, r3)
296	STOREX (r4, r3)
297.Lcopyfast:
298
;;; Tail handling.  With __ARC_LL64__ the leftover (r2 & ZOLAND) is first
;;; copied in 8-byte units, and only the final r2 & 7 bytes go to the byte
;;; loop.  Otherwise the whole leftover goes to the byte loop.
299#ifdef __ARC_LL64__
300	and     r2, r2, ZOLAND	;Remaining 31 bytes
301	lsr.f   lp_count, r2, 3	;Convert to 64-bit words.
302	lpnz	@.Lcopy64b
303	;; LOOP START
304	ldd.ab	r6,[r1,8]
305	std.ab	r6,[r3,8]
306.Lcopy64b:
307
308	and.f	lp_count, r2, 0x07 ; Last 7 bytes
309#else
310	and.f	lp_count, r2, ZOLAND
311#endif
312
;;; Byte loop; skipped when the preceding flag-setting instruction produced
;;; zero.  Also the direct target for sizes <= 8.
313.Lsmallchunk:
314	lpnz	@.Lcopyremainingbytes
315	;; LOOP START
316	ldb.ab	r5, [r1,1]
317	stb.ab	r5, [r3,1]
318.Lcopyremainingbytes:
319
320	j	[blink]
321
322ENDFUNC(memcpy)
323#endif
324
325#endif /* __ARCHS__ */
326
327#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */
328