1/* ANSI C standard library function strcpy.
2
3   Copyright (c) 2001-2008 Tensilica Inc.
4
5   Permission is hereby granted, free of charge, to any person obtaining
6   a copy of this software and associated documentation files (the
7   "Software"), to deal in the Software without restriction, including
8   without limitation the rights to use, copy, modify, merge, publish,
9   distribute, sublicense, and/or sell copies of the Software, and to
10   permit persons to whom the Software is furnished to do so, subject to
11   the following conditions:
12
13   The above copyright notice and this permission notice shall be included
14   in all copies or substantial portions of the Software.
15
16   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
23
24#include "xtensa-asm.h"
25
/* char *strcpy (char *dst, const char *src)

   Copies the NUL-terminated string at src, terminator included, to dst.

   In:   a2 = dst, a3 = src.
   Out:  a2 = dst.  a2 is never written in this function; a10 is the
         working destination pointer.
   Regs: a3 = src cursor, a4-a7 = MASK0-MASK3, a8 = scratch (current
         byte or word, loop count), a10 = dst cursor.

   Strategy:
     1. Copy one to three leading bytes until src is word-aligned
        (.Lsrc1mod2 / .Lsrc2mod4), returning early on a terminator.
     2. If dst is then word-aligned too, copy a word at a time (.Laligned).
     3. Otherwise copy a byte at a time (.Ldstunaligned).

   leaf_entry, leaf_return and MASK0-MASK3 are not defined in this file;
   presumably they come from xtensa-asm.h.  MASKn is assumed to select
   byte n of a loaded word, counting from the lowest address -- TODO
   confirm against that header.  */
26	.text
27	.begin schedule
28	.align	4
29	.literal_position
30	.global	strcpy
31	.type	strcpy, @function
32strcpy:
33	leaf_entry sp, 16
34	/* a2 = dst, a3 = src */
35
36	mov	a10, a2		// a10 = working dst pointer; a2 keeps dst as the return value
37	movi	a4, MASK0	// a4 is tested against a word to detect a zero byte 0
38	movi	a5, MASK1	// likewise for byte 1
39	movi	a6, MASK2	// likewise for byte 2
40	movi	a7, MASK3	// likewise for byte 3
41	bbsi.l	a3, 0, .Lsrc1mod2	// bit 0 of src set: src is odd
42	bbsi.l	a3, 1, .Lsrc2mod4	// bit 1 of src set: src is 2 mod 4
43.Lsrcaligned:
44
45	/* src is word-aligned here.  Check if the destination is aligned.  */
46	movi	a8, 3
47	bnone	a10, a8, .Laligned	// (dst & 3) == 0: word-at-a-time copy
48
49	j	.Ldstunaligned		// otherwise byte-at-a-time copy
50
/* Leading-byte copy.  Moves bytes singly until src is word-aligned, then
   joins .Lsrcaligned.  Every byte is stored before it is tested, so the
   terminator itself is copied before the early return at local label 1.
   When XTENSA_ESP32_PSRAM_CACHE_FIX is set, a memw follows the byte
   stores (presumably the ESP32 PSRAM cache workaround -- confirm).  */
51.Lsrc1mod2: // src address is odd
52	l8ui	a8, a3, 0	// get byte 0
53	addi	a3, a3, 1	// advance src pointer (placed between the load and its use)
54	s8i	a8, a10, 0	// store byte 0
55#if XTENSA_ESP32_PSRAM_CACHE_FIX
56	memw
57#endif
58	beqz	a8, 1f		// if byte 0 is zero, the string is complete
59	addi	a10, a10, 1	// advance dst pointer
60	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
	/* Otherwise src is now 2 mod 4: fall through.  */
61
62.Lsrc2mod4: // src address is 2 mod 4
63	l8ui	a8, a3, 0	// get byte 0
64	/* 1-cycle interlock */
65	s8i	a8, a10, 0	// store byte 0
66#if XTENSA_ESP32_PSRAM_CACHE_FIX
67	memw
68#endif
69	beqz	a8, 1f		// if byte 0 is zero, the string is complete
70	l8ui	a8, a3, 1	// get byte 1
71	addi	a3, a3, 2	// advance src pointer past both bytes
72	s8i	a8, a10, 1	// store byte 1
73	addi	a10, a10, 2	// advance dst pointer past both bytes
74#if XTENSA_ESP32_PSRAM_CACHE_FIX
75	memw
76#endif
77	bnez	a8, .Lsrcaligned	// byte 1 nonzero: src is now word-aligned
781:	leaf_return		// terminator has already been stored
79
80
81/* dst is word-aligned; src is word-aligned.

   Word-at-a-time copy.  Each iteration loads one word from src into a8
   and tests bytes 0, 1 and 2 with the masks in a4-a6.  If one of them is
   zero the word is NOT stored and .Lz0/.Lz1/.Lz2 write the remaining
   bytes at the current a10.  Otherwise the whole word is stored, and if
   byte 3 is zero the copy is finished (.Lz3).

   Two variants follow, selected by XCHAL_HAVE_LOOPS: one using the
   loop instruction, one using an ordinary backward branch.  */
82
83	.align	4
84#if XCHAL_HAVE_LOOPS
85#if XCHAL_HAVE_DENSITY
86	/* (2 mod 4) alignment for loop instruction:
	   the .align 4 above plus the 2-byte _movi.n below.  */
87#else
88	/* (1 mod 4) alignment for loop instruction:
	   the .align 4 above plus two pad bytes plus the 3-byte _movi below.  */
89	.byte	0
90	.byte	0
91#endif
92.Laligned:
	/* The underscore-prefixed opcodes keep the assembler from changing
	   the encoding, so the sizes assumed by the padding above hold.  */
93#if XCHAL_HAVE_DENSITY
94	_movi.n	a8, 0		// set up for the maximum loop count
95#else
96	_movi	a8, 0		// set up for the maximum loop count
97#endif
98	loop	a8, .Lz3	// loop forever (almost anyway); loop body ends at .Lz3
99	l32i	a8, a3, 0	// get word from src (a8 is reused as the data register)
100	addi	a3, a3, 4	// advance src pointer
101	bnone	a8, a4, .Lz0	// if byte 0 is zero
102	bnone	a8, a5, .Lz1	// if byte 1 is zero
103	bnone	a8, a6, .Lz2	// if byte 2 is zero
104	s32i	a8, a10, 0	// store word to dst
105#if XTENSA_ESP32_PSRAM_CACHE_FIX
	/* Read the word just written back and store it again; a8 then
	   holds the read-back value used by the byte 3 test below.  */
106	l32i	a8, a10, 0
107	s32i	a8, a10, 0
108#endif
109	bnone	a8, a7, .Lz3	// if byte 3 is zero, done: terminator was in the stored word
110	addi	a10, a10, 4	// advance dst pointer
111
112#else /* !XCHAL_HAVE_LOOPS */
113
	/* Local label 1 is reached only from the bany at the bottom; entry
	   at .Laligned skips this increment for the first word.  */
1141:	addi	a10, a10, 4	// advance dst pointer
115.Laligned:
116	l32i	a8, a3, 0	// get word from src
117	addi	a3, a3, 4	// advance src pointer
118	bnone	a8, a4, .Lz0	// if byte 0 is zero
119	bnone	a8, a5, .Lz1	// if byte 1 is zero
120	bnone	a8, a6, .Lz2	// if byte 2 is zero
121	s32i	a8, a10, 0	// store word to dst
122#if XTENSA_ESP32_PSRAM_CACHE_FIX
	/* Read the word just written back and store it again; a8 then
	   holds the read-back value used by the byte 3 test below.  */
123	l32i	a8, a10, 0
124	s32i	a8, a10, 0
125#endif
126
127	bany	a8, a7, 1b	// if byte 3 is nonzero, advance dst and iterate
128#endif /* !XCHAL_HAVE_LOOPS */
129
130.Lz3:	/* Byte 3 is zero.  The full word, terminator included, is already stored.  */
131	leaf_return
132
/* Exit from the .Laligned loop when byte 0 of the word in a8 is zero.
   The word has not been stored and a10 still addresses it, so only the
   terminator is written, at a10 + 0.  */
133.Lz0:	/* Byte 0 is zero.  */
134#ifdef __XTENSA_EB__
	/* s8i writes the low 8 bits of a8.  On big-endian those are
	   presumably byte 3 rather than byte 0, so store an explicit zero.  */
135	movi	a8, 0
136#endif
137	s8i	a8, a10, 0	// write the terminator
138#if XTENSA_ESP32_PSRAM_CACHE_FIX
139	memw
140#endif
141	leaf_return
142
/* Exit from the .Laligned loop when byte 0 is nonzero and byte 1 of the
   word in a8 is zero.  The word has not been stored; byte 0 and the
   terminator are written together as one halfword at a10 + 0.  */
143.Lz1:	/* Byte 1 is zero.  */
144#ifdef __XTENSA_EB__
	/* s16i writes the low 16 bits of a8.  On big-endian, bytes 0 and 1
	   are presumably in the upper half, so move bits 31..16 down first.  */
145        extui   a8, a8, 16, 16
146#endif
147	s16i	a8, a10, 0	// write byte 0 and the terminator
148#if XTENSA_ESP32_PSRAM_CACHE_FIX
149	memw
150#endif
151	leaf_return
152
/* Exit from the .Laligned loop when bytes 0 and 1 are nonzero and byte 2
   of the word in a8 is zero.  The word has not been stored; bytes 0 and
   1 are written as one halfword, then the terminator as a single byte.  */
153.Lz2:	/* Byte 2 is zero.  */
154#ifdef __XTENSA_EB__
	/* s16i writes the low 16 bits of a8.  On big-endian, bytes 0 and 1
	   are presumably in the upper half, so move bits 31..16 down first.  */
155        extui   a8, a8, 16, 16
156#endif
157	s16i	a8, a10, 0	// write bytes 0 and 1
158	movi	a8, 0
159	s8i	a8, a10, 2	// write the terminator at offset 2
160#if XTENSA_ESP32_PSRAM_CACHE_FIX
161	memw
162#endif
163	leaf_return
164
/* Copy path for a destination that is not word-aligned (src is
   word-aligned on entry, see .Lsrcaligned).  The enabled branch of the
   conditional below is a plain byte loop; the other branch is an
   unfinished word-based draft that is never assembled.  */
165#if 1
166/* For now just use byte copy loop for the unaligned destination case.

   Each iteration copies one byte from a3 to a10 and advances both
   pointers.  The byte is stored before it is tested, so the terminator
   is copied before the loop exits to local label 2.  */
167
168	.align	4
169#if XCHAL_HAVE_LOOPS
170#if XCHAL_HAVE_DENSITY
171	/* (2 mod 4) alignment for loop instruction:
	   the .align 4 above plus the 2-byte _movi.n below.  */
172#else
173	/* (1 mod 4) alignment for loop instruction:
	   the .align 4 above plus two pad bytes plus the 3-byte _movi below.  */
174	.byte	0
175	.byte	0
176#endif
177#endif
178.Ldstunaligned:
179
180#if XCHAL_HAVE_LOOPS
181#if XCHAL_HAVE_DENSITY
182	_movi.n	a8, 0		// set up for the maximum loop count
183#else
184	_movi	a8, 0		// set up for the maximum loop count
185#endif
186	loop	a8, 2f		// loop forever (almost anyway); loop body ends at label 2
187#endif
1881:	l8ui	a8, a3, 0	// get next byte from src
189	addi	a3, a3, 1	// advance src pointer
190	s8i	a8, a10, 0	// store byte to dst
191	addi	a10, a10, 1	// advance dst pointer
192#if XTENSA_ESP32_PSRAM_CACHE_FIX
193	memw
194#endif
195#if XCHAL_HAVE_LOOPS
196	beqz	a8, 2f		// terminator copied: leave the loop
197#else
198	bnez	a8, 1b		// byte was nonzero: copy the next one
199#endif
2002:	leaf_return
201
202#else /* 0 */
203
204/* This code is not functional yet.

   It is never assembled while the enclosing condition is "if 1".
   NOTE(review): gaps visible in this draft, to resolve before enabling:
   - .Lu3, .Lu4, .Lu5 and .Lu6 are branch targets but are not defined
     in this file;
   - the first load reads through a2, although the working dst pointer
     everywhere else is a10;
   - .Lu2 has no big-endian extui before its s16i, unlike .Lz2;
   - the "dst is word-aligned; src is unaligned" comment below looks
     reversed for this path (src is read with l32i, dst is the
     unaligned pointer) -- confirm the intent.  */
205
206.Ldstunaligned:
207	l32i	a9, a2, 0	// load word from dst
208#ifdef __XTENSA_EB__
209	ssa8b	a9		// rotate by dst alignment so that
210	src	a9, a9, a9	// shift in loop will put back in place
211	ssa8l	a9		// shift left by byte*8
212#else
213	ssa8l	a9		// rotate by dst alignment so that
214	src	a9, a9, a9	// shift in loop will put back in place
215	ssa8b	a9		// shift left by 32-byte*8
216#endif
217
218/* dst is word-aligned; src is unaligned.  */
219
220.Ldstunalignedloop:
221	l32i	a8, a3, 0	// get word from src
222	/* 1-cycle interlock */
223	bnone	a8, a4, .Lu0	// if byte 0 is zero
224	bnone	a8, a5, .Lu1	// if byte 1 is zero
225	bnone	a8, a6, .Lu2	// if byte 2 is zero
226	src	a9, a8, a9	// combine last word and this word
227	s32i	a9, a10, 0	// store word to dst
228	bnone	a8, a7, .Lu3	// if byte 3 is zero
229	l32i	a9, a3, 4	// get word from src
230	addi	a3, a3, 8	// advance src pointer
231	bnone	a9, a4, .Lu4	// if byte 0 is zero
232	bnone	a9, a5, .Lu5	// if byte 1 is zero
233	bnone	a9, a6, .Lu6	// if byte 2 is zero
234	src	a8, a9, a8	// combine last word and this word
235	s32i	a8, a10, 4	// store word to dst
236	addi	a10, a10, 8	// advance dst pointer
237	bany	a8, a7, .Ldstunalignedloop // if byte 3 is nonzero, iterate
238
239	/* Byte 7 is zero.  */
240.Lu7:	leaf_return
241
242.Lu0:	/* Byte 0 is zero.  */
243#ifdef __XTENSA_EB__
244	movi	a8, 0
245#endif
246	s8i	a8, a10, 0
247#if XTENSA_ESP32_PSRAM_CACHE_FIX
248	memw
249#endif
250	leaf_return
251
252.Lu1:	/* Byte 1 is zero.  */
253#ifdef __XTENSA_EB__
254        extui   a8, a8, 16, 16
255#endif
256	s16i	a8, a10, 0
257#if XTENSA_ESP32_PSRAM_CACHE_FIX
258	memw
259#endif
260	leaf_return
261
262.Lu2:	/* Byte 2 is zero.  */
263	s16i	a8, a10, 0
264	movi	a8, 0
265	s8i	a8, a10, 2
266#if XTENSA_ESP32_PSRAM_CACHE_FIX
267	memw
268#endif
269	leaf_return
270
271#endif /* 0 */
272	.end schedule
273
	/* Record the size of strcpy: everything from the strcpy label up to here.  */
274	.size	strcpy, . - strcpy
275