/* ANSI C standard library function strcpy.

   Copyright (c) 2001-2008 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <picolibc.h>

#include "xtensa-asm.h"

/* char *strcpy (char *dst, const char *src)

   Copy the NUL-terminated string at src, terminator included, to dst.

   Register usage, as set up below:
     a2     dst on entry; never written, so it still holds dst on return
     a3     src read pointer
     a10    dst write pointer (working copy of a2)
     a4-a7  MASK0-MASK3, used to test bytes 0-3 of a loaded word for zero
     a8     scratch: the byte or word being copied, and the loop count

   leaf_entry, leaf_return and MASK0-MASK3 are not defined in this file;
   presumably they come from "xtensa-asm.h".

   Strategy: copy one to three single bytes until src is word-aligned.
   Then use the word loop at .Laligned if dst is word-aligned as well,
   otherwise the byte loop at .Ldstunaligned.

   The memw instructions are only assembled when
   XTENSA_ESP32_PSRAM_CACHE_FIX is set; the problem they work around is
   not described in this file.  */

	.text
	.begin schedule
	.align	4
	.literal_position
	.global	strcpy
	.type	strcpy, @function
strcpy:
	leaf_entry sp, 16
	/* a2 = dst, a3 = src */

	mov	a10, a2		// a10 = dst write pointer; a2 is left alone as the return value
	movi	a4, MASK0	// a4-a7 = masks for the per-byte zero tests
	movi	a5, MASK1
	movi	a6, MASK2
	movi	a7, MASK3
	bbsi.l	a3, 0, .Lsrc1mod2	// bit 0 of src set: odd address
	bbsi.l	a3, 1, .Lsrc2mod4	// bit 1 of src set: 2 mod 4
.Lsrcaligned:
	/* src is word-aligned from here on.  */

	/* Check if the destination is aligned.  */
	movi	a8, 3
	bnone	a10, a8, .Laligned	// low two bits of dst clear: copy by words

	j	.Ldstunaligned

.Lsrc1mod2: // src address is odd
	l8ui	a8, a3, 0	// get byte 0
	addi	a3, a3, 1	// advance src pointer
	s8i	a8, a10, 0	// store byte 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	beqz	a8, 1f		// if byte 0 is zero, the copy is complete
	addi	a10, a10, 1	// advance dst pointer
	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
	/* Otherwise src is now 2 mod 4: fall through.  */

.Lsrc2mod4: // src address is 2 mod 4
	l8ui	a8, a3, 0	// get byte 0
	/* 1-cycle interlock */
	s8i	a8, a10, 0	// store byte 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	beqz	a8, 1f		// if byte 0 is zero, the copy is complete
	l8ui	a8, a3, 1	// get byte 1
	addi	a3, a3, 2	// advance src pointer
	s8i	a8, a10, 1	// store byte 1
	addi	a10, a10, 2	// advance dst pointer
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	bnez	a8, .Lsrcaligned	// byte 1 nonzero: continue with src word-aligned
1:	leaf_return


/* dst is word-aligned; src is word-aligned.

   Copy one 32-bit word per iteration.  Every loaded word is tested one
   byte at a time against MASK0-MASK3 (a4-a7).  A zero in byte 0, 1 or 2
   leaves for .Lz0, .Lz1 or .Lz2 before the word has been stored; a zero
   in byte 3 is detected after the store and leaves for .Lz3, which only
   returns.  On entry a3 = src, a10 = dst; a8 is scratch.  */

	.align	4
#if XCHAL_HAVE_LOOPS
	/* Zero-overhead loop variant.  The padding below, together with the
	   2-byte _movi.n or 3-byte _movi at .Laligned, puts the loop
	   instruction at the alignment named in the comments.  The pad bytes
	   are never executed: the code before them ends in a return.  */
#if XCHAL_HAVE_DENSITY
	/* (2 mod 4) alignment for loop instruction */
#else
	/* (1 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
.Laligned:
	/* The underscore-prefixed opcodes keep the instruction size fixed so
	   that the alignment computed above holds.  */
#if XCHAL_HAVE_DENSITY
	_movi.n	a8, 0		// set up for the maximum loop count
#else
	_movi	a8, 0		// set up for the maximum loop count
#endif
	loop	a8, .Lz3	// loop forever (almost anyway)
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a4, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	l32i	a8, a10, 0	// workaround: read the stored word back
	s32i	a8, a10, 0	// ... and store it a second time
#endif
	bnone	a8, a7, .Lz3	// if byte 3 is zero, leave the loop
	addi	a10, a10, 4	// advance dst pointer
	/* .Lz3 must stay directly after the instruction above: it is the
	   loop end.  */

#else /* !XCHAL_HAVE_LOOPS */

	/* Branch-based variant: entered at .Laligned, iterates through 1.  */
1:	addi	a10, a10, 4	// advance dst pointer
.Laligned:
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a4, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	l32i	a8, a10, 0	// workaround: read the stored word back
	s32i	a8, a10, 0	// ... and store it a second time
#endif

	bany	a8, a7, 1b	// if byte 3 is nonzero, iterate
#endif /* !XCHAL_HAVE_LOOPS */

.Lz3:	/* Byte 3 is zero.  The whole word, terminator included, is stored.  */
	leaf_return

/* Tail stores for the word loop at .Laligned.

   On entry a8 holds the word in which the terminator was found and a10
   the dst address for that word; the word itself has not been stored.
   Only the bytes up to and including the terminator are written, using
   8- and 16-bit stores.  Under __XTENSA_EB__ the leading string bytes
   are taken from the upper half of a8, hence the adjustments before the
   narrow stores.  */

.Lz0:	/* Byte 0 is zero.  */
#ifdef __XTENSA_EB__
	movi	a8, 0		// the low byte of a8 is not byte 0 here: store an explicit zero
#endif
	s8i	a8, a10, 0	// write the terminator
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

.Lz1:	/* Byte 1 is zero.  */
#ifdef __XTENSA_EB__
	extui	a8, a8, 16, 16	// move bytes 0-1 (upper halfword) into the low halfword
#endif
	s16i	a8, a10, 0	// write byte 0 and the terminator
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

.Lz2:	/* Byte 2 is zero.  */
#ifdef __XTENSA_EB__
	extui	a8, a8, 16, 16	// move bytes 0-1 (upper halfword) into the low halfword
#endif
	s16i	a8, a10, 0	// write bytes 0 and 1
	movi	a8, 0
	s8i	a8, a10, 2	// write the terminator as byte 2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

#if 1
/* For now just use byte copy loop for the unaligned destination case.

   Reached from .Lsrcaligned when dst is not word-aligned.  Copies one
   byte per iteration from a3 (src) to a10 (dst) and returns once the
   zero byte has been stored.  a8 is scratch.  */

	.align	4
#if XCHAL_HAVE_LOOPS
	/* The padding below, together with the 2-byte _movi.n or 3-byte
	   _movi at .Ldstunaligned, puts the loop instruction at the
	   alignment named in the comments.  The pad bytes are never
	   executed: the code before them ends in a return.  */
#if XCHAL_HAVE_DENSITY
	/* (2 mod 4) alignment for loop instruction */
#else
	/* (1 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
#endif
.Ldstunaligned:

#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	_movi.n	a8, 0		// set up for the maximum loop count
#else
	_movi	a8, 0		// set up for the maximum loop count
#endif
	loop	a8, 2f		// loop forever (almost anyway)
#endif
1:	l8ui	a8, a3, 0	// get byte from src
	addi	a3, a3, 1	// advance src pointer
	s8i	a8, a10, 0	// store byte to dst
	addi	a10, a10, 1	// advance dst pointer
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
#if XCHAL_HAVE_LOOPS
	beqz	a8, 2f		// terminator stored: leave the loop
#else
	bnez	a8, 1b		// byte was nonzero: iterate
#endif
2:	leaf_return

#else /* 0 */

/* This code is not functional yet.  */
/* NOTE(review): this branch is excluded by the "#if 1" above.  It
   branches to .Lu3, .Lu4, .Lu5 and .Lu6, none of which is defined in
   this file, and .Lu2 has no __XTENSA_EB__ adjustment although .Lz2
   does.  It would have to be completed before it could be enabled.  */

.Ldstunaligned:
	l32i	a9, a2, 0	// load word from dst
#ifdef __XTENSA_EB__
	ssa8b	a9		// rotate by dst alignment so that
	src	a9, a9, a9	// shift in loop will put back in place
	ssa8l	a9		// shift left by byte*8
#else
	ssa8l	a9		// rotate by dst alignment so that
	src	a9, a9, a9	// shift in loop will put back in place
	ssa8b	a9		// shift left by 32-byte*8
#endif

/* dst is word-aligned; src is unaligned.  */

.Ldstunalignedloop:
	l32i	a8, a3, 0	// get word from src
	/* 1-cycle interlock */
	bnone	a8, a4, .Lu0	// if byte 0 is zero
	bnone	a8, a5, .Lu1	// if byte 1 is zero
	bnone	a8, a6, .Lu2	// if byte 2 is zero
	src	a9, a8, a9	// combine last word and this word
	s32i	a9, a10, 0	// store word to dst
	bnone	a8, a7, .Lu3	// if byte 3 is zero
	l32i	a9, a3, 4	// get word from src
	addi	a3, a3, 8	// advance src pointer
	bnone	a9, a4, .Lu4	// if byte 0 is zero
	bnone	a9, a5, .Lu5	// if byte 1 is zero
	bnone	a9, a6, .Lu6	// if byte 2 is zero
	src	a8, a9, a8	// combine last word and this word
	s32i	a8, a10, 4	// store word to dst
	addi	a10, a10, 8	// advance dst pointer
	bany	a8, a7, .Ldstunalignedloop // if byte 3 is nonzero, iterate

	/* Byte 7 is zero.  */
.Lu7:	leaf_return

.Lu0:	/* Byte 0 is zero.  */
#ifdef __XTENSA_EB__
	movi	a8, 0
#endif
	s8i	a8, a10, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

.Lu1:	/* Byte 1 is zero.  */
#ifdef __XTENSA_EB__
	extui	a8, a8, 16, 16
#endif
	s16i	a8, a10, 0
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

.Lu2:	/* Byte 2 is zero.  */
	s16i	a8, a10, 0
	movi	a8, 0
	s8i	a8, a10, 2
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw
#endif
	leaf_return

#endif /* 0 */
	.end schedule

	.size	strcpy, . - strcpy
