/* ANSI C standard library function strncpy.

   Copyright (c) 2001-2008 Tensilica Inc.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <picolibc.h>

#include "xtensa-asm.h"

	.text
.begin schedule
	.align	4
	.literal_position

/* Byte-at-a-time prologue for strncpy.  It sits ahead of the strncpy
   entry point and is reached only through the branches to .Lsrc1mod2 and
   .Lsrc2mod4 that strncpy takes when src is not word-aligned.

   Register use (set up by strncpy before branching here):
     a2  = original dst, left untouched as the return value
     a3  = src pointer
     a4  = n, the remaining byte count (non-zero on entry)
     a10 = dst write pointer
     a8  = scratch: the byte just copied

   After each byte is copied control leaves through:
     .Lret         when n has reached zero,
     .Lfill        when the byte copied was zero and n is still non-zero,
     .Lsrcaligned  once src has become word-aligned.  */
__strncpy_aux:

.Lsrc1mod2: // src address is odd
	l8ui	a8, a3, 0	// get byte 0
	addi	a3, a3, 1	// advance src pointer
	s8i	a8, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, .Lret       // if n is zero
	addi	a10, a10, 1	// advance dst pointer
	beqz	a8, .Lfill	// if byte 0 is zero
	// bit 0 of src is now clear; bit 1 decides between aligned and 2 mod 4
	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned

.Lsrc2mod4: // src address is 2 mod 4
	l8ui	a8, a3, 0	// get byte 0
	addi	a4, a4, -1	// decrement n
	s8i	a8, a10, 0	// store byte 0
	beqz    a4, .Lret       // if n is zero
	addi	a10, a10, 1	// advance dst pointer
	beqz	a8, .Lfill	// if byte 0 is zero
	l8ui	a8, a3, 1	// get byte 1 (src has not been advanced yet)
	addi	a3, a3, 2	// advance src pointer past both bytes
	s8i	a8, a10, 0	// store byte 1
	addi	a4, a4, -1	// decrement n
	beqz    a4, .Lret       // if n is zero
	addi	a10, a10, 1	// advance dst pointer
	bnez	a8, .Lsrcaligned // byte 1 non-zero: src is now word-aligned
	j	.Lfill

/* n is exhausted: return without any zero fill.  a2 still holds dst.  */
.Lret:
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw			// only emitted for the ESP32 PSRAM cache workaround
#endif
	leaf_return		// return macro from xtensa-asm.h


/* char *strncpy (char *dst, const char *src, size_t n)

   Copies bytes from src to dst until n bytes have been written or a zero
   byte has been copied.  After a zero byte is copied, the remaining bytes
   (if any) of the n are written as zeros by .Lfill.  If n runs out first,
   the routine returns without appending a zero byte.  If n is zero on
   entry nothing is written.

   In:   a2 = dst, a3 = src, a4 = n
   Out:  a2 = dst (a2 is never modified)
   Uses: a10 = dst write pointer
         a8, a9 = scratch
         a11, a5, a6, a7 = MASK0..MASK3 (constants from xtensa-asm.h);
         the word loop at .Laligned tests a loaded word against them with
         bnone to find a zero in byte 0..3.  */

	.align	4
	.global	strncpy
	.type	strncpy, @function
strncpy:
	leaf_entry sp, 16	// prologue macro from xtensa-asm.h
	/* a2 = dst, a3 = src, a4 = n */

	mov	a10, a2		// leave dst in return value register
	beqz    a4, .Lret       // if n is zero

	movi	a11, MASK0
	movi	a5, MASK1
	movi	a6, MASK2
	movi	a7, MASK3
	// Unaligned src: copy 1 to 3 bytes one at a time (code located just
	// before this entry point), then come back to .Lsrcaligned.
	bbsi.l	a3, 0, .Lsrc1mod2
	bbsi.l	a3, 1, .Lsrc2mod4
.Lsrcaligned:

	/* Check if the destination is aligned.  */
	movi	a8, 3
	bnone	a10, a8, .Laligned	// low two bits of dst clear: word loop

	j	.Ldstunaligned		// otherwise copy byte by byte


/* Fill the dst with zeros -- n is at least 1.

   On entry: a10 = dst write pointer, a4 = n (bytes still to write).
   Uses a9 as the zero source and a8 as the word count.
   dst is first brought to word alignment with byte stores
   (.Lfill1mod2 / .Lfill2mod4), then whole words are stored while at
   least 4 bytes remain, and the tail is finished byte by byte.
   Returns to the caller of strncpy when n reaches zero.

   NOTE(review): blti/bgei compare a4 as a signed value, so a count with
   bit 31 set would skip the word loop and take the byte loop instead --
   presumably never seen in practice; confirm if it matters.  */

.Lfill:
	movi	a9, 0
	bbsi.l	a10, 0, .Lfill1mod2
	bbsi.l	a10, 1, .Lfill2mod4
.Lfillaligned:
	blti	a4, 4, .Lfillcleanup	// fewer than 4 left: bytes only

	/* Loop filling complete words with zero.  */
#if XCHAL_HAVE_LOOPS

	srai	a8, a4, 2	// a8 = number of whole words (n / 4)
	loop	a8, 1f
	s32i	a9, a10, 0
	addi	a10, a10, 4

1:	slli	a8, a8, 2	// bytes just written by the loop
	sub	a4, a4, a8	// n = leftover 0 to 3 bytes

#else /* !XCHAL_HAVE_LOOPS */

1:	s32i	a9, a10, 0
	addi	a10, a10, 4
	addi	a4, a4, -4
	bgei    a4, 4, 1b

#endif /* !XCHAL_HAVE_LOOPS */

	beqz	a4, 2f		// nothing left over: done

.Lfillcleanup:
	/* Fill leftover (1 to 3) bytes with zero.  */
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	addi	a10, a10, 1	// advance dst pointer
	bnez    a4, .Lfillcleanup

	/* Common exit for every path in the fill code.  */
2:
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw			// only emitted for the ESP32 PSRAM cache workaround
#endif
	leaf_return		// return macro from xtensa-asm.h

.Lfill1mod2: // dst address is odd
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	addi    a10, a10, 1	// advance dst pointer
	bbci.l	a10, 1, .Lfillaligned // if dst is now word-aligned

.Lfill2mod4: // dst address is 2 mod 4
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	s8i	a9, a10, 1	// store byte 1
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	addi    a10, a10, 2	// advance dst pointer
	j	.Lfillaligned


/* dst is word-aligned; src is word-aligned; n is at least 1.

   Word-at-a-time copy.  On entry: a3 = src, a10 = dst write pointer,
   a4 = n, and a11/a5/a6/a7 hold MASK0..MASK3.  a8 holds the word just
   loaded.

   A word is only processed while n is at least 5.  Storing a full word
   then leaves n at least 1, which is what .Lfill requires when the
   word's last byte is the zero byte.  With n below 5 the remaining bytes
   are handed to the byte loop at .Ldstunaligned.

   A zero in byte 0, 1 or 2 is handled by .Lz0/.Lz1/.Lz2, which store only
   the bytes up to and including the zero and then jump to .Lfill.  */

	.align	4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	/* (2 mod 4) alignment for loop instruction */
#else
	/* (1 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
#endif
.Laligned:
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	_movi.n	a8, 0		// set up for the maximum loop count
#else
	_movi	a8, 0		// set up for the maximum loop count
#endif
	loop	a8, 1f		// loop forever (almost anyway)
	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a11, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
	addi	a4, a4, -4	// decrement n
	addi	a10, a10, 4	// advance dst pointer
	bnone	a8, a7, .Lfill	// if byte 3 is zero
1:

#else /* !XCHAL_HAVE_LOOPS */

1:	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a11, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
	addi	a4, a4, -4	// decrement n
	addi	a10, a10, 4	// advance dst pointer
	bany	a8, a7, 1b	// no zeroes
#endif /* !XCHAL_HAVE_LOOPS */

	j	.Lfill

.Lz0:	/* Byte 0 is zero.  */
#ifdef __XTENSA_EB__
	movi	a8, 0		// big-endian: byte 0 is not the low byte of a8
#endif
	s8i	a8, a10, 0	// store the zero byte
	addi	a4, a4, -1	// decrement n
	addi	a10, a10, 1	// advance dst pointer
	j	.Lfill

.Lz1:	/* Byte 1 is zero.  */
#ifdef __XTENSA_EB__
        extui   a8, a8, 16, 16	// big-endian: move bytes 0-1 to the low half
#endif
	s16i	a8, a10, 0	// store bytes 0 and 1
	addi	a4, a4, -2	// decrement n
	addi	a10, a10, 2	// advance dst pointer
	j	.Lfill

.Lz2:	/* Byte 2 is zero.  */
#ifdef __XTENSA_EB__
        extui   a8, a8, 16, 16	// big-endian: move bytes 0-1 to the low half
#endif
	s16i	a8, a10, 0	// store bytes 0 and 1
	movi	a8, 0
	s8i	a8, a10, 2	// store the zero byte 2
	addi	a4, a4, -3	// decrement n
	addi	a10, a10, 3	// advance dst pointer
	j	.Lfill

/* Byte-at-a-time copy.  Used when dst is not word-aligned, and by the
   word loop at .Laligned once n has dropped below 5.

   On entry: a3 = src, a10 = dst write pointer, a4 = n (non-zero).
   a8 holds the byte just copied.  The loop ends either at 3: when n
   reaches zero (return to the caller), or at 2: when a zero byte has
   been copied with n still non-zero (the rest is zero-filled by .Lfill).  */

	.align	4
#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	/* (2 mod 4) alignment for loop instruction */
#else
	/* (1 mod 4) alignment for loop instruction */
	.byte	0
	.byte	0
#endif
#endif
.Ldstunaligned:

#if XCHAL_HAVE_LOOPS
#if XCHAL_HAVE_DENSITY
	_movi.n	a8, 0		// set up for the maximum loop count
#else
	_movi	a8, 0		// set up for the maximum loop count
#endif
	loop	a8, 2f		// loop forever (almost anyway)
#endif
1:	l8ui	a8, a3, 0	// get next byte from src
	addi	a3, a3, 1	// advance src pointer
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	// padding between the load and the store, only emitted for the
	// ESP32 PSRAM cache workaround
	nop
	nop
	nop
#endif
	s8i	a8, a10, 0	// store byte to dst
	addi	a4, a4, -1	// decrement n
	beqz	a4, 3f		// if n is zero, return without filling
	addi	a10, a10, 1	// advance dst pointer
#if XCHAL_HAVE_LOOPS
	// 2: is the loop-end label; the code relies on a taken branch to it
	// leaving the hardware loop rather than looping back.
	beqz	a8, 2f
#else
	bnez	a8, 1b		// keep copying until a zero byte is copied
#endif
2:	j	.Lfill

3:
#if XTENSA_ESP32_PSRAM_CACHE_FIX
	memw			// only emitted for the ESP32 PSRAM cache workaround
#endif
	leaf_return		// return macro from xtensa-asm.h
.end schedule

	.size	strncpy, . - strncpy
277