1/*
2   Copyright (c) 2015-2024, Synopsys, Inc. All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6
7   1) Redistributions of source code must retain the above copyright notice,
8   this list of conditions and the following disclaimer.
9
10   2) Redistributions in binary form must reproduce the above copyright notice,
11   this list of conditions and the following disclaimer in the documentation
12   and/or other materials provided with the distribution.
13
14   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
15   may be used to endorse or promote products derived from this software
16   without specific prior written permission.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28   POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/* This implementation is optimized for performance.  For code size a generic
32   implementation of this function from newlib/libc/string/memset.c will be
33   used.  */
34#include <picolibc.h>
35
36#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED) \
37    && !defined (__ARC_RF16__)
38
39#include "asm.h"
40
41#ifdef __ARCHS__
42
/* Cache hint issued at the top of every pass of the 64-byte store loop in
   memset.  PREWRITE(A,B) expands to a single instruction taking the address
   operand [(A),(B)].  USE_PREFETCH is defined unconditionally right here, so
   as written the prefetchw form is always the one selected; the prealloc form
   is only reachable if the define below is removed.  */
43#define USE_PREFETCH
44
45#ifdef USE_PREFETCH
46#define PREWRITE(A,B)	prefetchw [(A),(B)]
47#else
48#define PREWRITE(A,B)	prealloc [(A),(B)]
49#endif
50
; memset, speed-optimised variant (assembled only when __ARCHS__ is defined).
; In:   r0 = destination, r1 = fill value (only the low byte is stored),
;       r2 = byte count
; Out:  r0 = destination, unchanged (all stores go through the copy in r3)
; Uses: r3 = running write pointer, r4/r5 = fill byte replicated to 32 bits,
;       lp_count and the status flags
; Flow: count == 0 -> return at once
;       count <= 8 -> byte loop only
;       otherwise  -> byte stores up to a 4-byte boundary, then 64-byte
;                     blocks, then 32-byte blocks, then trailing bytes
51ENTRY (memset)
52	prefetchw [r0]		; Prefetch the write location
	; The 0 destination discards the result, so this only updates the
	; flags: Z is set when the byte count in r2 is zero.
53	mov.f	0, r2
54; if size is zero, return straight away
55	jz.d	[blink]
	; Delay slot of jz.d, executed on both paths: r3 is the working write
	; pointer from here on, so r0 still holds the return value at exit.
56	mov	r3, r0		; don't clobber ret val
57
58; if length <= 8 (brls is the lower-or-same test), use the byte loop only
59	brls.d.nt	r2, 8, .Lsmallchunk
	; Delay slot of brls.d: preload lp_count with the byte count for the
	; small-chunk loop.  r2 is non-zero here, so Z ends up clear and the
	; lpnz at .Lsmallchunk is entered.
60	mov.f	lp_count,r2
61
	; Byte-fill up to the next 4-byte boundary.  Z is set when the
	; destination is already aligned, in which case lpnz skips the loop;
	; otherwise the loop runs 4 - (r0 & 3) times, i.e. 1 to 3 byte stores,
	; and r2 is decremented once per byte stored.
62	and.f	r4, r0, 0x03
63	rsub	lp_count, r4, 4
64	lpnz	@.Laligndestination
65	; LOOP BEGIN
66	stb.ab	r1, [r3,1]
67	sub	r2, r2, 1
68.Laligndestination:
69
70; Destination is aligned: r3 is 4-byte aligned, r2 = bytes still to write
	; NOTE(review): only 4-byte alignment is established before the std
	; stores used under __ARC_LL64__; presumably std accepts 4-byte
	; aligned addresses on the targeted cores - confirm against the ISA.
71	and	r1, r1, 0xFF
	; Replicate the fill byte into all four bytes of r5, then copy it to r4
	; so that both store forms below (st r4, and std r4 which also stores
	; r5) write the same pattern.
72	asl	r4, r1, 8
73	or	r4, r4, r1
74	asl	r5, r4, 16
75	or	r5, r5, r4
76	mov	r4, r5
77
	; Split r2 into a pass count for the 64-byte loop and a tail:
	;   r2 <= 64: lp_count = 0 and r2 is left untouched.
	;   r2 >  64: lp_count = r2 - 64 (sub3 subtracts 8 << 3) and
	;             r2 = (r2 & 63) + 64 (bmsk keeps bits 5..0, add3 adds
	;             8 << 3), so the tail is between 64 and 127 bytes.
	; None of these instructions carries .f, so the predicated ones all
	; see the flags produced by cmp.
78	sub3	lp_count, r2, 8
79	cmp     r2, 64
80	bmsk.hi	r2, r2, 5
81	mov.ls	lp_count, 0
82	add3.hi	r2, r2, 8
83
84; lp_count = number of whole 64-byte blocks; Z set (loop skipped) when none
85	lsr.f	lp_count, lp_count, 6
86	lpnz	@.Lset64bytes
87	; LOOP START: one pass stores 64 bytes (8 x std or 16 x st)
	; Whenever this loop runs, at least 64 bytes are left over for the
	; tail, so the hinted address r3 + 64 is still inside the buffer on
	; every pass.
88	PREWRITE (r3, 64)	;Prefetch the next write location
89#ifdef __ARC_LL64__
90	std.ab	r4, [r3, 8]
91	std.ab	r4, [r3, 8]
92	std.ab	r4, [r3, 8]
93	std.ab	r4, [r3, 8]
94	std.ab	r4, [r3, 8]
95	std.ab	r4, [r3, 8]
96	std.ab	r4, [r3, 8]
97	std.ab	r4, [r3, 8]
98#else
99	st.ab	r4, [r3, 4]
100	st.ab	r4, [r3, 4]
101	st.ab	r4, [r3, 4]
102	st.ab	r4, [r3, 4]
103	st.ab	r4, [r3, 4]
104	st.ab	r4, [r3, 4]
105	st.ab	r4, [r3, 4]
106	st.ab	r4, [r3, 4]
107	st.ab	r4, [r3, 4]
108	st.ab	r4, [r3, 4]
109	st.ab	r4, [r3, 4]
110	st.ab	r4, [r3, 4]
111	st.ab	r4, [r3, 4]
112	st.ab	r4, [r3, 4]
113	st.ab	r4, [r3, 4]
114	st.ab	r4, [r3, 4]
115#endif
116.Lset64bytes:
117
	; lp_count = number of whole 32-byte blocks in the tail (0 to 3);
	; Z set (loop skipped) when there is none.
118	lsr.f	lp_count, r2, 5 ;Tail held in r2 is at most 127 bytes
119	lpnz	.Lset32bytes
120	; LOOP START: one pass stores 32 bytes (4 x std or 8 x st)
121	prefetchw [r3, 32]	;Prefetch the next write location
122#ifdef __ARC_LL64__
123	std.ab	r4, [r3, 8]
124	std.ab	r4, [r3, 8]
125	std.ab	r4, [r3, 8]
126	std.ab	r4, [r3, 8]
127#else
128	st.ab	r4, [r3, 4]
129	st.ab	r4, [r3, 4]
130	st.ab	r4, [r3, 4]
131	st.ab	r4, [r3, 4]
132	st.ab	r4, [r3, 4]
133	st.ab	r4, [r3, 4]
134	st.ab	r4, [r3, 4]
135	st.ab	r4, [r3, 4]
136#endif
137.Lset32bytes:
138
139	and.f	lp_count, r2, 0x1F ;At most 31 trailing bytes; Z set when none
	; .Lsmallchunk is also entered directly from the brls above, with
	; lp_count = byte count (1 to 8) and Z clear.
140.Lsmallchunk:
141	lpnz	.Lcopy3bytes
142	; LOOP START: one byte per pass (despite the label name, up to 31)
143	stb.ab	r1, [r3, 1]
144.Lcopy3bytes:
145
	; r0 was never written, so it still holds the original destination.
146	j	[blink]
147
148ENDFUNC (memset)
149#endif /* __ARCHS__ */
150
151#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED && !__ARC_RF16__ */
152