1#include <picolibc.h>
2
3!
4! Fast SH memset
5!
6! by Toshiyasu Morita (tm@netcom.com)
7!
8! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
9/* Copyright 2002 SuperH Ltd. */
10!
11
12#include "asm.h"
13
/*
 * memset: fill a block of memory with a single byte value.
 *
 * Two implementations are selected at preprocessing time: an SHmedia
 * version and an SH1 .. SH4 / SHcompact version.  Each one documents its
 * own register use below.
 */
ENTRY(memset)
#if __SHMEDIA__
/*
 * SHmedia version.  Register use, as read from the code below:
 *   r2   destination pointer; never written in this path
 *   r3   fill value; replicated into every byte before the wide stores
 *   r4   byte count
 *   r5   r2 + r4, i.e. one past the last byte to fill
 *   r25  r2 rounded down to a multiple of 8
 *   r20  (r5 - 1) rounded down to a multiple of 8
 *   r18  branch target loaded into tr2 and used by every final blink
 *        (presumably the return address - confirm against the ABI)
 *   r8, r9, r24  scratch
 */
	pta/l multiquad, tr0
	ptabs r18, tr2

	andi r2, -8, r25
	add r2, r4, r5
	addi r5, -1, r20    // calculate end address.
	andi r20, -8, r20
	cmveq r4, r25, r20  // count == 0: force r20 == r25 so the branch below falls through
	bne/u r25, r20, tr0 // multiquad

!	This sequence could clobber volatile objects that are in the same
!	quadword as a very short char array.
!	ldlo.q r2, 0, r7
!	shlli r4, 2, r4
!	movi -1, r8
!	SHHI r8, r4, r8
!	SHHI r8, r4, r8
!	mcmv r7, r8, r3
!	stlo.q r2, 0, r3

/*
 * The whole range lies inside one aligned 8-byte unit (or count is 0).
 * Counts of 4 and above go to setlongs; counts 0 .. 3 are written with
 * individual byte stores.
 */
	pta/l setlongs, tr0
	movi 4, r8
	bgeu/u r4, r8, tr0
	pta/l endset, tr0
	beqi/u r4, 0, tr0
	st.b r2, 0, r3
	beqi/u r4, 1, tr0
	nop
	st.b r2, 1, r3
	beqi/l r4, 2, tr0
	st.b r2,2,r3
endset: blink tr2, r63
/*
 * 4 or more bytes within one aligned 8-byte unit: one store anchored at
 * the first byte (r2) and one anchored at the last byte (r5 - 1).
 * NOTE(review): presumably stlo.l / sthi.l are the partial-longword
 * stores for misaligned data and the two ranges may overlap - confirm
 * against the SHmedia instruction reference.
 */
setlongs:
	mshflo.b r3, r3, r3
	mperm.w r3, r63, r3	// Fill pattern now in every byte of r3
	stlo.l r2, 0, r3
	nop
	nop
	sthi.l r5, -1, r3
	blink tr2, r63

/*
 * The range touches at least two aligned 8-byte units.
 *   r24 = r20 - r25, the distance from the first unit to the last one.
 *   r24 == 8   only the head and tail partial stores are needed.
 *   r24 >= 64  use the 32-bytes-per-iteration loop.
 *   otherwise  store full units working inwards from both ends.
 */
multiquad:
	mshflo.b r3, r3, r3
	mperm.w r3, r63, r3	// Fill pattern now in every byte of r3
	pta/l lastquad, tr0
	stlo.q r2, 0, r3
	sub r20, r25, r24
	movi 64, r9
	beqi/u r24, 8, tr0 // lastquad
	pta/l loop, tr1
	addi r20, -7*8, r8 // loop end address; This might overflow, so we need
	                   // to use a different test before we start the loop
	bgeu/u r24, r9, tr1// loop
	st.q r25, 8, r3
	shlri r24, 4, r24  // r24 = distance / 16: 1, 2 or 3 on this path
	st.q r20, -8, r3
	beqi/u r24, 1, tr0 // lastquad
	st.q r25, 16, r3
	st.q r20, -16, r3
	beqi/u r24, 2, tr0 // lastquad
	st.q r25, 24, r3
	st.q r20, -24, r3
lastquad:
	sthi.q r5, -1, r3
	blink tr2,r63

/*
 * Main loop: four 8-byte stores per iteration at r25 + 8 .. r25 + 32,
 * then r25 advances by 32.  It repeats while r8 >= r25, with r8 being
 * r20 - 56.  The five stores after the loop cover r20 - 40 .. r20 - 8
 * and sthi.q finishes the tail.
 * NOTE(review): alloco presumably pre-allocates the cache block at
 * r25 + 32 without reading memory - confirm against the SH-5 manual.
 */
loop:
	alloco r25, 32
	st.q r25, 8, r3
	st.q r25, 16, r3
	st.q r25, 24, r3
	st.q r25, 32, r3
	addi r25, 32, r25
	bgeu/l r8, r25, tr1 // loop

	st.q r20, -40, r3
	st.q r20, -32, r3
	st.q r20, -24, r3
	st.q r20, -16, r3
	st.q r20, -8, r3
	sthi.q r5, -1, r3
	blink tr2,r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
/*
 * Entry: DST: destination pointer
 *        VAL: fill value (only the low 8 bits are used)
 *        CNT: byte count
 *        These are r4, r5, r6, or r2, r3, r4 when __SH5__ is defined.
 *
 * Exit:  r0:  destination pointer as passed in (DST is never written)
 *        TMP, VAL, CNT and the T bit: trashed
 *
 * r0 is the running store pointer throughout.  CNT is turned into the
 * end address (DST + count) by the very first branch sequence.
 */

! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .

#ifdef __SH5__
#define DST r2
#define VAL r3
#define CNT r4
#define TMP r5
#else
#define DST r4
#define VAL r5
#define CNT r6
#define TMP r2
#endif

	mov	#12,r0	! Check for small number of bytes
	cmp/gt	CNT,r0	! T = (12 > CNT), signed compare
	mov	DST,r0	! r0 = running store pointer
	/* The add below is issued on both paths, so CNT is the end address
	   from here on whether or not the branch is taken.  (SL comes from
	   asm.h; presumably it pairs the branch with the trailing instruction
	   as its delay slot - confirm there.)  */
	SL(bt, L_store_byte_loop_check0, add DST,CNT)

	tst	#3,r0	! Align destination
	/* Zero-extend the fill value before it is replicated.  This has to
	   name VAL and not a fixed register: with __SH5__ the value is in r3
	   while r5 is TMP, and stale upper bits in VAL would survive the
	   swap.b / swap.w sequence and corrupt the longword pattern.  */
	SL(bt,	L_dup_bytes, extu.b VAL,VAL)
	.balignw 4,0x0009
L_align_loop:
	mov.b	VAL,@r0	! Byte stores until r0 is a multiple of 4
	add	#1,r0
	tst	#3,r0
	bf	L_align_loop

L_dup_bytes:
	swap.b	VAL,TMP	! Duplicate bytes across longword
	or	TMP,VAL	! low 16 bits now hold the byte twice
	swap.w	VAL,TMP
	or	TMP,VAL	! all four bytes now hold the fill byte

	add	#-16,CNT	! CNT = end - 16, the limit tested by the loop below

	.balignw 4,0x0009
L_store_long_loop:
	mov.l	VAL,@r0	! Store double longs to memory
	cmp/hs	CNT,r0	! T = (r0 >= end - 16), unsigned, tested before r0 advances
	mov.l	VAL,@(4,r0)
	SL(bf, L_store_long_loop, add #8,r0)

	add	#16,CNT	! CNT = end address again

L_store_byte_loop_check0:
	cmp/eq	CNT,r0	! Nothing left to store?
	bt	L_exit
	.balignw 4,0x0009
L_store_byte_loop:
	mov.b	VAL,@r0	! Store bytes to memory
	add	#1,r0
	cmp/eq	CNT,r0
	bf	L_store_byte_loop

L_exit:
	rts
	mov	DST,r0	! delay slot: hand back the original destination pointer
#endif /* ! SHMEDIA */
167