1/* a-memset.s -- memset, optimised for fido asm
2 *
3 * Copyright (c) 2007 mocom software GmbH & Co KG)
4 *
5 * The authors hereby grant permission to use, copy, modify, distribute,
6 * and license this software and its documentation for any purpose, provided
7 * that existing copyright notices are retained in all copies and that this
8 * notice is included verbatim in any distributions. No written agreement,
9 * license, or royalty fee is required for any of the authorized uses.
10 * Modifications to this software may be copyrighted by their authors
11 * and need not follow the licensing terms described here, provided that
12 * the new terms are clearly indicated on the first page of each file where
13 * they apply.
14 */
15
16#include <picolibc.h>
17
18#include "m68kasm.h"
19
20	.text
21	.align	4
22
23	.globl	SYM(memset)
24	.type	SYM(memset), @function
25
26|   memset, optimised
27|
28|   strategy:
|       - no argument testing (the original memset from the GNU lib does
|         no checking either)
|       - make sure the destination pointer (the write pointer) is long word
|         aligned. This is the best you can do, because writing to unaligned
|         addresses can be the most costly thing one could do.
34|       - we fill long word wise if possible
35|
36|   VG, 2006
37|
|	bugfixes:
|		- distribution of the byte value improved, in case someone
|         passes a non-byte value
|		- residue byte transfer was not working
42|
43|	VG, April 2007
44|
| memset (dest, value, len) for m68k / ColdFire.
|
| Stack arguments on entry: 4(sp) = dest, 8(sp) = value, 12(sp) = len.
| Result: the dest pointer, delivered in both a0 and d0.
|
| Register roles:
|	a0	running write pointer
|	d0	fill value; its low byte is replicated into all four bytes
|		before any word or long word store is done
|	d1	number of bytes still to be written
|	d2	scratch, pushed and popped around the long word path only
|
| Requests shorter than 16 bytes are filled bytewise.  Anything longer
| first aligns the write pointer to a long word boundary, then stores
| long words, and finally hands the remaining 0..3 bytes to the byte loop.
|
SYM(memset):
	move.l	12(sp),d1		| d1 = len
	move.l	8(sp),d0		| d0 = value
	move.l	4(sp),a0		| a0 = dest
	cmp.l	#16,d1
	blo	.Lbset			| unsigned len < 16: byte loop only

	move.l	d2,-(sp)		| save d2, it is used as scratch below

	| Replicate the low byte of d0 into all four bytes of d0.
	move.b	d0,d2			| d2.b = fill byte
	lsl.l	#8,d0
	move.b	d2,d0			| low word of d0 = byte, byte
	move.w	d0,d2			| keep that pair in d2.w
	swap	d0			| move the pair into the upper half
	move.w	d2,d0			| all four bytes of d0 = fill byte

	| d2 = (-dest) & 3 = bytes missing up to the next long word boundary.
	move.l	a0,d2
	neg.l	d2
	and.l	#3,d2
	beq	.Lmemset_aligned	| already long word aligned

	sub.l	d2,d1			| alignment bytes come off the length
	lsr.l	#1,d2			| carry = bit 0: odd address
	bcc	.Lmemset_even
	move.b	d0,(a0)+		| one byte makes the pointer even
.Lmemset_even:
	lsr.l	#1,d2			| carry = bit 1: half a long word missing
	bcc	.Lmemset_aligned
	move.w	d0,(a0)+		| one word completes the alignment
.Lmemset_aligned:
	move.l	d1,d2
	lsr.l	#2,d2			| d2 = number of long words, 3 or more
	subq.l	#1,d2			| bias by one: the loop stores, then counts

.Lmemset_long:
	move.l	d0,(a0)+		| store one long word
#ifdef __mcoldfire__
	subq.l	#1,d2			| count down the full 32 bit counter
#else
	dbra	d2,.Lmemset_long	| dbra counts the low 16 bits only
	sub.l	#0x10000,d2		| low word ran out: step the high word
#endif
	bpl	.Lmemset_long		| repeat until the counter turns negative

	and.l	#3,d1			| 0..3 bytes are left over
	move.l	(sp)+,d2		| give d2 back
	bra	.Lbset			| enter the byte loop at its counter test

.Lmemset_byte:
	move.b	d0,(a0)+		| store one byte
.Lbset:
#ifdef __mcoldfire__
	subq.l	#1,d1
	bpl	.Lmemset_byte
#else
	dbra	d1,.Lmemset_byte	| d1 is below 16 here, a 16 bit count suffices
#endif
	move.l	4(sp),d0		| return value: the dest argument
	move.l	d0,a0			| mirrored into a0 as well
	rts
	.size	SYM(memset), . - SYM(memset)
105
106#if defined(__linux__) && defined(__ELF__)
107	.section .note.GNU-stack,"",%progbits
108#endif
109