1/* a-memcpy.s -- memcpy, optimised for m68k asm
2 *
3 * Copyright (c) 2007 mocom software GmbH & Co KG)
4 *
5 * The authors hereby grant permission to use, copy, modify, distribute,
6 * and license this software and its documentation for any purpose, provided
7 * that existing copyright notices are retained in all copies and that this
8 * notice is included verbatim in any distributions. No written agreement,
9 * license, or royalty fee is required for any of the authorized uses.
10 * Modifications to this software may be copyrighted by their authors
11 * and need not follow the licensing terms described here, provided that
12 * the new terms are clearly indicated on the first page of each file where
13 * they apply.
14 */
15
16#include <picolibc.h>
17
18#include "m68kasm.h"
19
20#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
21# define MISALIGNED_OK 1
22#else
23# define MISALIGNED_OK 0
24#endif
25
26	.text
27	.align	4
28
29	.globl	SYM(memcpy)
30	.type	SYM(memcpy), @function
31
/*   memcpy, optimised
 *
 *   void *memcpy (void *dest, const void *src, size_t len)
 *
 *   in:       4(sp) = dest, 8(sp) = src, 12(sp) = len
 *   out:      dest, returned in both d0 and a0
 *   scratch:  d0, d1, a0, a1 and the condition codes
 *
 *   strategy:
 *       - no argument testing (the original memcpy from the GNU lib does
 *         no checking either)
 *       - copies of fewer than 8 bytes go straight to the byte loop.
 *       - if MISALIGNED_OK is set, make sure the destination pointer (the
 *         write pointer) is long word aligned by copying up to 3 leading
 *         bytes; the source pointer may stay misaligned. This is the best
 *         you can do, because writing to unaligned addresses can be the
 *         most costly thing you could do.
 *       - if MISALIGNED_OK is not set, long word transfers are only used
 *         when both pointers are already long word aligned; anything else
 *         is copied one byte at a time.
 *       - Once you have figured that out, we do a little loop unrolling
 *         (16 bytes per iteration) to further improve speed.
 */

SYM(memcpy):
	move.l	4(sp),a0	| dest ptr
	move.l	8(sp),a1	| src ptr
	move.l	12(sp),d1	| len
	cmp.l	#8,d1		| if fewer than 8 bytes to transfer,
	blo	.Lresidue	| do not optimise (unsigned compare)

#if !MISALIGNED_OK
	/* Goto .Lresidue if either dest or src is not 4-byte aligned.
	   d1 still holds the complete length on this path, so the byte
	   loop has to cope with counts of 64 KiB and more.  */
	move.l	a0,d0
	and.l	#3,d0
	bne	.Lresidue
	move.l	a1,d0
	and.l	#3,d0
	bne	.Lresidue
#else /* MISALIGNED_OK */
	/* align dest; len >= 8 here, so taking 1 to 3 bytes off d1
	   cannot underflow it */
	move.l	a0,d0		| copy of dest
	neg.l	d0
	and.l	#3,d0		| bytes needed to reach a long word boundary
	beq	.Laligned	| is aligned?
	sub.l	d0,d1		| account for the alignment bytes
	lsr.l	#1,d0		| word align needed? (bit 0 -> carry)
	bcc	.Lalign_word
	move.b	(a1)+,(a0)+
.Lalign_word:
	lsr.l	#1,d0		| long align needed? (bit 1 -> carry)
	bcc	.Laligned
	move.w	(a1)+,(a0)+
.Laligned:
#endif /* !MISALIGNED_OK */

	/* long word transfers */
	move.l	d1,d0
	and.l	#3,d1		| byte residue (0..3)
	lsr.l	#3,d0		| bit 2 of the length -> carry
	bcc	.Lno_long	| carry set for 4-byte residue
	move.l	(a1)+,(a0)+
.Lno_long:
	lsr.l	#1,d0		| number of 16-byte transfers
	bcc	.Lcopy		| carry set for 8-byte residue
	bra	.Lcopy8

.Lcopy16:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy8:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	d0,.Lcopy16	| dbra counts in the low word of d0 only,
	sub.l	#0x10000,d0	| so borrow from the high word and go again
#else
	subq.l	#1,d0
#endif
	bpl	.Lcopy16
	bra	.Lresidue

.Lbyte:
	move.b	(a1)+,(a0)+	| move residue bytes

.Lresidue:
#if !defined (__mcoldfire__)
	dbra	d1,.Lbyte	| loop until the low word of d1 runs out
# if !MISALIGNED_OK
	/* The misaligned fallback arrives here with the full length in
	   d1.  dbra only decrements the low 16 bits, so without this
	   step a copy of 64 KiB or more would stop after
	   (len mod 65536) bytes.  Same idiom as the 16-byte loop.  */
	sub.l	#0x10000,d1	| borrow from the high word
	bpl	.Lbyte		| still non-negative: another 64 KiB to go
# endif
#else
	subq.l	#1,d1
	bpl	.Lbyte
#endif
	move.l	4(sp),a0	| return value
	move.l	a0,d0		| in both a0 and d0
	rts
	.size	SYM(memcpy), . - SYM(memcpy)
117
118#if defined(__linux__) && defined(__ELF__)
119	.section .note.GNU-stack,"",%progbits
120#endif
121