1/* a-memcpy.s -- memcpy, optimised for m68k asm
2 *
3 * Copyright (c) 2007 mocom software GmbH & Co KG)
4 *
5 * The authors hereby grant permission to use, copy, modify, distribute,
6 * and license this software and its documentation for any purpose, provided
7 * that existing copyright notices are retained in all copies and that this
8 * notice is included verbatim in any distributions. No written agreement,
9 * license, or royalty fee is required for any of the authorized uses.
10 * Modifications to this software may be copyrighted by their authors
11 * and need not follow the licensing terms described here, provided that
12 * the new terms are clearly indicated on the first page of each file where
13 * they apply.
14 */
15
16#include "m68kasm.h"
17
/* MISALIGNED_OK is 1 when one of the CPU macros tested below is defined
 * (ColdFire, 68020, 68030, 68040, 68060) and 0 otherwise.  memcpy tests
 * it to decide whether it may align only the destination and leave the
 * source pointer misaligned, or must fall back to a byte copy whenever
 * either pointer is not long word aligned.
 */
#if defined (__mcoldfire__) \
    || defined (__mc68020__) \
    || defined (__mc68030__) \
    || defined (__mc68040__) \
    || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif
23
24	.text
25	.align	4
26
27	.globl	SYM(memcpy)
28	.type	SYM(memcpy), @function
29
30/*   memcpy, optimised
31 *
32 *   strategy:
33 *       - no argument testing (the original memcpy from the GNU lib does
34 *         no checking either)
35 *       - make sure the destination pointer (the write pointer) is long word
36 *         aligned. This is the best you can do, because writing to unaligned
37 *         addresses can be the most costfull thing you could do.
38 *       - Once you have figured that out, we do a little loop unrolling
39 *         to further improve speed.
40 */
41
42SYM(memcpy):
43	move.l	4(sp),a0	| dest ptr
44	move.l	8(sp),a1	| src ptr
45	move.l	12(sp),d1	| len
46	cmp.l	#8,d1		| if fewer than 8 bytes to transfer,
47	blo	.Lresidue	| do not optimise
48
49#if !MISALIGNED_OK
50	/* Goto .Lresidue if either dest or src is not 4-byte aligned */
51	move.l	a0,d0
52	and.l	#3,d0
53	bne	.Lresidue
54	move.l	a1,d0
55	and.l	#3,d0
56	bne	.Lresidue
57#else /* MISALIGNED_OK */
58	/* align dest */
59	move.l	a0,d0		| copy of dest
60	neg.l	d0
61	and.l	#3,d0		| look for the lower two only
62	beq	2f		| is aligned?
63	sub.l	d0,d1
64	lsr.l	#1,d0		| word align needed?
65	bcc	1f
66	move.b	(a1)+,(a0)+
671:
68	lsr.l	#1,d0		| long align needed?
69	bcc	2f
70	move.w	(a1)+,(a0)+
712:
72#endif /* !MISALIGNED_OK */
73
74	/* long word transfers */
75	move.l	d1,d0
76	and.l	#3,d1		| byte residue
77	lsr.l	#3,d0
78	bcc	1f		| carry set for 4-byte residue
79	move.l	(a1)+,(a0)+
801:
81	lsr.l	#1,d0		| number of 16-byte transfers
82	bcc	.Lcopy 		| carry set for 8-byte residue
83	bra	.Lcopy8
84
851:
86	move.l	(a1)+,(a0)+
87	move.l	(a1)+,(a0)+
88.Lcopy8:
89	move.l	(a1)+,(a0)+
90	move.l	(a1)+,(a0)+
91.Lcopy:
92#if !defined (__mcoldfire__)
93	dbra	d0,1b
94	sub.l	#0x10000,d0
95#else
96	subq.l	#1,d0
97#endif
98	bpl	1b
99	bra	.Lresidue
100
1011:
102	move.b	(a1)+,(a0)+	| move residue bytes
103
104.Lresidue:
105#if !defined (__mcoldfire__)
106	dbra	d1,1b		| loop until done
107#else
108	subq.l	#1,d1
109	bpl	1b
110#endif
111	move.l	4(sp),d0	| return value
112	rts
113