/*
Copyright (c) 1990 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation,
and/or other materials related to such
distribution and use acknowledge that the software was developed
by the University of California, Berkeley.  The name of the
University may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
#include "setarch.h"

#include "defines.h"

; _memcpy for the H8/300 family: copy len bytes from src to dst.
;
; Two mutually exclusive implementations follow, selected by the
; preprocessor.  The H8SX one starts at the lowest address and works
; upwards; the generic one starts at the highest address and works
; downwards.  Overlapping buffers therefore get no consistent treatment.
;
; LEN(), MOVP, ADDP, CMPP and the A0..A3 register aliases are not defined
; in this file.  Presumably they come from the two headers above and pick
; the size_t- or pointer-sized form of a register or instruction --
; confirm against defines.h before relying on that.

#ifdef __H8300SX__

	; H8SX variant, built around the movmd block-move instruction.
	;
	; In:	er0 = dst, er1 = src, LEN(r2) = byte count
	; Out:	er0 is never written, so it still holds dst on return
	; Saved:	er4-er6 are pushed on entry and popped again by rts/l
	; Other:	(e)r2 is consumed as the running byte count
	;
	; As the code below uses it, movmd reads from er5, writes to er6 and
	; takes its element count from r4.
	.global _memcpy
_memcpy:
	stm.l	er4-er6,@-er7

	; Set up source and destination pointers for movmd.
	mov.l	er0,er6			; er6 = dst
	mov.l	er1,er5			; er5 = src

	; See whether the copy is long enough to use the movmd.l code.
	; Although the code can handle anything longer than 6 bytes,
	; it can be more expensive than movmd.b for small moves.
	; It's better to use a higher threshold to account for this.
	;
	; Note that the exact overhead of the movmd.l checks depends on
	; the alignments of the length and pointers.  They are faster when
	; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
	; are 0.  This threshold is a compromise between the various cases.
	cmp	#16,LEN(r2)
	blo	simple

	; movmd.l only works for even addresses.  If one of the addresses
	; is odd and the other is not, fall back on a simple move.
	bld	#0,r5l			; C = bit 0 of src
	bxor	#0,r6l			; C ^= bit 0 of dst
	bcs	simple			; parities differ: bytewise copy

	; Make the addresses even.  Both have the same parity here, so
	; testing the source alone is enough.
	bld	#0,r5l
	bcc	word_aligned
	mov.b	@er5+,@er6+
	sub	#1,LEN(r2)

word_aligned:
	; See if copying one word would make the first operand longword
	; aligned.  Although this is only really worthwhile if it aligns
	; the second operand as well, it's no worse if it doesn't, so it
	; hardly seems worth the overhead of a "band" check.
	bld	#1,r6l			; C = bit 1 of dst
	bcc	fast_copy
	mov.w	@er5+,@er6+
	sub	#2,LEN(r2)

fast_copy:
	; Set (e)r4 to the number of longwords to copy.  At least 13 bytes
	; remain here (16 or more on entry, at most 3 used for alignment),
	; so the quotient is never zero.
	mov	LEN(r2),LEN(r4)
	shlr	#2,LEN(r4)

#ifdef __NORMAL_MODE__
	; 16-bit pointers and size_ts: one movmd.l is enough.  This code
	; is never reached with r4 == 0.
	movmd.l
	and.w	#3,r2			; r2 = bytes left after the longwords
simple:
	; Bytewise tail, shared with the short and misaligned cases.  A zero
	; count must skip movmd.b altogether.
	mov.w	r2,r4
	beq	quit
	movmd.b
quit:
	rts/l	er4-er6
#else
	; Skip the first iteration if the number of longwords is divisible
	; by 0x10000.
	mov.w	r4,r4
	beq	fast_loop_next

	; This loop copies r4 (!= 0) longwords the first time round and 65536
	; longwords on each iteration after that.  e4, the upper half of the
	; longword count, says how many of those extra rounds are needed; the
	; loop ends when decrementing it borrows.
fast_loop:
	movmd.l
fast_loop_next:
	sub.w	#1,e4
	bhs	fast_loop

	; Mop up any left-over bytes.  We could just fall through to the
	; simple code after the "and" but the version below is quicker
	; and only takes 10 more bytes.
	and.w	#3,r2
	beq	quit
	mov.w	r2,r4
	movmd.b
quit:
	rts/l	er4-er6

simple:
	; Simple bytewise copy.  We need to handle all lengths, including zero.
	; r2 supplies the count for the first movmd.b (skipped when it is
	; zero); e2 then counts the remaining rounds in the same way that e4
	; does in fast_loop.
	mov.w	r2,r4
	beq	simple_loop_next
simple_loop:
	movmd.b
simple_loop_next:
	sub.w	#1,e2
	bhs	simple_loop
	rts/l	er4-er6
#endif

#else

	; Generic variant.  Copies from the last element down to the first,
	; a word at a time when both pointers and the length are even and a
	; byte at a time otherwise.
	;
	; In:	A0P = dst, A1P = src, A2P = len
	; Out:	A0P = dst (the store pointer is walked back down to it)
	; Uses:	A1P, A2 and A3P as scratch; nothing is saved or restored.
	.global _memcpy
_memcpy:
	; Disabled stack-argument loads, kept for reference.  The code below
	; takes its arguments straight from the registers.
;	MOVP	@(2/4,r7),A0P	; dst
;	MOVP	@(4/8,r7),A1P	; src
;	MOVP	@(6/12,r7),A2P	; len

	MOVP	A0P,A3P	; keep copy of final dst
	ADDP	A2P,A0P	; point to end of dst
	CMPP	A0P,A3P	; see if anything to do
	beq	quit	; len == 0: A0P already equals dst

	ADDP	A2P,A1P	; point to end of src

	; lets see if we can do this in words
	; A2L still holds the low byte of len.  OR in the low bytes of all
	; three pointers so that bit 0 of the result is clear only when the
	; length and every address are even.  This destroys len, which is not
	; needed again: both loops stop by comparing A0P with A3P.
	or	A0L,A2L	; or in the end of dst
	or	A3L,A2L	; or in the start of dst
	or	A1L,A2L	; or in the end of src
	btst	#0,A2L	; see if the lsb is zero
	bne	byteloop

wordloop:
#ifdef __NORMAL_MODE__
	sub	#2,A1P
#else
	subs	#2,A1P		; point to word
#endif
	mov.w	@A1P,A2		; get word
	mov.w	A2,@-A0P	; save word
	CMPP	A0P,A3P		; at the front again ?
	bne 	wordloop
	rts			; A0P is back at dst

byteloop:
#ifdef __NORMAL_MODE__
	sub	#1,A1P
#else
	subs	#1,A1P		; point to byte
#endif
	mov.b	@A1P,A2L	; get byte
	mov.b	A2L,@-A0P	; save byte
	CMPP	A0P,A3P 	; at the front again ?
	bne 	byteloop

	; return with A0 pointing to dst
quit:	rts

#endif
166