1/*
2Copyright (c) 1990 The Regents of the University of California.
3All rights reserved.
4
5Redistribution and use in source and binary forms are permitted
6provided that the above copyright notice and this paragraph are
7duplicated in all such forms and that any documentation,
8and/or other materials related to such
9distribution and use acknowledge that the software was developed
10by the University of California, Berkeley.  The name of the
11University may not be used to endorse or promote products derived
12from this software without specific prior written permission.
13THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 */
17#include <picolibc.h>
18
19#include "setarch.h"
20
21#include "defines.h"
22
23#ifdef __H8300SX__
24
; memcpy for H8SX, built only when __H8300SX__ is defined.
;
; Register use, as read from the code below:
;   er0  destination pointer on entry; copied to er6 and never modified
;        here (presumably the return value -- confirm against the ABI).
;   er1  source pointer on entry; copied to er5.
;   er2  byte count on entry; reduced as alignment bytes are copied.
;   er4  count register for movmd, with er5/er6 as its source and
;        destination pointers.  All three are pushed by stm.l on entry
;        and popped again by every rts/l exit.
;
; LEN() comes from defines.h (not shown here); it presumably selects the
; register width that matches size_t -- confirm.
25	.global _memcpy
26_memcpy:
27	stm.l	er4-er6,@-er7
28
29	; Set up source and destination pointers for movmd.
30	mov.l	er0,er6
31	mov.l	er1,er5
32
33	; See whether the copy is long enough to use the movmd.l code.
34	; Although the code can handle anything longer than 6 bytes,
35	; it can be more expensive than movmd.b for small moves.
36	; It's better to use a higher threshold to account for this.
37	;
38	; Note that the exact overhead of the movmd.l checks depends on
39	; the alignments of the length and pointers.  They are faster when
40	; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
41	; are 0.  This threshold is a compromise between the various cases.
42	cmp	#16,LEN(r2)
43	blo	simple
44
45	; movmd.l only works for even addresses.  If one of the addresses
46	; is odd and the other is not, fall back on a simple move.
	; The carry ends up as bit 0 of er5 XOR bit 0 of er6, so it is set
	; exactly when the two pointers differ in parity.
47	bld	#0,r5l
48	bxor	#0,r6l
49	bcs	simple
50
51	; Make the addresses even.
	; Both pointers have the same parity at this point, so testing er5
	; alone is enough and a single byte copy evens out both of them.
52	bld	#0,r5l
53	bcc	word_aligned
54	mov.b	@er5+,@er6+
55	sub	#1,LEN(r2)
56
57word_aligned:
58	; See if copying one word would make the first operand longword
59	; aligned.  Although this is only really worthwhile if it aligns
60	; the second operand as well, it's no worse if it doesn't, so it
61	; hardly seems worth the overhead of a "band" check.
	; Only bit 1 of the destination pointer (er6) is tested here.
62	bld	#1,r6l
63	bcc	fast_copy
64	mov.w	@er5+,@er6+
65	sub	#2,LEN(r2)
66
67fast_copy:
68	; Set (e)r4 to the number of longwords to copy.
	; (e)r2 keeps the remaining byte count; its low two bits are the
	; tail that is mopped up with movmd.b afterwards.
69	mov	LEN(r2),LEN(r4)
70	shlr	#2,LEN(r4)
71
72#ifdef __NORMAL_MODE__
73	; 16-bit pointers and size_ts: one movmd.l is enough.  This code
74	; is never reached with r4 == 0.
75	movmd.l
76	and.w	#3,r2
	; Short or mixed-parity copies branch to "simple" with r2 holding the
	; whole length; the fast path falls into it with r2 = 0..3 left-over
	; bytes.  Either way a zero count skips the movmd.b.
77simple:
78	mov.w	r2,r4
79	beq	quit
80	movmd.b
81quit:
82	rts/l	er4-er6
83#else
84	; Skip the first iteration if the number of longwords is divisible
85	; by 0x10000.
86	mov.w	r4,r4
87	beq	fast_loop_next
88
89	; This loop copies r4 (!= 0) longwords the first time round and 65536
90	; longwords on each iteration after that.
91fast_loop:
92	movmd.l
93fast_loop_next:
	; e4 is the upper 16 bits of the longword count.  Each decrement
	; that does not borrow means one more 65536-longword block is still
	; to be copied; the borrow ends the loop.
94	sub.w	#1,e4
95	bhs	fast_loop
96
97	; Mop up any left-over bytes.  We could just fall through to the
98	; simple code after the "and" but the version below is quicker
99	; and only takes 10 more bytes.
100	and.w	#3,r2
101	beq	quit
102	mov.w	r2,r4
103	movmd.b
104quit:
105	rts/l	er4-er6
106
107simple:
108	; Simple bytewise copy.  We need to handle all lengths, including zero.
	; Same scheme as fast_loop, but in bytes: r4 takes the low 16 bits of
	; the count and e2 (the upper 16 bits of er2) counts the remaining
	; 65536-byte blocks.
109	mov.w	r2,r4
110	beq	simple_loop_next
111simple_loop:
112	movmd.b
113simple_loop_next:
114	sub.w	#1,e2
115	bhs	simple_loop
116	rts/l	er4-er6
117#endif
118
119#else
120
; Generic memcpy, used when __H8300SX__ is not defined.
;
; MOVP/ADDP/CMPP and the A0..A3 register names come from defines.h (not
; shown here); presumably they pick the 16- or 32-bit pointer forms for
; the target -- confirm.  Going by the commented-out loads below,
; A0P = dst, A1P = src and A2P = len.
;
; The copy runs backwards: A0P and A1P are first advanced to the ends of
; the two buffers and then stepped down until A0P is back at the start
; address saved in A3P, so A0P holds dst again on return.  A2 and A3 are
; overwritten along the way.
121	.global _memcpy
122_memcpy:
123;	MOVP	@(2/4,r7),A0P	; dst
124;	MOVP	@(4/8,r7),A1P	; src
125;	MOVP	@(6/12,r7),A2P	; len
126
127	MOVP	A0P,A3P	; keep copy of final dst
128	ADDP	A2P,A0P	; point to end of dst
129	CMPP	A0P,A3P	; see if anything to do
	; end of dst == start of dst: nothing to copy, A0P is already dst
130	beq	quit
131
132	ADDP	A2P,A1P	; point to end of src
133
134	; lets see if we can do this in words
	; A2L starts out as the low byte of the length.  After the three ors
	; its bit 0 is clear only if the length, both ends of dst and the end
	; of src are all even (presumably what word accesses require).
135	or	A0L,A2L	; or in the end-of-dst address
136	or	A3L,A2L	; or in the start-of-dst address
137	or	A1L,A2L	; or in the end-of-src address
138	btst	#0,A2L	; see if the lsb is zero
139	bne	byteloop
140
	; Everything is even: copy one word per iteration, last word first.
141wordloop:
142#ifdef __NORMAL_MODE__
143	sub	#2,A1P
144#else
145	subs	#2,A1P		; point to word
146#endif
147	mov.w	@A1P,A2		; get word
148	mov.w	A2,@-A0P	; save word
149	CMPP	A0P,A3P		; at the front again ?
150	bne 	wordloop
	; A0P has reached A3P here, so it points at dst again
151	rts
152
	; Something is odd: copy one byte per iteration, last byte first.
153byteloop:
154#ifdef __NORMAL_MODE__
155	sub	#1,A1P
156#else
157	subs	#1,A1P		; point to byte
158#endif
159	mov.b	@A1P,A2L	; get byte
160	mov.b	A2L,@-A0P	; save byte
161	CMPP	A0P,A3P 	; at the front again ?
162	bne 	byteloop
163
164	; return with A0 pointing to dst
165quit:	rts
166
167#endif
168