/*
Copyright (c) 1990 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation,
and/or other materials related to such
distribution and use acknowledge that the software was developed
by the University of California, Berkeley.  The name of the
University may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
#include <picolibc.h>

#include "setarch.h"

#include "defines.h"

#ifdef __H8300SX__

; ----------------------------------------------------------------------
; _memcpy, H8SX variant, built around the movmd block-move instructions.
;
;   In:    er0 = destination, er1 = source, byte count in r2 or er2
;          (the width is chosen by the LEN macro, defined outside
;          this file)
;   Out:   er0 is never written here, so it still holds the destination
;   Uses:  er4 = movmd count, er5 = read pointer, er6 = write pointer;
;          all three are pushed on entry and popped again by rts/l
; ----------------------------------------------------------------------
	.global	_memcpy
_memcpy:
	stm.l	er4-er6,@-er7

	; movmd works through er5/er6, so copy the arguments there:
	; er6 walks the destination, er5 walks the source.
	mov.l	er0,er6
	mov.l	er1,er5

	; Decide whether the longword path is worth taking.  It copes with
	; anything longer than 6 bytes, but for short copies its set-up
	; costs more than a plain movmd.b, hence the higher cut-off.
	;
	; The cost of the checks below varies with the alignment of the
	; length and both pointers: cheaper when er0 & 3, er1 & 3 and
	; er2 & 3 are all equal, cheapest when they are all zero.  The
	; value 16 is a compromise between those cases.
	cmp	#16,LEN(r2)
	blo	bytes_only

	; movmd.l needs even addresses.  If exactly one of the two
	; pointers is odd no amount of pre-copying can fix that, so
	; take the bytewise path.  (carry = bit 0 of er5 XOR bit 0 of er6)
	bld	#0,r5l
	bxor	#0,r6l
	bcs	bytes_only

	; Both pointers have the same parity here.  If they are odd,
	; move one byte so that both become even.
	bld	#0,r5l
	bcc	ptrs_even
	mov.b	@er5+,@er6+
	sub	#1,LEN(r2)

ptrs_even:
	; If bit 1 of the destination is set, moving one word makes the
	; destination longword aligned.  That only really pays off when
	; the source ends up longword aligned too, but it is never a
	; loss otherwise, so no extra test is made for the source.
	bld	#1,r6l
	bcc	long_copy
	mov.w	@er5+,@er6+
	sub	#2,LEN(r2)

long_copy:
	; (e)r4 = remaining length / 4 = number of longwords to move.
	mov	LEN(r2),LEN(r4)
	shlr	#2,LEN(r4)

#ifdef __NORMAL_MODE__
	; Pointers and size_t are 16 bits wide, so a single movmd.l
	; covers the whole longword count.  r4 cannot be zero here.
	movmd.l
	; Keep only the 0-3 trailing bytes and share the byte copy below.
	and.w	#3,r2
bytes_only:
	; Bytewise copy of r2 bytes; a zero count goes straight to the exit.
	mov.w	r2,r4
	beq	finished
	movmd.b
finished:
	rts/l	er4-er6
#else
	; When the longword count is a multiple of 0x10000 its low half
	; is zero, so enter the loop at the counter test instead of
	; issuing a first movmd.l.
	mov.w	r4,r4
	beq	long_loop_test

	; First pass moves r4 (non-zero) longwords; every later pass
	; moves 65536 longwords.  e4 counts the later passes.
long_loop:
	movmd.l
long_loop_test:
	sub.w	#1,e4
	bhs	long_loop

	; Copy the 0-3 bytes that are left.  Falling into the bytewise
	; code after the and would also work, but this dedicated tail
	; is quicker and costs only 10 more bytes.
	and.w	#3,r2
	beq	finished
	mov.w	r2,r4
	movmd.b
finished:
	rts/l	er4-er6

bytes_only:
	; Plain bytewise copy; must work for every length, zero included.
	; Same shape as the longword loop: the low half of the count is
	; handled first (skipped when zero), then e2 counts further passes.
	mov.w	r2,r4
	beq	byte_loop_test
byte_loop:
	movmd.b
byte_loop_test:
	sub.w	#1,e2
	bhs	byte_loop
	rts/l	er4-er6
#endif

#else

; ----------------------------------------------------------------------
; _memcpy, generic H8/300 variant.
;
; Register roles, as recorded by the original (disabled) argument loads
; kept below: A0P = dst, A1P = src, A2P = len.  A3P is used as scratch
; to remember where dst starts.
;
; The copy runs backwards: both pointers are first advanced to the end
; of their buffers and then stepped down until A0P is back at the start
; of dst, which is also the value left in A0 on return.
; ----------------------------------------------------------------------
	.global	_memcpy
_memcpy:
;	MOVP	@(2/4,r7),A0P	; dst
;	MOVP	@(4/8,r7),A1P	; src
;	MOVP	@(6/12,r7),A2P	; len

	MOVP	A0P,A3P		; A3P = start of dst (loop end marker)
	ADDP	A2P,A0P		; A0P = one past the end of dst
	CMPP	A0P,A3P		; equal means len is zero
	beq	all_done

	ADDP	A2P,A1P		; A1P = one past the end of src

	; A word copy is possible only if len, both dst bounds and the
	; src end are all even.  Fold their low bytes into A2L (which
	; starts out as the low byte of len) and look at bit 0.
	or	A0L,A2L		; end of dst
	or	A3L,A2L		; start of dst
	or	A1L,A2L		; end of src
	btst	#0,A2L		; any odd value sets bit 0
	bne	copy_bytes

copy_words:
#ifdef __NORMAL_MODE__
	sub	#2,A1P
#else
	subs	#2,A1P		; step src back one word
#endif
	mov.w	@A1P,A2		; fetch the word
	mov.w	A2,@-A0P	; store it with pre-decrement of dst
	CMPP	A0P,A3P		; reached the start of dst ?
	bne	copy_words
	rts

copy_bytes:
#ifdef __NORMAL_MODE__
	sub	#1,A1P
#else
	subs	#1,A1P		; step src back one byte
#endif
	mov.b	@A1P,A2L	; fetch the byte
	mov.b	A2L,@-A0P	; store it with pre-decrement of dst
	CMPP	A0P,A3P		; reached the start of dst ?
	bne	copy_bytes

	; A0 is back at the start of dst: that is the return value.
all_done:
	rts

#endif