/*
Copyright (c) 1990 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation,
and/or other materials related to such
distribution and use acknowledge that the software was developed
by the University of California, Berkeley.  The name of the
University may not be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
#include "setarch.h"

#include "defines.h"

#ifdef __H8300SX__

	; memcpy for H8SX, built around the movmd block-move instructions.
	;
	; On entry:  er0 = destination, er1 = source, LEN(r2) = byte count.
	; On exit:   er0 is never written below, so it still holds the
	;            destination pointer.
	; er4-er6 are pushed on entry and restored by rts/l on every exit.
	;
	; NOTE(review): LEN() is defined in defines.h, which is not visible
	; here; it presumably selects r2 or er2 to match the width of size_t.
	; Confirm against defines.h.

	.global	_memcpy
_memcpy:
	stm.l	er4-er6,@-er7

	; Set up source and destination pointers for movmd.
	; er6 = destination, er5 = source.
	mov.l	er0,er6
	mov.l	er1,er5

	; See whether the copy is long enough to use the movmd.l code.
	; Although the code can handle anything longer than 6 bytes,
	; it can be more expensive than movmd.b for small moves.
	; It's better to use a higher threshold to account for this.
	;
	; Note that the exact overhead of the movmd.l checks depends on
	; the alignments of the length and pointers.  They are faster when
	; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
	; are 0.  This threshold is a compromise between the various cases.
	cmp	#16,LEN(r2)
	blo	simple

	; movmd.l only works for even addresses.  If one of the addresses
	; is odd and the other is not, fall back on a simple move.
	; Carry = bit 0 of source XOR bit 0 of destination.
	bld	#0,r5l
	bxor	#0,r6l
	bcs	simple

	; Make the addresses even.  Both pointers have the same parity
	; here, so copying a single byte fixes both of them.
	bld	#0,r5l
	bcc	word_aligned
	mov.b	@er5+,@er6+
	sub	#1,LEN(r2)

word_aligned:
	; See if copying one word would make the destination (er6) longword
	; aligned.  Although this is only really worthwhile if it aligns
	; the source as well, it's no worse if it does not, so it
	; hardly seems worth the overhead of a "band" check.
	bld	#1,r6l
	bcc	fast_copy
	mov.w	@er5+,@er6+
	sub	#2,LEN(r2)

fast_copy:
	; Set (e)r4 to the number of longwords to copy.
	mov	LEN(r2),LEN(r4)
	shlr	#2,LEN(r4)

#ifdef __NORMAL_MODE__
	; 16-bit pointers and size_ts: one movmd.l is enough.  This code
	; is never reached with r4 == 0.
	movmd.l
	; Keep only the 0-3 trailing bytes and fall through to copy them.
	and.w	#3,r2
simple:
	; Bytewise copy of r2 bytes.  The mov.w doubles as the zero test,
	; so a zero count skips movmd.b altogether.
	mov.w	r2,r4
	beq	quit
	movmd.b
quit:
	rts/l	er4-er6
#else
	; Skip the first iteration if the number of longwords is divisible
	; by 0x10000.
	mov.w	r4,r4
	beq	fast_loop_next

	; This loop copies r4 (!= 0) longwords the first time round and 65536
	; longwords on each iteration after that.
fast_loop:
	movmd.l
fast_loop_next:
	; e4 holds the upper 16 bits of the longword count; keep looping
	; until decrementing it borrows.
	sub.w	#1,e4
	bhs	fast_loop

	; Mop up any left-over bytes.  We could just fall through to the
	; simple code after the "and" but the version below is quicker
	; and only takes 10 more bytes.
	and.w	#3,r2
	beq	quit
	mov.w	r2,r4
	movmd.b
quit:
	rts/l	er4-er6

simple:
	; Simple bytewise copy.  We need to handle all lengths, including zero.
	; The first movmd.b handles the low 16 bits of the count (skipped
	; when they are zero); e2, the upper 16 bits, then counts the
	; remaining passes.
	; NOTE(review): this relies on movmd.b copying 65536 bytes when
	; r4 == 0, mirroring the movmd.l behaviour described at fast_loop.
	mov.w	r2,r4
	beq	simple_loop_next
simple_loop:
	movmd.b
simple_loop_next:
	sub.w	#1,e2
	bhs	simple_loop
	rts/l	er4-er6
#endif

#else

	; memcpy for the non-SX H8/300 variants.  The copy runs backwards:
	; both pointers are advanced to the end of their buffers and then
	; walked down until the destination pointer is back at its start.
	;
	; On entry:  A0P = dst, A1P = src, A2P = len.  The arguments are used
	;            directly from these registers; the stack loads below are
	;            commented out.
	; On exit:   A0P = dst.
	; A1, A2 and A3 are overwritten.
	;
	; NOTE(review): MOVP/ADDP/CMPP and the A0-A3 register names are
	; macros from the included headers, which are not visible here;
	; presumably they expand to pointer-width instructions and registers.

	.global	_memcpy
_memcpy:
;	MOVP	@(2/4,r7),A0P	; dst
;	MOVP	@(4/8,r7),A1P	; src
;	MOVP	@(6/12,r7),A2P	; len

	MOVP	A0P,A3P	; keep copy of final dst
	ADDP	A2P,A0P	; point to end of dst
	CMPP	A0P,A3P	; see if anything to do
	beq	quit

	ADDP	A2P,A1P	; point to end of src

	; See if we can do this in words.  A2L starts out as the low byte
	; of the length; after the three ORs its bit 0 is clear only when
	; the length and all three addresses are even.
	or	A0L,A2L	; or in the end-of-dst address
	or	A3L,A2L	; or in the start-of-dst address
	or	A1L,A2L	; or in the end-of-src address
	btst	#0,A2L	; see if the lsb is zero
	bne	byteloop

	; The length is no longer needed from here on, so A2 is reused as
	; the data register in both loops.
wordloop:
#ifdef __NORMAL_MODE__
	sub	#2,A1P
#else
	subs	#2,A1P		; point to word
#endif
	mov.w	@A1P,A2		; get word
	mov.w	A2,@-A0P	; save word
	CMPP	A0P,A3P		; at the front again ?
	bne	wordloop
	rts			; A0 is back at dst here as well

byteloop:
#ifdef __NORMAL_MODE__
	sub	#1,A1P
#else
	subs	#1,A1P		; point to byte
#endif
	mov.b	@A1P,A2L	; get byte
	mov.b	A2L,@-A0P	; save byte
	CMPP	A0P,A3P 	; at the front again ?
	bne	byteloop

	; return with A0 pointing to dst
quit:	rts

#endif