/* Copyright (c) 1990 The Regents of the University of California.
   All rights reserved.

   Redistribution and use in source and binary forms are permitted
   provided that the above copyright notice and this paragraph are
   duplicated in all such forms and that any documentation,
   and/or other materials related to such distribution and use
   acknowledge that the software was developed by the University of
   California, Berkeley.  The name of the University may not be used to
   endorse or promote products derived from this software without
   specific prior written permission.  THIS SOFTWARE IS PROVIDED ``AS
   IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
   WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS FOR A PARTICULAR PURPOSE.  */

#include "setarch.h"
#include "defines.h"

;-----------------------------------------------------------------------
; memcpy(dst, src, len) for the H8 family.
;
; In:   er0/r0 = dst, er1/r1 = src, r2/er2 = len
;       (the copy loops below read the destination from er0, the source
;       from er1 and the length from (e)r2).
;
; NOTE(review): LEN(), A0P/A1P/A2P/A3P, A2L, MOVP, ADDP and CMPP are
; macros from "defines.h" that presumably select the 16-bit or 32-bit
; register view / pointer instruction depending on __NORMAL_MODE__ —
; confirm against defines.h.
;-----------------------------------------------------------------------

#ifdef __H8300SX__
	; H8300SX variant: use the block-move instructions movmd.b/movmd.l,
	; which copy r4 elements from @er5+ to @er6+.
	.global	_memcpy
_memcpy:
	stm.l	er4-er6,@-er7		; save callee registers used below

	; Set up source and destination pointers for movmd.
	mov.l	er0,er6			; er6 = dst (er0 itself is never
	mov.l	er1,er5			; er5 = src  modified, so it still
					; holds dst at every rts below)

	; See whether the copy is long enough to use the movmd.l code.
	; Although the code can handle anything longer than 6 bytes,
	; it can be more expensive than movmd.b for small moves.
	; It's better to use a higher threshold to account for this.
	;
	; Note that the exact overhead of the movmd.l checks depends on
	; the alignments of the length and pointers.  They are faster when
	; er0 & 3 == er1 & 3 == er2 & 3, faster still when these values
	; are 0.  This threshold is a compromise between the various cases.
	cmp	#16,LEN(r2)
	blo	simple

	; movmd.l only works for even addresses.  If one of the addresses
	; is odd and the other is not, fall back on a simple move.
	bld	#0,r5l			; C = src bit 0
	bxor	#0,r6l			; C ^= dst bit 0
	bcs	simple			; parities differ -> bytewise copy

	; Make the addresses even (src and dst have the same parity here,
	; so one byte fixes both).
	bld	#0,r5l
	bcc	word_aligned
	mov.b	@er5+,@er6+
	sub	#1,LEN(r2)

word_aligned:
	; See if copying one word would make the first operand longword
	; aligned.  Although this is only really worthwhile if it aligns
	; the second operand as well, it's no worse if it doesn't, so it
	; hardly seems worth the overhead of a "band" check.
	bld	#1,r6l
	bcc	fast_copy
	mov.w	@er5+,@er6+
	sub	#2,LEN(r2)

fast_copy:
	; Set (e)r4 to the number of longwords to copy.
	mov	LEN(r2),LEN(r4)
	shlr	#2,LEN(r4)

#ifdef __NORMAL_MODE__
	; 16-bit pointers and size_ts: one movmd.l is enough.  This code
	; is never reached with r4 == 0.
	movmd.l
	and.w	#3,r2			; r2 = trailing byte count (0-3)
simple:
	mov.w	r2,r4
	beq	quit
	movmd.b				; copy the remaining bytes
quit:
	rts/l	er4-er6			; restore er4-er6 and return
#else
	; 32-bit length: movmd.l copies at most 0x10000 longwords per
	; issue (a zero r4 means 0x10000), so loop on the high word e4.
	; Skip the first iteration if the number of longwords is divisible
	; by 0x10000.
	mov.w	r4,r4			; test low word of the count
	beq	fast_loop_next

	; This loop copies r4 (!= 0) longwords the first time round and 65536
	; longwords on each iteration after that.
fast_loop:
	movmd.l
fast_loop_next:
	sub.w	#1,e4
	bhs	fast_loop

	; Mop up any left-over bytes.  We could just fall through to the
	; simple code after the "and" but the version below is quicker
	; and only takes 10 more bytes.
	and.w	#3,r2			; r2 = trailing byte count (0-3)
	beq	quit
	mov.w	r2,r4
	movmd.b
quit:
	rts/l	er4-er6			; restore er4-er6 and return

simple:
	; Simple bytewise copy.  We need to handle all lengths, including
	; zero.  movmd.b copies r4 (or 0x10000 when r4 == 0) bytes, so the
	; outer loop counts down the high word e2.
	mov.w	r2,r4
	beq	simple_loop_next
simple_loop:
	movmd.b
simple_loop_next:
	sub.w	#1,e2
	bhs	simple_loop
	rts/l	er4-er6
#endif
#else
	; Generic H8/300 variant: copy BACKWARDS from the end of the
	; buffers, using words when dst, src and len are all even.
	.global	_memcpy
_memcpy:
	; Arguments arrive in registers; the stack-based fetch below is
	; kept only as historical documentation of the old convention.
	; MOVP	@(2/4,r7),A0P	; dst
	; MOVP	@(4/8,r7),A1P	; src
	; MOVP	@(6/12,r7),A2P	; len

	MOVP	A0P,A3P			; keep copy of final dst
	ADDP	A2P,A0P			; point to end of dst
	CMPP	A0P,A3P			; see if anything to do
	beq	quit			; len == 0 -> done

	ADDP	A2P,A1P			; point to end of src
	; lets see if we can do this in words: OR together the low bytes
	; of both pointers and the length; a set lsb in any of them forces
	; the byte loop.
	or	A0L,A2L			; or in the dst address
	or	A3L,A2L			; or the length
	or	A1L,A2L			; or the src address
	btst	#0,A2L			; see if the lsb is zero
	bne	byteloop

	; Word-at-a-time backwards copy.
wordloop:
#ifdef __NORMAL_MODE__
	sub	#2,A1P
#else
	subs	#2,A1P			; point to word
#endif
	mov.w	@A1P,A2			; get word
	mov.w	A2,@-A0P		; save word
	CMPP	A0P,A3P			; at the front again ?
	bne	wordloop
	rts

	; Byte-at-a-time backwards copy (any alignment, any length).
byteloop:
#ifdef __NORMAL_MODE__
	sub	#1,A1P
#else
	subs	#1,A1P			; point to byte
#endif
	mov.b	@A1P,A2L		; get byte
	mov.b	A2L,@-A0P		; save byte
	CMPP	A0P,A3P			; at the front again ?
	bne	byteloop

	; return with A0 pointing to dst
quit:	rts
#endif