/* a-memcpy.s -- memcpy, optimised for m68k asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include "m68kasm.h"

/* MISALIGNED_OK selects how misaligned pointers are treated below:
 *   1 - align the destination, then copy long words even if the source
 *       ends up misaligned;
 *   0 - use long word copies only when both pointers are 4-byte aligned,
 *       otherwise copy everything byte by byte.
 */
#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif

	.text
	.align	4

	.globl	SYM(memcpy)
	.type	SYM(memcpy), @function

/* memcpy, optimised
 *
 * C equivalent: void *memcpy (void *dest, const void *src, size_t len)
 *
 * In:       4(sp)  = dest
 *           8(sp)  = src
 *           12(sp) = len
 * Out:      d0     = dest (reloaded from the stack on exit)
 * Clobbers: d0, d1, a0, a1, condition codes
 * Stack:    nothing is pushed; the arguments are only read
 *
 * strategy:
 *   - no argument testing (the original memcpy from the GNU lib does
 *     no checking either)
 *   - copies of fewer than 8 bytes go straight to the byte loop
 *   - when MISALIGNED_OK is set, make sure the destination pointer (the
 *     write pointer) is long word aligned. This is the best you can do,
 *     because writing to unaligned addresses can be the most costly
 *     thing you could do.
 *   - when MISALIGNED_OK is clear, long word copies are used only if both
 *     pointers are already 4-byte aligned; otherwise the whole buffer is
 *     copied by the byte loop.
 *   - the bulk is moved with a loop unrolled to 16 bytes per pass, and
 *     whatever is left (0-3 bytes) is finished by the byte loop.
 */

SYM(memcpy):
	move.l	4(sp),a0		| a0 = dest (write pointer)
	move.l	8(sp),a1		| a1 = src (read pointer)
	move.l	12(sp),d1		| d1 = len
	cmp.l	#8,d1			| fewer than 8 bytes to transfer?
	blo	.Lresidue		| yes: not worth optimising

#if !MISALIGNED_OK
	/* Go to .Lresidue (with the full length still in d1) if either
	   dest or src is not 4-byte aligned.  */
	move.l	a0,d0
	and.l	#3,d0
	bne	.Lresidue
	move.l	a1,d0
	and.l	#3,d0
	bne	.Lresidue
#else /* MISALIGNED_OK */
	/* Align dest: d0 = (-dest) & 3 = bytes needed to reach the next
	   4-byte boundary.  */
	move.l	a0,d0			| copy of dest
	neg.l	d0
	and.l	#3,d0			| look at the lower two bits only
	beq	.Laligned		| already aligned?
	sub.l	d0,d1			| these bytes come off the length
	lsr.l	#1,d0			| bit 0 -> carry: one byte needed?
	bcc	.Lalign_word
	move.b	(a1)+,(a0)+
.Lalign_word:
	lsr.l	#1,d0			| bit 1 -> carry: one word needed?
	bcc	.Laligned
	move.w	(a1)+,(a0)+
.Laligned:
#endif /* !MISALIGNED_OK */

	/* Long word transfers.  Split the length into
	     d1 = len & 3   bytes left for the byte loop
	     d0 = len >> 4  passes of the 16-byte loop
	   and use the bits shifted out of d0 (4 and 8) to copy one long
	   word and/or enter the unrolled loop half way.  */
	move.l	d1,d0
	and.l	#3,d1			| byte residue
	lsr.l	#3,d0			| carry = bit 2 of len
	bcc	.Lno_long		| carry set for 4-byte residue
	move.l	(a1)+,(a0)+
.Lno_long:
	lsr.l	#1,d0			| d0 = number of 16-byte transfers
	bcc	.Lcopy			| carry set for 8-byte residue
	bra	.Lcopy8

.Lcopy16:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy8:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	d0,.Lcopy16		| counts in the low word of d0 only
	sub.l	#0x10000,d0		| so borrow from the high word by hand
#else
	subq.l	#1,d0
#endif
	bpl	.Lcopy16
	bra	.Lresidue

.Lbyte:
	move.b	(a1)+,(a0)+		| move one residue byte

.Lresidue:
	/* Byte loop: copy the d1 bytes that are left.  */
#if !defined (__mcoldfire__)
	dbra	d1,.Lbyte		| counts in the low word of d1 only
# if !MISALIGNED_OK
	/* The misaligned fallback above arrives here with the complete
	   32-bit length in d1, which may exceed 0xffff.  Borrow from the
	   high word the same way the 16-byte loop does for d0, so that
	   the copy is correct for any length below 2 GiB.  When
	   MISALIGNED_OK is set d1 is always below 8 at this point and
	   the dbra alone is sufficient.  */
	sub.l	#0x10000,d1
	bpl	.Lbyte
# endif
#else
	subq.l	#1,d1
	bpl	.Lbyte
#endif
	move.l	4(sp),d0		| return value: original dest
	rts