/* a-memcpy.s -- memcpy, optimised for m68k asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include <picolibc.h>

#include "m68kasm.h"

/* MISALIGNED_OK selects how unaligned pointers are handled below:
 *   1 - only dest is brought to a 4-byte boundary; long-word moves are then
 *       issued whatever the alignment of src.
 *   0 - long-word moves are used only when both dest and src are already
 *       4-byte aligned; otherwise the whole buffer is copied byte by byte.
 */
#if defined (__mcoldfire__) || defined (__mc68020__) || defined (__mc68030__) || defined (__mc68040__) || defined (__mc68060__)
# define MISALIGNED_OK 1
#else
# define MISALIGNED_OK 0
#endif

	.text
	.align	4

	.globl	SYM(memcpy)
	.type	SYM(memcpy), @function

/* memcpy, optimised
 *
 * C prototype:  void *memcpy (void *dest, const void *src, size_t len)
 *
 * In:        4(sp) = dest, 8(sp) = src, 12(sp) = len
 * Out:       d0 = a0 = dest (reloaded from the stack before returning)
 * Modifies:  d0, d1, a0, a1 and the condition codes; nothing is pushed.
 *
 * strategy:
 *   - no argument testing (the original memcpy from the GNU lib does
 *     no checking either)
 *   - copies of fewer than 8 bytes go straight to the byte loop.
 *   - make sure the destination pointer (the write pointer) is long word
 *     aligned. This is the best you can do, because writing to unaligned
 *     addresses can be the most costly thing you could do.
 *     (When MISALIGNED_OK is 0 no aligning is attempted: unless both
 *     pointers are already aligned, everything goes through the byte loop.)
 *   - Once you have figured that out, we do a little loop unrolling
 *     to further improve speed: 16 bytes per iteration, with the 4- and
 *     8-byte remainders peeled off first and the last 0..3 bytes copied
 *     by the byte loop.
 *
 * Loop counters: dbra only decrements and tests the low 16 bits of its
 * register.  Both loops therefore follow each dbra fall-through with
 * "sub.l #0x10000" + "bpl" to step the high word, so counts of 65536 and
 * above are honoured.  The byte loop needs this because, with
 * MISALIGNED_OK == 0, an unaligned pointer sends the complete length
 * through it.
 */

SYM(memcpy):
	move.l	4(sp),a0	| dest ptr
	move.l	8(sp),a1	| src ptr
	move.l	12(sp),d1	| len
	cmp.l	#8,d1		| if fewer than 8 bytes to transfer,
	blo	.Lresidue	| do not optimise

#if !MISALIGNED_OK
	/* Goto .Lresidue if either dest or src is not 4-byte aligned */
	move.l	a0,d0
	and.l	#3,d0
	bne	.Lresidue	| d1 still holds the full length here
	move.l	a1,d0
	and.l	#3,d0
	bne	.Lresidue	| d1 still holds the full length here
#else /* MISALIGNED_OK */
	/* align dest */
	move.l	a0,d0		| copy of dest
	neg.l	d0
	and.l	#3,d0		| d0 = bytes (0..3) up to the next boundary
	beq	2f		| is aligned?
	sub.l	d0,d1		| those bytes come off the length (len >= 8)
	lsr.l	#1,d0		| word align needed?
	bcc	1f
	move.b	(a1)+,(a0)+
1:
	lsr.l	#1,d0		| long align needed?
	bcc	2f
	move.w	(a1)+,(a0)+
2:
#endif /* !MISALIGNED_OK */

	/* long word transfers */
	move.l	d1,d0
	and.l	#3,d1		| byte residue
	lsr.l	#3,d0
	bcc	1f		| carry set for 4-byte residue
	move.l	(a1)+,(a0)+
1:
	lsr.l	#1,d0		| number of 16-byte transfers
	bcc	.Lcopy		| carry set for 8-byte residue
	bra	.Lcopy8

.Lcopy16:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy8:
	move.l	(a1)+,(a0)+
	move.l	(a1)+,(a0)+
.Lcopy:
#if !defined (__mcoldfire__)
	dbra	d0,.Lcopy16	| counts in the low word of d0 only
	sub.l	#0x10000,d0	| low word ran out: step the high word
#else
	subq.l	#1,d0
#endif
	bpl	.Lcopy16
	bra	.Lresidue

.Lbyte:
	move.b	(a1)+,(a0)+	| move residue bytes

.Lresidue:
#if !defined (__mcoldfire__)
	dbra	d1,.Lbyte	| counts in the low word of d1 only
	sub.l	#0x10000,d1	| d1 may be a full length: step the high word
#else
	subq.l	#1,d1
#endif
	bpl	.Lbyte		| loop until done
	move.l	4(sp),a0	| return value
	move.l	a0,d0		| in both a0 and d0
	rts
	.size	SYM(memcpy), . - SYM(memcpy)

#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif