/* a-memset.s -- memset, optimised for fido asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include "m68kasm.h"

	.text
	.align	4

	.globl	SYM(memset)
	.type	SYM(memset), @function

|   memset, optimised
|
|   interface (all arguments are taken from the stack):
|	 4(sp)	dest	start of the area to fill
|	 8(sp)	value	fill value, only the low byte is used
|	12(sp)	len	number of bytes to fill
|	returns	dest in d0
|	changes	a0, d0, d1 and the condition codes; d2 is saved and restored
|
|   strategy:
|   - no argument testing (dest and len are used exactly as passed in)
|   - fills shorter than 16 bytes are done byte by byte
|   - for longer fills, make sure the destination pointer (the write
|     pointer) is long word aligned first. This is the best you can do,
|     because writing to unaligned addresses can be very costly.
|   - we fill long word wise if possible, then finish the remaining
|     0..3 bytes byte by byte
|
|   VG, 2006
|
|	bugfixes:
|		- distribution of byte value improved - in case someone
|		  passes a non-byte value
|		- residue byte transfer was not working
|
|	VG, April 2007
|
SYM(memset):
	move.l	4(sp),a0	| a0 = dest (write pointer)
	move.l	8(sp),d0	| d0 = fill value
	move.l	12(sp),d1	| d1 = len
	cmp.l	#16,d1
	blo	.Lbset		| below 16 (unsigned): byte fill only
	|
	move.l	d2,-(sp)	| need a scratch register, keep the old d2
	move.b	d0,d2		| distribute low byte to all bytes of d0
	lsl.l	#8,d0
	move.b	d2,d0		| low word of d0 is now vv
	move.w	d0,d2
	swap	d0		| exchange the two word halves
	move.w	d2,d0		| d0 = vvvv
	|
	move.l	a0,d2		| copy of dest
	neg.l	d2		| dest & 3 of 1 2 3 ==> 3 2 1 bytes to alignment
	and.l	#3,d2
	beq	.Laligned	| dest is already long word aligned
	|
	sub.l	d2,d1		| fix length: alignment bytes are written below
	lsr.l	#1,d2		| bit 0 into carry: single byte needed?
	bcc	.Lalign_word
	move.b	d0,(a0)+	| fill byte, a0 is now word aligned
.Lalign_word:
	lsr.l	#1,d2		| former bit 1 into carry: word needed?
	bcc	.Laligned
	move.w	d0,(a0)+	| fill word, a0 is now long word aligned
.Laligned:
	move.l	d1,d2		| number of long transfers (at least 3)
	lsr.l	#2,d2
	subq.l	#1,d2		| loop counter = transfers - 1

.Llong_loop:
	move.l	d0,(a0)+	| fill long words
#if !defined (__mcoldfire__)
	dbra	d2,.Llong_loop	| dbra only counts the low 16 bits of d2 ...
	sub.l	#0x10000,d2	| ... so count down the upper 16 bits here
#else
	subq.l	#1,d2
#endif
	bpl	.Llong_loop	| repeat until the counter turns negative
	and.l	#3,d1		| residue byte transfers, fixed
	move.l	(sp)+,d2	| restore d2, dest is at 4(sp) again
	bra	.Lbset

.Lbyte_loop:
	move.b	d0,(a0)+	| fill residue bytes
.Lbset:				| entry with d1 = bytes left (0..15)
#if !defined (__mcoldfire__)
	dbra	d1,.Lbyte_loop	| loop until done
#else
	subq.l	#1,d1
	bpl	.Lbyte_loop
#endif
	move.l	4(sp),d0	| return value: the original dest
	rts

	.size	SYM(memset), . - SYM(memset)