/* a-memset.s -- memset, optimised for fido asm
 *
 * Copyright (c) 2007 mocom software GmbH & Co KG)
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

#include <picolibc.h>

#include "m68kasm.h"

	.text
	.align	4

	.globl	SYM(memset)
	.type	SYM(memset), @function

| memset (dest, value, len)
|
| Stack arguments on entry:
|    4(sp)  dest   start of the area to fill
|    8(sp)  value  only the low byte ends up in memory
|   12(sp)  len    number of bytes to fill
|
| Result: dest, handed back in both a0 and d0.
| Registers: d1 is used as a counter and is not preserved; d2 is pushed
| and popped again on the long word path and untouched otherwise.
|
| How it works:
|  - the arguments are not validated
|  - fewer than 16 bytes are written one byte at a time
|  - otherwise the low byte of value is replicated into all four bytes
|    of d0, then up to three leading bytes are written (one byte and/or
|    one word) so that the write pointer becomes long word aligned,
|    then the bulk is written as long words, and finally the remaining
|    0..3 bytes go through the byte loop
|
| History:
|   VG, 2006        first version
|   VG, April 2007  fill pattern is built from the low byte only, so a
|                   non-byte value works; trailing byte fill repaired

SYM(memset):
	move.l	4(sp),a0		| a0 = write pointer (dest)
	move.l	8(sp),d0		| d0 = fill value
	move.l	12(sp),d1		| d1 = byte count
	cmp.l	#16,d1
	blo	.Lbyte_test		| unsigned: short fills use bytes only

	move.l	d2,-(sp)		| free up d2 as scratch

	| replicate the low byte of d0 into all four bytes of d0
	move.b	d0,d2
	lsl.l	#8,d0
	move.b	d2,d0			| low word of d0 = byte:byte
	move.w	d0,d2
	swap	d0			| exchange the two 16 bit halves
	move.w	d2,d0			| both halves of d0 = byte:byte

	| d2 = (-dest) & 3 = bytes missing to the next long word boundary
	move.l	a0,d2
	neg.l	d2
	and.l	#3,d2
	beq	.Llong_count		| dest is long word aligned already

	sub.l	d2,d1			| the alignment bytes come off the count
	lsr.l	#1,d2			| bit 0 -> carry: single byte required?
	bcc	.Lalign_word
	move.b	d0,(a0)+
.Lalign_word:
	lsr.l	#1,d2			| bit 1 -> carry: single word required?
	bcc	.Llong_count
	move.w	d0,(a0)+
.Llong_count:
	move.l	d1,d2
	lsr.l	#2,d2			| d2 = number of long words (3 or more)
	subq.l	#1,d2			| the loop below runs until d2 is negative

.Llong_fill:
	move.l	d0,(a0)+
#if defined (__mcoldfire__)
	subq.l	#1,d2
#else
	dbra	d2,.Llong_fill		| counts down the low 16 bits of d2
	sub.l	#0x10000,d2		| then steps the upper 16 bits
#endif
	bpl	.Llong_fill
	and.l	#3,d1			| 0..3 bytes are left over
	move.l	(sp)+,d2		| give d2 back
	bra	.Lbyte_test

.Lbyte_fill:
	move.b	d0,(a0)+
.Lbyte_test:
#if defined (__mcoldfire__)
	subq.l	#1,d1
	bpl	.Lbyte_fill
#else
	dbra	d1,.Lbyte_fill		| d1 is below 16 on every path to here
#endif
	move.l	4(sp),a0		| reload dest as the return value
	move.l	a0,d0			| returned in a0 and in d0
	rts
	.size	SYM(memset), . - SYM(memset)

#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif