!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
/* Copyright 2002 SuperH Ltd. */
!

#include "asm.h"

/*
 * memset: fill a byte range with a single byte value.
 *
 * Two implementations are selected at preprocessing time:
 *   - __SHMEDIA__ : SHmedia code working on 8-byte quadwords.
 *   - otherwise   : SHcompact code (SH1 .. SH4, and SH5 in compact mode)
 *                   working on 4-byte longwords.
 *
 * ENTRY() and SL() are macros supplied by asm.h and are not visible here.
 * NOTE(review): SL(branch, label, insn) is presumably "branch to label with
 * insn executed in the delay slot (or just before the branch on cores
 * without delayed conditional branches)" - confirm against asm.h.
 */
ENTRY(memset)
#if __SHMEDIA__
	/*
	 * Register use, as read from the code below:
	 *   r2  destination pointer (never modified)
	 *   r3  fill value
	 *   r4  byte count
	 *   r5  end address (r2 + r4, one past the last byte)
	 *   r25 destination rounded down to a quadword boundary
	 *   r20 quadword-aligned address holding the last byte
	 *   r18 return address, loaded into tr2
	 */
	pta/l	multiquad, tr0
	ptabs	r18, tr2

	andi	r2, -8, r25
	add	r2, r4, r5
	addi	r5, -1, r20	// calculate end address.
	andi	r20, -8, r20
	cmveq	r4, r25, r20	// zero count: force r20 == r25 so the short path is used
	bne/u	r25, r20, tr0	// multiquad

!	This sequence could clobber volatile objects that are in the same
!	quadword as a very short char array.
!	ldlo.q r2, 0, r7
!	shlli r4, 2, r4
!	movi -1, r8
!	SHHI r8, r4, r8
!	SHHI r8, r4, r8
!	mcmv r7, r8, r3
!	stlo.q r2, 0, r3

	// First and last byte share one quadword.
	// Counts of 4 or more go to setlongs; 0..3 are stored byte by byte.
	pta/l	setlongs, tr0
	movi	4, r8
	bgeu/u	r4, r8, tr0
	pta/l	endset, tr0
	beqi/u	r4, 0, tr0
	st.b	r2, 0, r3
	beqi/u	r4, 1, tr0
	nop
	st.b	r2, 1, r3
	beqi/l	r4, 2, tr0
	st.b	r2, 2, r3
endset:	blink	tr2, r63
setlongs:
	mshflo.b r3, r3, r3
	mperm.w	r3, r63, r3	// Fill pattern now in every byte of r3
	stlo.l	r2, 0, r3	// unaligned store starting at the first byte
	nop
	nop
	sthi.l	r5, -1, r3	// unaligned store ending at the last byte
	blink	tr2, r63

multiquad:
	// The range touches at least two quadwords.
	mshflo.b r3, r3, r3
	mperm.w	r3, r63, r3	// Fill pattern now in every byte of r3
	pta/l	lastquad, tr0
	stlo.q	r2, 0, r3	// leading, possibly partial, quadword
	sub	r20, r25, r24	// r24 = byte distance from first to last quadword
	movi	64, r9
	beqi/u	r24, 8, tr0	// lastquad
	pta/l	loop, tr1
	addi	r20, -7*8, r8	// loop end address; This might overflow, so we need
				// to use a different test before we start the loop
	bgeu/u	r24, r9, tr1	// loop
	// Distance is 16 .. 56 bytes: fill inwards from both ends.
	st.q	r25, 8, r3
	shlri	r24, 4, r24
	st.q	r20, -8, r3
	beqi/u	r24, 1, tr0	// lastquad
	st.q	r25, 16, r3
	st.q	r20, -16, r3
	beqi/u	r24, 2, tr0	// lastquad
	st.q	r25, 24, r3
	st.q	r20, -24, r3
lastquad:
	sthi.q	r5, -1, r3	// trailing, possibly partial, quadword
	blink	tr2, r63

loop:
	// Bulk loop: four quadwords (32 bytes) per iteration.
	alloco	r25, 32
	st.q	r25, 8, r3
	st.q	r25, 16, r3
	st.q	r25, 24, r3
	st.q	r25, 32, r3
	addi	r25, 32, r25
	bgeu/l	r8, r25, tr1	// loop

	// Up to five whole quadwords before the last one, then the tail.
	st.q	r20, -40, r3
	st.q	r20, -32, r3
	st.q	r20, -24, r3
	st.q	r20, -16, r3
	st.q	r20, -8, r3
	sthi.q	r5, -1, r3
	blink	tr2, r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
! Entry: r4: destination pointer
!        r5: fill value
!        r6: byte count
!
! Exit:  r0-r3: trashed
!

! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .

/* Argument registers differ between SH5 compact mode and SH1 .. SH4. */
#ifdef __SH5__
#define DST r2
#define VAL r3
#define CNT r4
#define TMP r5
#else
#define DST r4
#define VAL r5
#define CNT r6
#define TMP r2
#endif

	mov	#12,r0	! Check for small number of bytes
	cmp/gt	CNT,r0
	mov	DST,r0
	/* The count register becomes the end address (destination + count). */
	SL(bt, L_store_byte_loop_check0, add DST,CNT)

	tst	#3,r0	! Align destination
	/*
	 * Truncate the fill value to 8 bits before it is replicated below.
	 * This must name VAL, not a literal r5: with __SH5__ defined VAL is r3
	 * and r5 is TMP, so a hard-coded r5 would leave the upper bits of the
	 * fill value in place and corrupt the longword pattern.
	 */
	SL(bt,	L_dup_bytes, extu.b VAL,VAL)
	.balignw 4,0x0009
L_align_loop:
	! Store single bytes until r0 is longword aligned.
	mov.b	VAL,@r0
	add	#1,r0
	tst	#3,r0
	bf	L_align_loop

L_dup_bytes:
	swap.b	VAL,TMP	! Duplicate bytes across longword
	or	TMP,VAL
	swap.w	VAL,TMP
	or	TMP,VAL

	! Bias the end address by -16 for the loop test below.
	add	#-16,CNT

	.balignw 4,0x0009
L_store_long_loop:
	mov.l	VAL,@r0	! Store double longs to memory
	cmp/hs	CNT,r0
	mov.l	VAL,@(4,r0)
	SL(bf, L_store_long_loop, add #8,r0)

	! Undo the bias: the register holds the true end address again.
	add	#16,CNT

L_store_byte_loop_check0:
	cmp/eq	CNT,r0
	bt	L_exit
	.balignw 4,0x0009
L_store_byte_loop:
	mov.b	VAL,@r0	! Store bytes to memory
	add	#1,r0
	cmp/eq	CNT,r0
	bf	L_store_byte_loop

L_exit:
	rts
	mov	r4,r0	! delay slot: r0 = r4 (the destination pointer per the Entry notes above)
#endif /* ! SHMEDIA */