#include <picolibc.h>

!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
/* Copyright 2002 SuperH Ltd. */
!
! Fills a block of memory with one byte value.  Two implementations are
! selected by the preprocessor: an SHmedia version and an SHcompact
! (SH1 .. SH4, or SH5 in compact mode) version.
!

#include "asm.h"

ENTRY(memset)
#if __SHMEDIA__
! SHmedia register use, as read from the code below:
!	r2:  destination pointer (never modified)
!	r3:  fill value
!	r4:  byte count
!	r5:  end address (r2 + r4)
!	r25: destination rounded down to a multiple of 8
!	r20: address of the aligned quadword holding the last byte
!	tr2: loaded from r18 and used by every exit (blink tr2, r63)
	pta/l	multiquad, tr0
	ptabs	r18, tr2

	andi	r2, -8, r25
	add	r2, r4, r5
	addi	r5, -1, r20	// calculate end address.
	andi	r20, -8, r20
	// NOTE(review): presumably forces r20 = r25 when the count in r4 is
	// zero, so that an empty fill takes the short path - confirm against
	// the SH-5 manual entry for cmveq.
	cmveq	r4, r25, r20
	// First and last byte lie in different quadwords: use the long path.
	bne/u	r25, r20, tr0	// multiquad

!	This sequence could clobber volatile objects that are in the same
!	quadword as a very short char array.
!	ldlo.q r2, 0, r7
!	shlli r4, 2, r4
!	movi -1, r8
!	SHHI r8, r4, r8
!	SHHI r8, r4, r8
!	mcmv r7, r8, r3
!	stlo.q r2, 0, r3

	// Everything fits in one aligned quadword.  Counts of 4 or more go
	// to setlongs; counts 0..3 are handled with individual byte stores.
	pta/l	setlongs, tr0
	movi	4, r8
	bgeu/u	r4, r8, tr0
	pta/l	endset, tr0
	beqi/u	r4, 0, tr0
	st.b	r2, 0, r3
	beqi/u	r4, 1, tr0
	nop
	st.b	r2, 1, r3
	beqi/l	r4, 2, tr0
	st.b	r2,2,r3
endset:	blink	tr2, r63
setlongs:
	mshflo.b r3, r3, r3
	mperm.w	r3, r63, r3	// Fill pattern now in every byte of r3
	// One store anchored at the first byte and one anchored at the last
	// byte (r5 - 1).
	stlo.l	r2, 0, r3
	nop
	nop
	sthi.l	r5, -1, r3
	blink	tr2, r63

multiquad:
	mshflo.b r3, r3, r3
	mperm.w	r3, r63, r3	// Fill pattern now in every byte of r3
	pta/l	lastquad, tr0
	stlo.q	r2, 0, r3
	sub	r20, r25, r24	// r24 = distance between first and last quadword
	movi	64, r9
	beqi/u	r24, 8, tr0	// lastquad
	pta/l	loop, tr1
	addi	r20, -7*8, r8	// loop end address; This might overflow, so we need
				// to use a different test before we start the loop
	bgeu/u	r24, r9, tr1// loop
	// Fewer than 64 bytes between first and last quadword: store whole
	// quadwords pairwise from both ends until they meet.
	st.q	r25, 8, r3
	shlri	r24, 4, r24
	st.q	r20, -8, r3
	beqi/u	r24, 1, tr0	// lastquad
	st.q	r25, 16, r3
	st.q	r20, -16, r3
	beqi/u	r24, 2, tr0	// lastquad
	st.q	r25, 24, r3
	st.q	r20, -24, r3
lastquad:
	sthi.q	r5, -1, r3
	blink	tr2,r63

loop:
	// Four quadword stores (32 bytes) per iteration, advancing r25.
	alloco	r25, 32
	st.q	r25, 8, r3
	st.q	r25, 16, r3
	st.q	r25, 24, r3
	st.q	r25, 32, r3
	addi	r25, 32, r25
	bgeu/l	r8, r25, tr1	// loop

	// Tail: quadword stores addressed backwards from r20, then the final
	// store anchored at the last byte.
	st.q	r20, -40, r3
	st.q	r20, -32, r3
	st.q	r20, -24, r3
	st.q	r20, -16, r3
	st.q	r20, -8, r3
	sthi.q	r5, -1, r3
	blink	tr2,r63
#else /* ! SHMEDIA, i.e. SH1 .. SH4 / SHcompact */
! Entry: r4: destination pointer
!        r5: fill value
!        r6: byte count
!        (when __SH5__ is defined the macros below select r2, r3 and r4
!        for these three instead)
!
! Exit:  r0: destination pointer (set in the rts delay slot)
!        the fill value, count and scratch registers are overwritten
!

! This assumes that the first four bytes of the address space (0..3) are
! reserved - usually by the linker script.  Otherwise, we would have to check
! for the case of objects of the size 12..15 at address 0..3 .

#ifdef __SH5__
#define DST r2
#define VAL r3
#define CNT r4
#define TMP r5
#else
#define DST r4
#define VAL r5
#define CNT r6
#define TMP r2
#endif

	mov	#12,r0		! Check for small number of bytes
	cmp/gt	CNT,r0		! T = (12 > count), signed compare
	mov	DST,r0		! r0 = running store pointer
	! The add runs on both the taken and the fall-through path; from here
	! on the count register holds the end address (destination + count).
	SL(bt, L_store_byte_loop_check0, add DST,CNT)

	tst	#3,r0		! Align destination
	! Reduce the fill value to a single byte before it is replicated.
	! This must name the value register through its macro: a hard-coded
	! r5 is the scratch register when __SH5__ is defined, which would
	! leave stale upper bits to be OR-ed into the fill pattern.
	SL(bt,	L_dup_bytes, extu.b VAL,VAL)
	.balignw 4,0x0009	! pad with 0x0009 (SH nop encoding)
L_align_loop:
	mov.b	VAL,@r0		! byte stores until r0 is longword aligned
	add	#1,r0
	tst	#3,r0
	bf	L_align_loop

L_dup_bytes:
	swap.b	VAL,TMP		! Duplicate bytes across longword
	or	TMP,VAL
	swap.w	VAL,TMP
	or	TMP,VAL

	add	#-16,CNT	! bias the end address for the loop test below

	.balignw 4,0x0009
L_store_long_loop:
	mov.l	VAL,@r0		! Store double longs to memory
	cmp/hs	CNT,r0		! T = (r0 >= end - 16), unsigned
	mov.l	VAL,@(4,r0)
	SL(bf, L_store_long_loop, add #8,r0)

	add	#16,CNT		! restore the true end address

L_store_byte_loop_check0:
	cmp/eq	CNT,r0		! nothing left to store?
	bt	L_exit
	.balignw 4,0x0009
L_store_byte_loop:
	mov.b	VAL,@r0		! Store bytes to memory
	add	#1,r0
	cmp/eq	CNT,r0
	bf	L_store_byte_loop

L_exit:
	rts
	! Delay slot: hand back the original destination pointer.  Written
	! through the macro so it stays correct for either register mapping.
	mov	DST,r0
#endif /* ! SHMEDIA */