/*
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */

/*
	A faster strcpy.

	by

	Jerry Huck (aligned case)
	Daryl Odnert (equal-alignment case)
	Edgar Circenis (non-aligned case)
*/
/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1.  s1 must be large enough.
 * return s1
 */

/* ENTRY() and EXIT() are not defined in this file; presumably they
   come from DEFS.h -- confirm against that header. */
#include "DEFS.h"

/*
 * Register roles used by this routine:
 *
 *   d_addr (r26)  destination pointer; advanced as bytes/words are stored.
 *                 Its entry value is copied to ret0 as the return value.
 *   s_addr (r25)  source pointer; advanced as bytes/words are loaded.
 *   tmp6   (r24)  alignment bits of the two pointers (dispatch index),
 *                 later reused as scratch.
 *   tmp1/evenside (r19) and tmp2/oddside (r20)
 *                 two names for the same pair of registers: "evenside" /
 *                 "oddside" in the word-aligned loop, "tmp1" / "tmp2" in
 *                 the other paths.
 *   tmp3, tmp4    scratch (shift quantity, shifted data, extracted bytes).
 *   tmp5   (arg3) defined here but not referenced by this routine.
 *   save   (r1)   word under examination for a null byte / mask scratch.
 *
 * NOTE(review): many instructions below carry a nullify condition
 * (",<>", ",=", ",nbz", ",sbz", ",tr") or sit in a branch delay slot.
 * Instruction order is therefore significant; do not reorder.
 */
#define d_addr          r26
#define s_addr          r25
#define tmp6            r24
#define tmp1            r19
#define evenside        r19
#define tmp2            r20
#define oddside         r20
#define tmp3            r21
#define tmp4            r22
#define tmp5            arg3
#define save            r1


ENTRY(strcpy)
/* Do some quick alignment checking and fast-path the case where both
   source and target are word aligned */
	extru,<>	s_addr,31,2,tmp6	/*Is source word aligned? */
	ldwm		4(0,s_addr),oddside	/*Assume yes and guess that it
						  is double-word aligned. */
	dep,=		d_addr,29,2,tmp6	/*Is target word aligned? */
	b		case_analysis
	copy		d_addr,ret0		/* return value = original s1 */
/* Both are aligned.  First source word already loaded assuming that
   source was oddword aligned.  Fall through (therefore fastest) code
   shuffles the registers to join the main loop */
bothaligned:
	bb,>=		s_addr,29,twoatatime	/*Branch if source was odd aligned*/
	uxor,nbz	oddside,r0,save

/* Even aligned source.  save holds that operand.
   Do one iteration of the main copy loop juggling the registers to avoid
   one copy.  */
	b,n		nullfound
	ldwm		4(s_addr),oddside
	stwm		save,4(d_addr)
	uxor,nbz	oddside,r0,save
	b,n		nullfound
	ldwm		4(s_addr),evenside
	stwm		oddside,4(d_addr)
	uxor,nbz	evenside,r0,save
	b,n		nullfound
	ldwm		4(s_addr),oddside

/* Main loop body.  Entry expects evenside still to be stored, oddside
   just loaded.  */
loop:
	stwm		evenside,4(d_addr)
	uxor,nbz	oddside,r0,save

/* mid loop entry */
twoatatime:
	b,n		nullfound
	ldwm		4(s_addr),evenside
	stwm		oddside,4(d_addr)
	uxor,sbz	evenside,r0,save
	b		loop
	ldwm		4(s_addr),oddside

/* fall through when null found in evenside.  oddside actually loaded */
/* On entry save holds the final word, which has not been stored yet. */
nullfound:				/* adjust d_addr and store final word */

	extru,<>	save,7,8,r0		/* pick up leftmost byte */
	addib,tr,n	1,d_addr,store_final
	extru,<>	save,15,8,r0
	addib,tr,n	2,d_addr,store_final
	extru,<>	save,23,8,r0
	addib,tr	3,d_addr,store_final2
	bv		0(rp)
	stw		save,0(d_addr)

store_final:
	bv		0(rp)
store_final2:
	stbys,e		save,0(d_addr)		/* delay slot */

/* Dispatch on tmp6, which holds the two low bits of the target address
   followed by the two low bits of the source address.  Each table entry
   below is exactly two instructions (a branch and its delay slot). */
case_analysis:

	blr		tmp6,r0
	nop

	/* NOTE: the delay slots for the non-aligned cases load a   */
	/* shift quantity which is TGT-SRC into tmp3.               */
	/* Note also, the case for both strings being word aligned  */
	/* is already checked before the BLR is executed, so that   */
	/* case can never occur.                                    */

						/* TGT SRC */
	nop					/* 00  00  can't happen */
	nop
	b		neg_aligned_copy	/* 00  01 */
	ldi		-1,tmp3			/* load shift quantity. delay slot */
	b		neg_aligned_copy	/* 00  10 */
	ldi		-2,tmp3			/* load shift quantity. delay slot */
	b		neg_aligned_copy	/* 00  11 */
	ldi		-3,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy0	/* 01  00 */
	ldi		1,tmp3			/* load shift quantity. delay slot */
	b		equal_alignment_1	/* 01  01 */
	ldbs,ma		1(s_addr),tmp1
	b		neg_aligned_copy	/* 01  10 */
	ldi		-1,tmp3			/* load shift quantity. delay slot */
	b		neg_aligned_copy	/* 01  11 */
	ldi		-2,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy0	/* 10  00 */
	ldi		2,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy	/* 10  01 */
	ldi		1,tmp3			/* load shift quantity. delay slot */
	b		equal_alignment_2	/* 10  10 */
	ldhs,ma		2(s_addr),tmp1
	b		neg_aligned_copy	/* 10  11 */
	ldi		-1,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy0	/* 11  00 */
	ldi		3,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy	/* 11  01 */
	ldi		2,tmp3			/* load shift quantity. delay slot */
	b		pos_aligned_copy	/* 11  10 */
	ldi		1,tmp3			/* load shift quantity. delay slot */
	/* Last table entry: handled in line rather than by a branch. */
	ldbs,ma		1(s_addr),tmp1		/* 11  11 */
	comiclr,<>	r0,tmp1,r0
	bv		0(rp)			/* return if 1st byte was null */
	stbs,ma		tmp1,1(d_addr)		/* store a byte to dst string */
	b		bothaligned		/* can now goto word_aligned */
	ldwm		4(s_addr),oddside	/* load next word of source */

/* Source and target share the same non-zero alignment: copy single
   bytes until both are word aligned, then join the aligned path. */
equal_alignment_1:
	comiclr,<>	r0,tmp1,r0		/* nullify next if tmp1 <> 0 */
	bv		0(rp)			/* return if null byte found */
	stbs,ma		tmp1,1(d_addr)		/* store a byte to dst string */
	ldhs,ma		2(s_addr),tmp1		/* load next halfword */
equal_alignment_2:
	extru,<>	tmp1,23,8,tmp6		/* look at left byte of halfword */
	bv		0(rp)			/* return if 1st byte was null */
	stbs,ma		tmp6,1(d_addr)
	extru,<>	tmp1,31,8,r0
	bv		0(rp)			/* return if 2nd byte was null */
	stbs,ma		tmp1,1(d_addr)
	b		bothaligned
	ldwm		4(s_addr),oddside	/* load next word */

/* source and destination are not aligned, so we do it the hard way. */

/* target alignment is greater than source alignment */
pos_aligned_copy0:
	/* Entered only from the table rows whose SRC bits are 00, i.e. the
	   rows where the speculative ldwm at entry was not nullified;
	   back s_addr up by the 4 bytes that load advanced it. */
	addi		-4,s_addr,s_addr
pos_aligned_copy:
	extru		d_addr,31,2,tmp6	/* Extract low 2 bits of the dest addr */
	extru		s_addr,31,2,tmp1	/* Extract low 2 bits of the src addr */
	dep		r0,31,2,s_addr		/* Compute word address of the source. */
	sh3add		tmp3,r0,tmp4		/* compute shift amt */
	ldwm		4(0,s_addr),tmp2	/* get 1st source word */
	sh3add		tmp1,r0,save		/* setup mask shift amount */
	mtctl		save,r11		/* set-up cr11 for mask */
	zvdepi		-2,32,save		/* create mask */
	or		save,tmp2,tmp2		/* mask unused bytes in src */
	ldi		-1,tmp1			/* load tmp1 with 0xffffffff */
	mtctl		tmp4,r11		/* shift count -> shift count reg */
	vshd		tmp1,tmp2,tmp3		/* position data ! */
	uxor,nbz	tmp3,r0,save
	b,n		first_null
	uxor,nbz	tmp2,r0,save
	b		nullfound1
	mtctl		tmp4,r11		/* re-load shift cnt (delay slot) */
	b		loop_entry
	ldwm		4(0,s_addr),tmp1	/* get next word. delay slot */

/* Reached from the table rows whose shift quantity (TGT-SRC) is
   negative. */
neg_aligned_copy:
	extru		d_addr,31,2,tmp6	/* Extract low 2 bits of the dest addr */
	extru		s_addr,31,2,tmp2	/* Extract low 2 bits of the src addr */
	dep		r0,31,2,s_addr		/* Compute word address of the source. */
	sh3add		tmp3,r0,tmp4		/* compute shift amt */
	ldwm		4(0,s_addr),tmp1	/* load first word from source. */
/* check to see if next word can be read safely */
	sh3add		tmp2,r0,save
	mtctl		save,r11		/* shift count -> shift count reg */
	zvdepi		-2,32,save
	or		save, tmp1, tmp1
	uxor,nbz	tmp1,r0,save		/* any nulls in first word? */
	b		first_null0
	mtctl		tmp4,r11
	ldwm		4(0,s_addr),tmp2	/* load second word from source */
	combt,=		tmp6,r0,chunk1		/* don't mask if whole word valid */
	vshd		tmp1,tmp2,tmp3		/* position data ! */
	sh3add		tmp6,r0,save		/* setup r1 */
	mtctl		save,r11		/* set-up cr11 for mask */
	zvdepi		-2,32,save
	or		save, tmp3, tmp3
	uxor,nbz	tmp3,r0,save
	b,n		first_null
	uxor,nbz	tmp2,r0,save
	b		nullfound1
	mtctl		tmp4,r11		/* re-load shift cnt (delay slot) */
	b		loop_entry
	ldwm		4(0,s_addr),tmp1	/* get next word. delay slot */

chunk1:
	uxor,nbz	tmp2,r0,save
	b		nullfound0
	vshd		tmp1,tmp2,tmp3
/* Non-aligned main loop: tmp1 and tmp2 alternate as the newest source
   word; tmp3 receives the shifted word to store. */
did_mask:
	ldwm		4(0,s_addr),tmp1	/* get next word ! */
loop_entry:
	stbys,b,m	tmp3,4(0,d_addr)	/* store ! */

	uxor,nbz	tmp1, r0, save
	b		nullfound2
	vshd		tmp2,tmp1,tmp3		/* position data ! */
	ldwm		4(s_addr),tmp2
	stwm		tmp3,4(d_addr)
	uxor,sbz	tmp2,r0,save
	b		did_mask
nullfound0:
	vshd		tmp1,tmp2,tmp3		/* delay slot */
	uxor,nbz	tmp3,r0,save
	b,n		nullfound
nullfound1:
	stbys,b,m	tmp3,4(0,d_addr)
	b		nullfound
	vshd		tmp2,r0,save		/* delay slot */

nullfound2:
	uxor,nbz	tmp3,r0,save
	b,n		nullfound
	stwm		tmp3,4(d_addr)
	b		nullfound
	/* notice that delay slot is in next routine */

first_null0:	/* null found in first word of non-aligned (wrt d_addr) */
	vshd		tmp1,r0,save		/* delay slot */
	combt,=		tmp6,r0,check4
	extru		save,7,8,tmp4
first_null:
	addibt,=	-1,tmp6,check3		/* check last 3 bytes of word */
	extru		save,15,8,tmp4
	addibt,=,n	-1,tmp6,check2		/* check last 2 bytes */
	bv		0(rp)			/* null in last byte--store and exit */
	stbys,b		save, 0(d_addr)

/* check4 / check3 / check2: store the remaining bytes of save one at a
   time, leaving as soon as the null byte has been written. */
check4:
	combt,=		tmp4,r0,done
	stbs,ma		tmp4,1(d_addr)
	extru,<>	save,15,8,tmp4
check3:
	combt,=		tmp4,r0,done
	stbs,ma		tmp4,1(d_addr)
check2:
	extru,<>	save,23,8,tmp4
	bv		0(rp)
	stbs,ma		tmp4,1(d_addr)
	bv		0(rp)
	stbs		r0,0(d_addr)

done:
EXIT(strcpy)