/*
 *  (c) Copyright 1986 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */

/*
        A faster strcpy.

        by

        Jerry Huck (aligned case)
        Daryl Odnert (equal-alignment case)
        Edgar Circenis (non-aligned case)
*/
/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1.  s1 must be large enough.
 * return s1
 *
 * Overview of the code below:
 *   - Entry builds a 4-bit index in tmp6 from the low two bits of the
 *     destination (TGT) and of the source (SRC) and, unless both are
 *     word aligned, dispatches through the table at case_analysis.
 *   - bothaligned/loop/twoatatime copy a word at a time, using
 *     uxor,nbz / uxor,sbz to detect a zero byte inside a word.
 *   - equal_alignment_1/2 and the "11 11" table entry copy leading
 *     bytes until both pointers are word aligned, then join bothaligned.
 *   - pos_aligned_copy/neg_aligned_copy handle differing alignments by
 *     reading whole source words and repositioning them with vshd.
 *   - nullfound, first_null0/first_null and check4/3/2 store the tail
 *     of the string and return.
 *
 * NOTE(review): almost every branch here relies on its delay slot and on
 * nullification by the preceding instruction; do not reorder, insert or
 * remove instructions without re-checking each path.
 */

#include <picolibc.h>

#include "DEFS.h"

/*
 * Register aliases.
 *   d_addr              destination pointer; advanced as data is stored
 *   s_addr              source pointer; advanced as data is loaded
 *   tmp1 / evenside     two names for the same register (r19)
 *   tmp2 / oddside      two names for the same register (r20)
 *   tmp3, tmp4, tmp6    scratch
 *   save                receives the uxor result, i.e. the word in which a
 *                       zero byte is being looked for or was found
 *   tmp5                defined but not referenced anywhere in this file
 * Presumably r26/r25 are the first two argument registers of the PA-RISC
 * calling convention -- confirm against DEFS.h / the ABI document.
 */
#define d_addr          r26
#define s_addr          r25
#define tmp6            r24
#define tmp1            r19
#define evenside        r19
#define tmp2            r20
#define oddside         r20
#define tmp3            r21
#define tmp4            r22
#define tmp5            arg3
#define save            r1


ENTRY(strcpy)
/* Do some quick alignment checking and fast-path the case where both
   pointers are word aligned.
   After the dep below, tmp6 holds the low two bits of d_addr followed by
   the low two bits of s_addr; that value is the index into the dispatch
   table at case_analysis.  When it is zero the branch is nullified and
   execution falls into bothaligned. */
        extru,<>   s_addr,31,2,tmp6     /* Is source word aligned? */
        ldwm       4(0,s_addr),oddside  /* Assume yes and guess that it
                                           is double-word aligned.
                                           (nullified when the source is
                                           not word aligned) */
        dep,=      d_addr,29,2,tmp6     /* Is target word aligned? */
        b          case_analysis
        copy       d_addr,ret0          /* return value = original s1;
                                           runs on both paths */
/* Both are aligned.  First source word already loaded assuming that
   source was oddword aligned.  Fall through (therefore fastest) code
   shuffles the registers to join the main loop */
bothaligned:
        bb,>=      s_addr,29,twoatatime /* Branch if source was odd aligned
                                           (s_addr has already been advanced
                                           by 4 by the ldwm) */
        uxor,nbz   oddside,r0,save      /* delay slot: zero-byte test of the
                                           word already loaded; nullifies the
                                           next instruction if none found */

/* Even aligned source.  save holds that operand.
   Do one iteration of the main copy loop juggling the registers to avoid
   one copy. */
        b,n        nullfound
        ldwm       4(s_addr),oddside
        stwm       save,4(d_addr)
        uxor,nbz   oddside,r0,save
        b,n        nullfound
        ldwm       4(s_addr),evenside
        stwm       oddside,4(d_addr)
        uxor,nbz   evenside,r0,save
        b,n        nullfound
        ldwm       4(s_addr),oddside

/* Main loop body.  Entry expects evenside still to be stored, oddside
   just loaded. */
loop:
        stwm       evenside,4(d_addr)
        uxor,nbz   oddside,r0,save

/* mid loop entry */
twoatatime:
        b,n        nullfound            /* taken when the uxor above found a
                                           zero byte in oddside */
        ldwm       4(s_addr),evenside
        stwm       oddside,4(d_addr)
        uxor,sbz   evenside,r0,save     /* some byte zero -> nullify the
                                           branch back to loop */
        b          loop
        ldwm       4(s_addr),oddside    /* delay slot of the branch; also
                                           runs on the fall-through path */

/* fall through when null found in evenside.  oddside actually loaded,
   i.e. one source word beyond the word holding the terminator has been
   read on this path. */
nullfound:                              /* adjust d_addr and store final word */
/* save holds the word containing the terminator.  Its bytes are tested
   from the left; at the first zero byte d_addr is bumped past it and the
   leading part of save is stored with stbys,e.  If none of the first
   three bytes is zero the whole word is stored with stw. */

        extru,<>   save,7,8,r0          /* pick up leftmost byte */
        addib,tr,n 1,d_addr,store_final
        extru,<>   save,15,8,r0
        addib,tr,n 2,d_addr,store_final
        extru,<>   save,23,8,r0
        addib,tr   3,d_addr,store_final2 /* no ",n": the bv on the next line
                                            is in this branch's delay slot */
        bv         0(rp)
        stw        save,0(d_addr)       /* delay slot: terminator is the
                                           last byte, store the full word */

store_final:
        bv         0(rp)
store_final2:
        stbys,e    save,0(d_addr)       /* delay slot */

case_analysis:
/* Dispatch on tmp6 (TGT alignment, SRC alignment).  Every table entry
   below is exactly two instructions, so entries must not grow or shrink. */

        blr        tmp6,r0
        nop                             /* delay slot of blr */

        /* NOTE: the delay slots for the non-aligned cases load a  */
        /* shift quantity which is TGT-SRC into tmp3.              */
        /* Note also, the case for both strings being word aligned */
        /* is already checked before the BLR is executed, so that  */
        /* case can never occur.                                   */

                                        /* TGT SRC */
        nop                             /* 00  00  can't happen */
        nop
        b          neg_aligned_copy     /* 00  01  */
        ldi        -1,tmp3              /* load shift quantity. delay slot */
        b          neg_aligned_copy     /* 00  10  */
        ldi        -2,tmp3              /* load shift quantity. delay slot */
        b          neg_aligned_copy     /* 00  11  */
        ldi        -3,tmp3              /* load shift quantity. delay slot */
        b          pos_aligned_copy0    /* 01  00  */
        ldi        1,tmp3               /* load shift quantity. delay slot */
        b          equal_alignment_1    /* 01  01  */
        ldbs,ma    1(s_addr),tmp1       /* delay slot: fetch first byte */
        b          neg_aligned_copy     /* 01  10  */
        ldi        -1,tmp3              /* load shift quantity. delay slot */
        b          neg_aligned_copy     /* 01  11  */
        ldi        -2,tmp3              /* load shift quantity. delay slot */
        b          pos_aligned_copy0    /* 10  00  */
        ldi        2,tmp3               /* load shift quantity. delay slot */
        b          pos_aligned_copy     /* 10  01  */
        ldi        1,tmp3               /* load shift quantity. delay slot */
        b          equal_alignment_2    /* 10  10  */
        ldhs,ma    2(s_addr),tmp1       /* delay slot: fetch first halfword */
        b          neg_aligned_copy     /* 10  11  */
        ldi        -1,tmp3              /* load shift quantity. delay slot */
        b          pos_aligned_copy0    /* 11  00  */
        ldi        3,tmp3               /* load shift quantity. delay slot */
        b          pos_aligned_copy     /* 11  01  */
        ldi        2,tmp3               /* load shift quantity. delay slot */
        b          pos_aligned_copy     /* 11  10  */
        ldi        1,tmp3               /* load shift quantity. delay slot */
        /* Last table entry: it is not followed by another entry, so it is
           written in line rather than as a branch. */
        ldbs,ma    1(s_addr),tmp1       /* 11  11  */
        comiclr,<> r0,tmp1,r0           /* nullify next if tmp1 <> 0 */
        bv         0(rp)                /* return if 1st byte was null */
        stbs,ma    tmp1,1(d_addr)       /* store a byte to dst string */
        b          bothaligned          /* can now goto word_aligned */
        ldwm       4(s_addr),oddside    /* load next word of source */

/* TGT and SRC share the same non-zero alignment: copy single bytes and a
   halfword until both pointers are word aligned, returning early if the
   terminator is met, then join the aligned code. */
equal_alignment_1:
        comiclr,<> r0,tmp1,r0           /* nullify next if tmp1 <> 0 */
        bv         0(rp)                /* return if null byte found */
        stbs,ma    tmp1,1(d_addr)       /* store a byte to dst string */
        ldhs,ma    2(s_addr),tmp1       /* load next halfword */
equal_alignment_2:
        extru,<>   tmp1,23,8,tmp6       /* look at left byte of halfword */
        bv         0(rp)                /* return if 1st byte was null */
        stbs,ma    tmp6,1(d_addr)
        extru,<>   tmp1,31,8,r0
        bv         0(rp)                /* return if 2nd byte was null */
        stbs,ma    tmp1,1(d_addr)
        b          bothaligned
        ldwm       4(s_addr),oddside    /* load next word */

/* source and destination are not aligned, so we do it the hard way. */

/* target alignment is greater than source alignment */
/* pos_aligned_copy0 is reached only from the SRC = 00 table entries.  In
   those cases the ldwm at entry was not nullified and has already advanced
   s_addr by 4, so that adjustment is backed out first. */
pos_aligned_copy0:
        addi       -4,s_addr,s_addr
pos_aligned_copy:
        extru      d_addr,31,2,tmp6     /* Extract low 2 bits of the dest addr */
        extru      s_addr,31,2,tmp1     /* Extract low 2 bits of the src addr */
        dep        r0,31,2,s_addr       /* Compute word address of the source. */
        sh3add     tmp3,r0,tmp4         /* compute shift amt (tmp3 * 8) */
        ldwm       4(0,s_addr),tmp2     /* get 1st source word */
        sh3add     tmp1,r0,save         /* setup mask shift amount */
        mtctl      save,r11             /* set-up cr11 for mask */
        zvdepi     -2,32,save           /* create mask */
        or         save,tmp2,tmp2       /* mask unused bytes in src */
        ldi        -1,tmp1              /* load tmp1 with 0xffffffff */
        mtctl      tmp4,r11             /* shift count -> shift count reg */
        vshd       tmp1,tmp2,tmp3       /* position data ! */
        uxor,nbz   tmp3,r0,save
        b,n        first_null
        uxor,nbz   tmp2,r0,save
        b          nullfound1
        mtctl      tmp4,r11             /* re-load shift cnt (delay slot) */
        b          loop_entry
        ldwm       4(0,s_addr),tmp1     /* get next word. delay slot */

/* target alignment is less than source alignment (tmp3 is negative) */
neg_aligned_copy:
        extru      d_addr,31,2,tmp6     /* Extract low 2 bits of the dest addr */
        extru      s_addr,31,2,tmp2     /* Extract low 2 bits of the src addr */
        dep        r0,31,2,s_addr       /* Compute word address of the source. */
        sh3add     tmp3,r0,tmp4         /* compute shift amt (tmp3 * 8) */
        ldwm       4(0,s_addr),tmp1     /* load first word from source. */
/* check to see if next word can be read safely */
        sh3add     tmp2,r0,save
        mtctl      save,r11             /* shift count -> shift count reg */
        zvdepi     -2,32,save
        or         save, tmp1, tmp1
        uxor,nbz   tmp1,r0,save         /* any nulls in first word? */
        b          first_null0
        mtctl      tmp4,r11             /* runs on both paths */
        ldwm       4(0,s_addr),tmp2     /* load second word from source */
        combt,=    tmp6,r0,chunk1       /* don't mask if whole word valid */
        vshd       tmp1,tmp2,tmp3       /* position data ! */
        sh3add     tmp6,r0,save         /* setup r1 */
        mtctl      save,r11             /* set-up cr11 for mask */
        zvdepi     -2,32,save
        or         save, tmp3, tmp3
        uxor,nbz   tmp3,r0,save
        b,n        first_null
        uxor,nbz   tmp2,r0,save
        b          nullfound1
        mtctl      tmp4,r11             /* re-load shift cnt (delay slot) */
        b          loop_entry
        ldwm       4(0,s_addr),tmp1     /* get next word. delay slot */

chunk1:
        uxor,nbz   tmp2,r0,save
        b          nullfound0
        vshd       tmp1,tmp2,tmp3
/* Main loop of the non-aligned copy: two source words per iteration,
   alternating between tmp1 and tmp2 as the most recently loaded word. */
did_mask:
        ldwm       4(0,s_addr),tmp1     /* get next word ! */
loop_entry:
        stbys,b,m  tmp3,4(0,d_addr)     /* store ! */

        uxor,nbz   tmp1, r0, save
        b          nullfound2
        vshd       tmp2,tmp1,tmp3       /* position data ! */
        ldwm       4(s_addr),tmp2
        stwm       tmp3,4(d_addr)
        uxor,sbz   tmp2,r0,save         /* some byte zero -> nullify the
                                           branch back to did_mask */
        b          did_mask
/* The vshd below is both the delay slot of the branch above and the first
   instruction of nullfound0. */
nullfound0:
        vshd       tmp1,tmp2,tmp3       /* delay slot */
        uxor,nbz   tmp3,r0,save
        b,n        nullfound
nullfound1:
        stbys,b,m  tmp3,4(0,d_addr)
        b          nullfound
        vshd       tmp2,r0,save         /* delay slot */

nullfound2:
        uxor,nbz   tmp3,r0,save
        b,n        nullfound
        stwm       tmp3,4(d_addr)
        b          nullfound
        /* notice that delay slot is in next routine: the vshd at
           first_null0 executes as the delay slot of the branch above */

first_null0:    /* null found in first word of non-aligned (wrt d_addr) */
        vshd       tmp1,r0,save         /* delay slot */
        combt,=    tmp6,r0,check4
        extru      save,7,8,tmp4
first_null:
        addibt,=   -1,tmp6,check3       /* check last 3 bytes of word */
        extru      save,15,8,tmp4
        addibt,=,n -1,tmp6,check2       /* check last 2 bytes */
        bv         0(rp)                /* null in last byte--store and exit */
        stbys,b    save, 0(d_addr)

/* Byte-at-a-time tail.  tmp4 holds the next byte of save on entry to
   check4 and check3; the stbs in each combt delay slot stores it, so the
   terminating zero byte is written before control reaches done. */
check4:
        combt,=    tmp4,r0,done
        stbs,ma    tmp4,1(d_addr)
        extru,<>   save,15,8,tmp4
check3:
        combt,=    tmp4,r0,done
        stbs,ma    tmp4,1(d_addr)
check2:
        extru,<>   save,23,8,tmp4
        bv         0(rp)
        stbs,ma    tmp4,1(d_addr)
        bv         0(rp)
        stbs       r0,0(d_addr)

/* EXIT is a macro from DEFS.h (not visible here); presumably it emits the
   return and closes the procedure -- confirm against DEFS.h. */
done:
EXIT(strcpy)