/*******************************************************************************
 *
 * Copyright (c) 1993 Intel Corporation
 *
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation. Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation. In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 *
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 *
 ******************************************************************************/

/*
 * NOTE -- not part of the original: the descriptive comments in this file
 * (register table, branch and loop comments, entry/exit notes) have been
 * revised.  The instruction sequence and operands are unchanged.
 */

	.file "memcp_ca.s"
#ifdef	__PIC
	.pic
#endif
#ifdef	__PID
	.pid
#endif
/*
 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
 */

/*
	procedure memmove  (optimized assembler version for the CA)
	procedure memcpy   (optimized assembler version for the CA)

	dest_addr = memmove (dest_addr, src_addr, len)
	dest_addr = memcpy  (dest_addr, src_addr, len)

	Copy len bytes pointed to by src_addr to the space pointed to by
	dest_addr.  Return the original dest_addr.  Nothing is copied when
	len is zero or negative, or when src_addr equals dest_addr.

	The C standard leaves memcpy undefined when the source and
	destination overlap (in particular, when the end of the source is
	overlapped by the beginning of the destination).  Memmove must not
	fail if overlap exists.  In this file both names label the same
	code: when dest starts above src but before the end of the source
	block the copy is done backwards (from the last byte down),
	otherwise it is done forwards.

	Undefined behavior will also occur if the last byte of the source
	block is in the last word of the program's allocated memory space.
	This is so because, in several cases, the routine will fetch ahead
	one word.  Disallowing the fetch ahead would impose a severe
	performance penalty.

	This program handles five cases (in each direction):

	1) both arguments start on a word boundary
	2) neither are word aligned, but they are offset by the same amount
	3) source is word aligned, destination is not
	4) destination is word aligned, source is not
	5) neither is word aligned, and they are offset by differing amounts

	The forward-copy labels are spelled Lcase_N; the backward-copy
	labels are spelled Lcase.N.

	At the time of this writing, only g0 thru g7 and g13 are available
	for use in this leafproc; other registers would have to be saved and
	restored.  These nine registers, plus tricky use of g14 are sufficient
	to implement the routine.  The registers are used as follows:

	g0	dest ptr; not modified, so that it may be returned
	g1	src ptr on entry; afterwards the dest byte ptr for the
		leading byte loop, then the word just extracted for storing
	g2	len (count of bytes still to be moved)
	g3	src ptr (word aligned)
	g4	dest ptr (word aligned); byte ptr in the trailing byte loops
	g5	addr of byte after last byte of src; afterwards scratch
		(src byte offset / extracted word in cases 2, 4 and 5)
	    Little endian
		g6	lsw of double word for extraction of 4 bytes
		g7	msw of double word for extraction of 4 bytes
	    Big endian
		g6	msw of double word for extraction of 4 bytes
		g7	lsw of double word for extraction of 4 bytes
	g13	return address
	g14	return address on entry; afterwards the shift count for
		eshro; set to zero before returning
*/

#if __i960_BIG_ENDIAN__
#define MSW g6
#define LSW g7
#else
#define LSW g6
#define MSW g7
#endif

	.globl	_memmove, _memcpy
	.globl	__memmove, __memcpy
	.leafproc	_memmove, __memmove
	.leafproc	_memcpy,  __memcpy
	.align	2
_memcpy:
_memmove:
/* First entry point of each leafproc pair: point g14 at Lrett so that
   the final "bx (g13)" lands on the ret instruction. */
#ifndef __PIC
	lda 	Lrett,g14
#else
	lda 	Lrett-(.+8)(ip),g14
#endif
__memcpy:
__memmove:
/* Second entry point of each leafproc pair: g14 is used as supplied. */
	cmpibge.f 0,g2,Lquick_exit # Lexit if number of bytes to move is <= zero.
	cmpo	g0,g1		# if dest starts earlier than src ...
	lda	(g14),g13	# preserve return address
	addo	g2,g1,g5	# compute addr of byte after last byte of src
	be.f	Lexit_code	# no move necessary if src and dest are same
	concmpo	g5,g0		# ... or if dest starts after end of src ...
	notand	g1,3,g3		# extract word addr of start of src
	bg.f	Lbackwards	# ... then drop thru, else do move backwards
	cmpo	g3,g1		# check alignment of src
	ld	(g3),LSW	# fetch word containing at least first byte
	notand	g0,3,g4		# extract word addr of start of dest
	lda	4(g3),g3	# advance src word addr
	bne.f	Lcase_245	# branch if src is NOT word aligned

Lcase_13:
	cmpo	g0,g4		# check alignment of dest
	subo	4,g4,g4		# store is pre-incrementing;  back up dest addr
	be.t	Lcase_1		# branch if dest word aligned

Lcase_3:				# src is word aligned; dest is not
	addo	8,g4,g4		# move dest word ptr to first word boundary
	lda	(g0),g1		# copy dest byte ptr
	mov	LSW,MSW		# make copy of first word of src
	lda	32,g14		# initialize shift count to zero (mod 32)

Lcase_25:
Lcase_3_cloop_at_start:			# character copying loop for start of dest str
	cmpdeci	0,g2,g2		# is max_bytes exhausted?
	be.f	Lexit_code	# Lexit if max_bytes is exhausted
#if __i960_BIG_ENDIAN__
	rotate	8,MSW,MSW	# move next byte into position for extraction
	subo	8,g14,g14	# augment the shift counter
	stob	MSW,(g1)	# store the byte in dest
#else
	addo	8,g14,g14	# augment the shift counter
	stob	MSW,(g1)	# store the byte in dest
	shro	8,MSW,MSW	# move next byte into position for extraction
#endif
	lda	1(g1),g1	# post-increment dest ptr
	cmpobne.t g1,g4,Lcase_3_cloop_at_start # loop until dest ptr reaches word boundary

	ld	(g3),MSW	# fetch msw of operand for double shift

Lcase_4:
Lcase_3_wloop:
	cmpi	g2,4		# less than four bytes to move?
	lda	4(g3),g3	# post-increment src word addr
	eshro	g14,g6,g1	# extract 4 bytes of src
	bl.f	Lcase_3_cloop	# branch if < four bytes left to move
	mov	MSW,LSW		# move msw to lsw
	ld	(g3),MSW	# pre-fetch msw of operand for double shift
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	st	g1,(g4)		# store 4 bytes to dest
	addo	4,g4,g4		# post-increment dest ptr
	b	Lcase_3_wloop

Lcase_1_wloop:				# word copying loop
	subi	4,g2,g2		# decrease max_byte count by the 4 bytes moved
	ld	(g3),LSW	# pre-fetch next word of src
	addo	4,g3,g3		# post-increment src addr
	st	g1,(g4)		# store word in dest string
Lcase_1:				# src and dest are word aligned
	cmpi	g2,4		# check for fewer than four bytes to move
	addo	4,g4,g4		# pre-increment dest addr
	lda	(LSW),g1	# keep a copy of the src word
	bge.t	Lcase_1_wloop	# branch if at least four bytes to copy
Lcase_3_cloop:
	cmpibe.f 0,g2,Lexit_code	# Lexit if max_bytes is exhausted

Lcase_1_cloop:				# trailing bytes: store g1 one byte at a time
#if __i960_BIG_ENDIAN__
	rotate	8,g1,g1		# move next byte into position for extraction
#endif
	subi	1,g2,g2		# one less byte to move
	stob	g1,(g4)		# store the byte in dest
	cmpi	0,g2		# is max_bytes exhausted?
	lda	1(g4),g4	# post-increment dest byte addr
#if ! __i960_BIG_ENDIAN__
	shro	8,g1,g1		# move next byte into position for extraction
#endif
	bne.t	Lcase_1_cloop	# loop until max_bytes is exhausted

Lexit_code:
	mov	0,g14		# conform to register conventions
	bx	(g13)		# g0 = addr of dest;  g14 = 0
Lrett:
	ret


Lcase_245:				# forward copy; src is not word aligned
	cmpo	g0,g4		# check alignment of dest
	ld	(g3),MSW	# pre-fetch second half
	and	3,g1,g1		# compute shift count
	shlo	3,g1,g14	# shift count in bits = 8 * src byte offset
#if __i960_BIG_ENDIAN__
	subo	g14,0,g14	# adjust shift count for big endian
#endif
	be.t	Lcase_4		# branch if dest is word aligned
	or	g4,g1,g1	# is src earlier in word, later, or sync w/ dst
	cmpo	g0,g1		# < indicates first word of dest has more bytes
	lda	4(g4),g4	# move dest word addr to first word boundary
	eshro	g14,g6,g5	# extract four bytes
	lda	(g0),g1		# copy dest byte ptr
#if __i960_BIG_ENDIAN__
	bge.f	1f
#else
	bg.f	1f
#endif
	mov	MSW,LSW		# second src word becomes first of the pair
	lda	4(g3),g3	# move src word addr to second word boundary
1:
	mov	g5,MSW		# extracted bytes feed the leading byte loop
	b	Lcase_25


Lbackwards:				# copy from the last byte down to the first
	notand	g5,3,MSW	# extract word addr of byte after end of src
	cmpo	MSW,g5		# check alignment of end of src
	subo	4,MSW,g3	# retreat src word addr
	addo	g2,g0,g1	# compute addr of byte after end of dest
	notand	g1,3,g4		# extract word addr of byte after end of dest
	bne.f	Lcase.245	# branch if src is NOT word aligned

Lcase.13:
	cmpo	g1,g4		# check alignment of dest
	ld	(g3),MSW	# fetch last word of src
	subo	4,g3,g3		# retreat src word addr
	be.t	Lcase.1		# branch if dest word aligned

Lcase.3:				# src is word aligned; dest is not
	mov	MSW,LSW		# make copy of first word of src
	lda	32,g14		# initialize shift count to zero (mod 32)

Lcase.25:
Lcase.3_cloop_at_start:			# character copying loop for start of dest str
	cmpdeci	0,g2,g2		# is max.bytes exhausted?
	be.f	Lexit_code	# Lexit if max_bytes is exhausted
#if ! __i960_BIG_ENDIAN__
	rotate	8,LSW,LSW	# move next byte into position for storing
#endif
	lda	-1(g1),g1	# pre-decrement dest ptr
	cmpo	g1,g4		# have we reached word boundary in dest yet?
	stob	LSW,(g1)	# store the byte in dest
#if __i960_BIG_ENDIAN__
	shro	8,LSW,LSW	# move next byte into position for storing
	addo	8,g14,g14	# augment the shift counter
#else
	subo	8,g14,g14	# augment the shift counter
#endif
	bne.t	Lcase.3_cloop_at_start	# loop until dest ptr reaches word boundary

	ld	(g3),LSW	# fetch lsw of operand for double shift

#if __i960_BIG_ENDIAN__
	cmpobne	0,g14,Lcase.3_wloop	# nonzero shift count: use the eshro loop
Lcase.3_wloop2:				# shift count is zero: take MSW as is, no eshro
	cmpi	g2,4		# less than four bytes to move?
	lda	-4(g3),g3	# post-decrement src word addr
	mov	MSW,g1		# extract 4 bytes of src
	lda	(LSW),MSW	# move lsw to msw
	subo	4,g4,g4		# pre-decrement dest ptr
	bl.f	Lcase.3_cloop	# branch if < four bytes left to move
	ld	(g3),LSW	# pre-fetch lsw of operand for double shift
	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
	st	g1,(g4)		# store 4 bytes to dest
	b	Lcase.3_wloop2
#endif

Lcase.4:
Lcase.3_wloop:
	cmpi	g2,4		# less than four bytes to move?
	lda	-4(g3),g3	# post-decrement src word addr
	eshro	g14,g6,g1	# extract 4 bytes of src
	lda	(LSW),MSW	# move lsw to msw
	subo	4,g4,g4		# pre-decrement dest ptr
	bl.f	Lcase.3_cloop	# branch if < four bytes left to move
	ld	(g3),LSW	# pre-fetch lsw of operand for double shift
	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
	st	g1,(g4)		# store 4 bytes to dest
	b	Lcase.3_wloop

Lcase.1_wloop:				# word copying loop
	subi	4,g2,g2		# decrease max.byte count by the 4 bytes moved
	ld	(g3),MSW	# pre-fetch next word of src
	subo	4,g3,g3		# post-decrement src addr
	st	g1,(g4)		# store word in dest string
Lcase.1:				# src and dest are word aligned
	cmpi	g2,4		# check for fewer than four bytes to move
	subo	4,g4,g4		# pre-decrement dest addr
	lda	(MSW),g1	# keep a copy of the src word
	bge.t	Lcase.1_wloop	# branch if at least four bytes to copy
Lcase.3_cloop:
	cmpibe.f 0,g2,Lexit_code	# Lexit if max_bytes is exhausted
#if ! __i960_BIG_ENDIAN__
	rotate	8,g1,g1		# move next byte into position for storing
#endif
	lda	4(g4),g4	# undo the word pre-decrement of dest addr

Lcase.1_cloop:				# trailing bytes: store g1 one byte at a time
	subi	1,g4,g4		# pre-decrement dest byte addr
	cmpi	g4,g0		# has dest ptr reached beginning of dest?
	stob	g1,(g4)		# store the byte in dest
#if __i960_BIG_ENDIAN__
	shro	8,g1,g1		# move next byte into position for storing
#else
	rotate	8,g1,g1		# move next byte into position for storing
#endif
	bne.t	Lcase.1_cloop	# loop until dest ptr is back at start of dest
	b	Lexit_code

Lcase.245:				# backward copy; end of src is not word aligned
	cmpo	g1,g4		# check alignment of dest
	ld	(MSW),MSW	# pre-fetch word with at least last byte
	and	3,g5,g5		# compute shift count
	ld	(g3),LSW	# pre-fetch second to last word
	shlo	3,g5,g14	# shift count in bits = 8 * byte offset of src end
#if __i960_BIG_ENDIAN__
	subo	g14,0,g14	# adjust shift count for big endian
#endif
	be.t	Lcase.4		# branch if dest is word aligned
	or	g4,g5,g5	# is src earlier in word, later, or sync w/ dst
	cmpo	g1,g5		# < indicates last word of dest has less bytes
	eshro	g14,g6,g5	# extract four bytes
	bl.t	1f
	mov	LSW,MSW		# second-to-last src word becomes msw of the pair
#if ! __i960_BIG_ENDIAN__
	be.t	1f
#endif
	subo	4,g3,g3		# move src word addr to second word boundary
1:
	mov	g5,LSW		# extracted bytes feed the leading byte loop
	b	Lcase.25


Lquick_exit:				# len <= 0: taken before g13 was loaded
	mov	g14,g13		# put return address where Lexit_code expects it
	b	Lexit_code

/* end of memmove */