/*******************************************************************************
 *
 * Copyright (c) 1993 Intel Corporation
 *
 * Intel hereby grants you permission to copy, modify, and distribute this
 * software and its documentation. Intel grants this permission provided
 * that the above copyright notice appears in all copies and that both the
 * copyright notice and this permission notice appear in supporting
 * documentation. In addition, Intel grants this permission provided that
 * you prominently mark as "not part of the original" any modifications
 * made to this software or documentation, and that the name of Intel
 * Corporation not be used in advertising or publicity pertaining to
 * distribution of the software or the documentation without specific,
 * written prior permission.
 *
 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or
 * representations regarding the use of, or the results of the use of,
 * the software and documentation in terms of correctness, accuracy,
 * reliability, currentness, or otherwise; and you rely on the software,
 * documentation and results solely at your own risk.
 *
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
 *
 ******************************************************************************/

#include <picolibc.h>

        .file "memcp_ca.s"
#ifdef __PIC
        .pic
#endif
#ifdef __PID
        .pid
#endif
/*
 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
 */

/*
  procedure memmove (optimized assembler version for the CA)
  procedure memcpy (optimized assembler version for the CA)

  dest_addr = memmove (dest_addr, src_addr, len)
  dest_addr = memcpy (dest_addr, src_addr, len)

  Copy len bytes pointed to by src_addr to the space pointed to by
  dest_addr. Return the original dest_addr.

  Memcpy will fail if the source and destination string overlap
  (in particular, if the end of the source is overlapped by the
  beginning of the destination). The behavior is undefined.
  This is acceptable according to the draft C standard.
  Memmove will not fail if overlap exists.

  Undefined behavior will also occur if the last byte of the source
  is in the last word of the program's allocated memory space. This
  is so because, in several cases, the routine will fetch ahead one
  word. Disallowing the fetch ahead would impose a severe performance
  penalty.

  This program handles five cases:

    1) both arguments start on a word boundary
    2) neither are word aligned, but they are offset by the same amount
    3) source is word aligned, destination is not
    4) destination is word aligned, source is not
    5) neither is word aligned, and they are offset by differing amounts

  At the time of this writing, only g0 thru g7 and g13 are available
  for use in this leafproc; other registers would have to be saved and
  restored. These nine registers, plus tricky use of g14 are sufficient
  to implement the routine. The registers are used as follows:

    g0   dest ptr; not modified, so that it may be returned
    g1   src ptr on entry; later the dest byte ptr, and the word
         (or remaining bytes) about to be stored
    g2   len (count of bytes still to move)
    g3   src ptr (word aligned)
    g4   dest ptr (word aligned); byte ptr in the trailing byte loops
    g5   addr of byte after last byte of src; then scratch
         (extracted bytes, low bits used for the shift count)
    Little endian
      g6   lsw of double word for extraction of 4 bytes
      g7   msw of double word for extraction of 4 bytes
    Big endian
      g6   msw of double word for extraction of 4 bytes
      g7   lsw of double word for extraction of 4 bytes
    g13  return address
    g14  return address on entry; then shift count for eshro;
         set to 0 before returning
*/

/*
  Interface summary (reviewer note, not part of the original Intel text):

    entry   g0 = dest_addr, g1 = src_addr, g2 = len
    exit    g0 = dest_addr (unchanged), g14 = 0

  _memcpy and _memmove label the same instruction, as do __memcpy and
  __memmove, so in this file both names run identical code, including
  the backwards path used for overlapping operands.

  _memcpy/_memmove load the address of Lrett into g14 and fall into
  __memcpy/__memmove.  The routine copies g14 to g13 and leaves through
  "bx (g13)" at Lexit_code.

  len is tested with an integer (signed) compare against zero, so a
  value that is zero or negative when read as a signed integer copies
  nothing.

  Labels spelled Lcase_N belong to the forward copy; labels spelled
  Lcase.N are the matching steps of the backward copy.
*/

#if __i960_BIG_ENDIAN__
#define MSW g6
#define LSW g7
#else
#define LSW g6
#define MSW g7
#endif

        .globl _memmove, _memcpy
        .globl __memmove, __memcpy
        .leafproc _memmove, __memmove
        .leafproc _memcpy, __memcpy
        .align 2
_memcpy:
_memmove:
#ifndef __PIC
        lda     Lrett,g14               # g14 = address of the ret at Lrett
#else
        lda     Lrett-(.+8)(ip),g14     # same, computed ip-relative
#endif
__memcpy:
__memmove:
        cmpibge.f 0,g2,Lquick_exit      # exit if number of bytes to move is <= zero (signed compare)
        cmpo    g0,g1                   # if dest starts earlier than src ...
        lda     (g14),g13               # preserve return address
        addo    g2,g1,g5                # compute addr of byte after last byte of src
        be.f    Lexit_code              # no move necessary if src and dest are same
        concmpo g5,g0                   # ... or if dest starts after end of src ...
        notand  g1,3,g3                 # extract word addr of start of src
        bg.f    Lbackwards              # ... then drop thru, else do move backwards
        cmpo    g3,g1                   # check alignment of src
        ld      (g3),LSW                # fetch word containing at least first byte
        notand  g0,3,g4                 # extract word addr of start of dest
        lda     4(g3),g3                # advance src word addr
        bne.f   Lcase_245               # branch if src is NOT word aligned

Lcase_13:
        cmpo    g0,g4                   # check alignment of dest
        subo    4,g4,g4                 # store is pre-incrementing; back up dest addr
        be.t    Lcase_1                 # branch if dest word aligned

Lcase_3:                                # src is word aligned; dest is not
        addo    8,g4,g4                 # move dest word ptr to first word boundary
        lda     (g0),g1                 # copy dest byte ptr
        mov     LSW,MSW                 # make copy of first word of src
        lda     32,g14                  # initialize shift count to zero (mod 32)

Lcase_25:
Lcase_3_cloop_at_start:                 # character copying loop for start of dest str
        cmpdeci 0,g2,g2                 # is max_bytes exhausted?
        be.f    Lexit_code              # exit if max_bytes is exhausted
#if __i960_BIG_ENDIAN__
        rotate  8,MSW,MSW               # move next byte into position for extraction
        subo    8,g14,g14               # augment the shift counter
        stob    MSW,(g1)                # store the byte in dest
#else
        addo    8,g14,g14               # augment the shift counter
        stob    MSW,(g1)                # store the byte in dest
        shro    8,MSW,MSW               # move next byte into position for extraction
#endif
        lda     1(g1),g1                # post-increment dest ptr
        cmpobne.t g1,g4,Lcase_3_cloop_at_start # loop until dest ptr reaches word boundary

        ld      (g3),MSW                # fetch msw of operand for double shift

Lcase_4:
Lcase_3_wloop:
        cmpi    g2,4                    # less than four bytes to move?
        lda     4(g3),g3                # post-increment src word addr
        eshro   g14,g6,g1               # extract 4 bytes of src
        bl.f    Lcase_3_cloop           # branch if < four bytes left to move
        mov     MSW,LSW                 # move msw to lsw
        ld      (g3),MSW                # pre-fetch msw of operand for double shift
        subi    4,g2,g2                 # decrease max_byte count by the 4 bytes moved
        st      g1,(g4)                 # store 4 bytes to dest
        addo    4,g4,g4                 # post-increment dest ptr
        b       Lcase_3_wloop

Lcase_1_wloop:                          # word copying loop
        subi    4,g2,g2                 # decrease max_byte count by the 4 bytes moved
        ld      (g3),LSW                # pre-fetch next word of src
        addo    4,g3,g3                 # post-increment src addr
        st      g1,(g4)                 # store word in dest string
Lcase_1:                                # src and dest are word aligned
        cmpi    g2,4                    # check for fewer than four bytes to move
        addo    4,g4,g4                 # pre-increment dest addr
        lda     (LSW),g1                # keep a copy of the src word
        bge.t   Lcase_1_wloop           # branch if at least four bytes to copy
Lcase_3_cloop:
        cmpibe.f 0,g2,Lexit_code        # exit if max_bytes is exhausted

Lcase_1_cloop:                          # byte loop for the last 1 to 3 bytes held in g1
#if __i960_BIG_ENDIAN__
        rotate  8,g1,g1                 # move next byte into position for extraction
#endif
        subi    1,g2,g2                 # one fewer byte left to move
        stob    g1,(g4)                 # store the byte in dest
        cmpi    0,g2                    # any bytes left?
        lda     1(g4),g4                # post-increment dest byte addr
#if ! __i960_BIG_ENDIAN__
        shro    8,g1,g1                 # move next byte into position for extraction
#endif
        bne.t   Lcase_1_cloop           # loop until max_bytes is exhausted

Lexit_code:
        mov     0,g14                   # conform to register conventions
        bx      (g13)                   # g0 = addr of dest; g14 = 0
Lrett:
        ret                             # target of the bx above when entered at _memcpy/_memmove


Lcase_245:
        cmpo    g0,g4                   # check alignment of dest
        ld      (g3),MSW                # pre-fetch second half
        and     3,g1,g1                 # compute shift count
        shlo    3,g1,g14                # shift count in bits = 8 * (src addr & 3)
#if __i960_BIG_ENDIAN__
        subo    g14,0,g14               # adjust shift count for big endian
#endif
        be.t    Lcase_4                 # branch if dest is word aligned
        or      g4,g1,g1                # is src earlier in word, later, or sync w/ dst
        cmpo    g0,g1                   # < indicates first word of dest has more bytes
        lda     4(g4),g4                # move dest word addr to first word boundary
        eshro   g14,g6,g5               # extract four bytes
        lda     (g0),g1                 # copy dest byte ptr
#if __i960_BIG_ENDIAN__
        bge.f   1f
#else
        bg.f    1f
#endif
        mov     MSW,LSW
        lda     4(g3),g3                # move src word addr to second word boundary
1:
        mov     g5,MSW                  # extracted bytes feed the leading byte loop
        b       Lcase_25


Lbackwards:
        notand  g5,3,MSW                # extract word addr of byte after end of src
        cmpo    MSW,g5                  # check alignment of end of src
        subo    4,MSW,g3                # retreat src word addr
        addo    g2,g0,g1                # compute addr of byte after end of dest
        notand  g1,3,g4                 # extract word addr of byte after end of dest
        bne.f   Lcase.245               # branch if end of src is NOT word aligned

Lcase.13:
        cmpo    g1,g4                   # check alignment of dest
        ld      (g3),MSW                # fetch last word of src
        subo    4,g3,g3                 # retreat src word addr
        be.t    Lcase.1                 # branch if dest word aligned

Lcase.3:                                # src is word aligned; dest is not
        mov     MSW,LSW                 # make copy of last word of src
        lda     32,g14                  # initialize shift count to zero (mod 32)

Lcase.25:
Lcase.3_cloop_at_start:                 # character copying loop for end of dest str
        cmpdeci 0,g2,g2                 # is max_bytes exhausted?
        be.f    Lexit_code              # exit if max_bytes is exhausted
#if ! __i960_BIG_ENDIAN__
        rotate  8,LSW,LSW               # move next byte into position for storing
#endif
        lda     -1(g1),g1               # pre-decrement dest ptr
        cmpo    g1,g4                   # have we reached word boundary in dest yet?
        stob    LSW,(g1)                # store the byte in dest
#if __i960_BIG_ENDIAN__
        shro    8,LSW,LSW               # move next byte into position for storing
        addo    8,g14,g14               # augment the shift counter
#else
        subo    8,g14,g14               # augment the shift counter
#endif
        bne.t   Lcase.3_cloop_at_start  # loop until dest ptr reaches word boundary

        ld      (g3),LSW                # fetch lsw of operand for double shift

#if __i960_BIG_ENDIAN__
        cmpobne 0,g14,Lcase.3_wloop     # nonzero shift count: use the eshro loop
Lcase.3_wloop2:                         # shift count is zero: move MSW without eshro
        cmpi    g2,4                    # less than four bytes to move?
        lda     -4(g3),g3               # post-decrement src word addr
        mov     MSW,g1                  # extract 4 bytes of src
        lda     (LSW),MSW               # move lsw to msw
        subo    4,g4,g4                 # pre-decrement dest ptr
        bl.f    Lcase.3_cloop           # branch if < four bytes left to move
        ld      (g3),LSW                # pre-fetch lsw of operand for double shift
        subi    4,g2,g2                 # decrease max_byte count by the 4 bytes moved
        st      g1,(g4)                 # store 4 bytes to dest
        b       Lcase.3_wloop2
#endif

Lcase.4:
Lcase.3_wloop:
        cmpi    g2,4                    # less than four bytes to move?
        lda     -4(g3),g3               # post-decrement src word addr
        eshro   g14,g6,g1               # extract 4 bytes of src
        lda     (LSW),MSW               # move lsw to msw
        subo    4,g4,g4                 # pre-decrement dest ptr
        bl.f    Lcase.3_cloop           # branch if < four bytes left to move
        ld      (g3),LSW                # pre-fetch lsw of operand for double shift
        subi    4,g2,g2                 # decrease max_byte count by the 4 bytes moved
        st      g1,(g4)                 # store 4 bytes to dest
        b       Lcase.3_wloop

Lcase.1_wloop:                          # word copying loop
        subi    4,g2,g2                 # decrease max_byte count by the 4 bytes moved
        ld      (g3),MSW                # pre-fetch next word of src
        subo    4,g3,g3                 # post-decrement src addr
        st      g1,(g4)                 # store word in dest string
Lcase.1:                                # src and dest are word aligned
        cmpi    g2,4                    # check for fewer than four bytes to move
        subo    4,g4,g4                 # pre-decrement dest addr
        lda     (MSW),g1                # keep a copy of the src word
        bge.t   Lcase.1_wloop           # branch if at least four bytes to copy
Lcase.3_cloop:
        cmpibe.f 0,g2,Lexit_code        # exit if max_bytes is exhausted
#if ! __i960_BIG_ENDIAN__
        rotate  8,g1,g1                 # move next byte into position for storing
#endif
        lda     4(g4),g4                # pre-decremented dest addr 4 too much

Lcase.1_cloop:                          # byte loop; ends when g4 gets back to g0
        subi    1,g4,g4                 # pre-decrement dest byte addr
        cmpi    g4,g0                   # has dest ptr reached beginning of dest?
        stob    g1,(g4)                 # store the byte in dest
#if __i960_BIG_ENDIAN__
        shro    8,g1,g1                 # move next byte into position for storing
#else
        rotate  8,g1,g1                 # move next byte into position for storing
#endif
        bne.t   Lcase.1_cloop           # loop until move is completed
        b       Lexit_code

Lcase.245:
        cmpo    g1,g4                   # check alignment of dest
        ld      (MSW),MSW               # pre-fetch word with at least last byte
        and     3,g5,g5                 # compute shift count
        ld      (g3),LSW                # pre-fetch second to last word
        shlo    3,g5,g14                # shift count in bits = 8 * (end-of-src addr & 3)
#if __i960_BIG_ENDIAN__
        subo    g14,0,g14               # adjust shift count for big endian
#endif
        be.t    Lcase.4                 # branch if dest is word aligned
        or      g4,g5,g5                # is src earlier in word, later, or sync w/ dst
        cmpo    g1,g5                   # < indicates last word of dest has less bytes
        eshro   g14,g6,g5               # extract four bytes
        bl.t    1f
        mov     LSW,MSW
#if ! __i960_BIG_ENDIAN__
        be.t    1f
#endif
        subo    4,g3,g3                 # move src word addr to second word boundary
1:
        mov     g5,LSW                  # extracted bytes feed the trailing byte loop
        b       Lcase.25


Lquick_exit:
        mov     g14,g13                 # g13 is not set yet on this path; return address is still in g14
        b       Lexit_code

/* end of memmove */