1/******************************************************************************* 2 * 3 * Copyright (c) 1993 Intel Corporation 4 * 5 * Intel hereby grants you permission to copy, modify, and distribute this 6 * software and its documentation. Intel grants this permission provided 7 * that the above copyright notice appears in all copies and that both the 8 * copyright notice and this permission notice appear in supporting 9 * documentation. In addition, Intel grants this permission provided that 10 * you prominently mark as "not part of the original" any modifications 11 * made to this software or documentation, and that the name of Intel 12 * Corporation not be used in advertising or publicity pertaining to 13 * distribution of the software or the documentation without specific, 14 * written prior permission. 15 * 16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR 17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY 18 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or 19 * representations regarding the use of, or the results of the use of, 20 * the software and documentation in terms of correctness, accuracy, 21 * reliability, currentness, or otherwise; and you rely on the software, 22 * documentation and results solely at your own risk. 23 * 24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS, 25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES 26 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM 27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER. 
 *
 ******************************************************************************/

#include <picolibc.h>

	.file "memcpy.s"
#ifdef __PIC
	.pic
#endif
#ifdef __PID
	.pid
#endif
/*
 * (c) copyright 1988,1993 Intel Corp., all rights reserved
 */
/*
  procedure memmove  (optimized assembler version for the 80960K series)
  procedure memcpy   (optimized assembler version for the 80960K series)

  dest_addr = memmove (dest_addr, src_addr, len)
  dest_addr = memcpy  (dest_addr, src_addr, len)

  copy len bytes pointed to by src_addr to the space pointed to by
  dest_addr.  Return the original dest_addr.

  These routines will work even if the arrays overlap.  The standard
  requires this of memmove, but memcpy is allowed to fail if overlap
  is present.  Nevertheless, it is implemented the same as memmove
  because the overhead is trifling.

  Undefined behavior will occur if the end of the source array is in
  the last two words of the program's allocated memory space.  This
  is so because the routine fetches ahead.  Disallowing the fetch
  ahead would impose a severe performance penalty.

  Strategy:

  Fetch the source array by words and store them by words to the
  destination array, until there are fewer than four bytes left
  to copy.  Then, using the last word of the source (the one that
  contains the remaining 0, 1, 2, or 3 bytes to be copied), store
  a byte at a time until done.

  Tactics:

1) Do NOT try to fetch and store the words in a word aligned manner
   because, in my judgement, the performance degradation experienced due
   to non-aligned accesses does NOT outweigh the time and complexity added
   by the preamble and convoluted body that would be necessary to assure
   alignment.  This is supported by the intuition that most source and
   destination arrays (even more true of most big source arrays) will
   be word aligned to begin with.
2) For non-overlapping arrays, rather than counting len down to zero,
   the address of the byte just past the end of the destination array
   is computed once, and copying stops when the destination pointer
   reaches it.

3) For overlapping arrays where the source starts at a lower address
   than the destination, the move is performed in reverse order.

4) Overlapping arrays where the source starts at a higher address
   are handled exactly like the non-overlapping case.  Where the two
   arrays exactly coincide, the routine is short-circuited; no move is
   done at all.  This costs only one cycle.
*/

/*
 * Register usage, as established by the code below:
 *
 *   g0   dest_addr; never written, so it is still dest_addr on return
 *   g1   src_addr; stepped upward one word at a time by the forward copy
 *   g2   len
 *   g3   src_addr + len; stepped downward by the backward copy
 *   g4   forward copy: the word (or leftover bytes) waiting to be stored
 *   g5   forward copy: running destination pointer
 *   g6   dest_addr + len; stepped downward by the backward copy
 *   g7   most recently fetched source word
 *   g13  copy of the return address taken from g14
 *   g14  return address on entry; cleared to zero before returning
 */

        .globl      _memcpy, _memmove
        .globl      __memcpy, __memmove
        .leafproc   _memmove, __memmove
        .leafproc   _memcpy, __memcpy
        .align      2

/*
 * First entry point.  g14 is pointed at Lrett so that the shared exit
 * sequence (bx through the saved address) lands on the ret instruction.
 */
_memmove:
_memcpy:
#ifndef __PIC
        lda     Lrett, g14
#else
        lda     Lrett-(.+8)(ip), g14
#endif

/*
 * Second entry point named by .leafproc.  From here on the return
 * address is taken from g14.
 */
__memmove:
__memcpy:
        mov     g14, g13                # g13 = return address
        cmpibge 0, g2, Lexit            # len <= 0 (signed compare): nothing to copy
        cmpo    g0, g1                  # dest versus src
        addo    g2, g1, g3              # g3 = src + len
        be      Lexit                   # dest == src: data is already in place
        concmpo g3, g0                  # src end versus dest, conditional on the cmpo above
        addo    g2, g0, g6              # g6 = dest + len
        bg      Lbwd_next               # dest starts inside the source: copy top-down

/* ---- forward copy: ascending addresses ------------------------------- */

        ld      (g1), g7                # prime g7 with the first source word
        mov     g0, g5                  # g5 = running dest pointer
        b       Lfwd_next

Lfwd_store:
        ld      (g1), g7                # fetch ahead the following source word
        st      g4, (g5)                # write out the word fetched previously
        addo    4, g5, g5               # advance dest past the stored word
Lfwd_next:
        addo    4, g1, g1               # advance src past the word held in g7
        cmpo    g3, g1                  # does src end reach the advanced pointer?
        mov     g7, g4                  # g4 = word to store (or leftover bytes)
        bge     Lfwd_store              # yes: the whole word belongs to the source
        cmpobe  g6, g5, Lexit           # dest already at its end: no leftover bytes

Lfwd_bytes:                             # 1 to 3 leftover bytes, lowest address first
        stob    g4, (g5)                # store the low-order byte of g4
        shro    8, g4, g4               # shift the next byte down into place
        addo    1, g5, g5
        cmpobne g6, g5, Lfwd_bytes      # repeat until dest reaches dest + len

/* ---- common exit ----------------------------------------------------- */

Lexit:
        mov     0, g14                  # g14 = 0 on return
        bx      (g13)                   # return through saved address; g0 = dest_addr
Lrett:
        ret

/* ---- backward copy: descending addresses ----------------------------- */

Lbwd_store:
        subo    4, g6, g6               # step dest down one word
        st      g7, (g6)                # store the word just fetched
Lbwd_next:
        subo    4, g3, g3               # step src down one word
        cmpo    g1, g3                  # is the stepped pointer still at or above src?
        ld      (g3), g7                # fetch before branching; also feeds the byte tail
        ble     Lbwd_store              # yes: the whole word belongs to the source
        cmpobe  g6, g0, Lexit           # dest back at its start: no leftover bytes

Lbwd_bytes:                             # 1 to 3 leftover bytes, highest address first
        subo    1, g6, g6
        rotate  8, g7, g7               # bring the next byte into the low-order position
        stob    g7, (g6)                # store the low-order byte of g7
        cmpobne g6, g0, Lbwd_bytes      # repeat until dest is back at dest_addr
        b       Lexit

/* end of memmove */