/*
   Copyright (c) 2015, Synopsys, Inc. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   1) Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.

   2) Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

   3) Neither the name of the Synopsys, Inc., nor the names of its contributors
   may be used to endorse or promote products derived from this software
   without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

/* This implementation is optimized for performance.  For code size a generic
   implementation of this function from newlib/libc/string/memcpy.c will be
   used.
*/
#if !defined (__OPTIMIZE_SIZE__) && !defined (PREFER_SIZE_OVER_SPEED)

#include "asm.h"

#if defined (__ARCHS__)

/* Endian-dependent helpers used by the source-misaligned copy loops.

   Those loops keep a "carry" in r5: source bytes that have already been
   read but not yet stored.  For every freshly loaded word

     SHIFT_1  moves the part of the word that comes first in memory past the
              carried bytes, so the two can be OR-ed into one output word;
     SHIFT_2  keeps the rest of the word as the next carry.

   MERGE_1 / MERGE_2 build the initial 3-byte carry of the off-by-1 case
   from a halfword and a byte; EXTRACT_1 / EXTRACT_2 split the final 3-byte
   carry back into a halfword and a byte.  On little-endian MERGE_2 expands
   to nothing because the byte is already in its final position.

   The "; <<" and "; >>" tails are part of the macro text; after expansion
   they are ordinary assembler comments.  */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
# define SHIFT_2(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
# define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)   and     RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)     lsr     RX, RY, IMM     ; >>
# define SHIFT_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
# define MERGE_1(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
# define MERGE_2(RX,RY,IMM)     asl     RX, RY, IMM     ; <<
# define EXTRACT_1(RX,RY,IMM)   lsr     RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)   lsr     RX, RY, 0x08
#endif

/* Width-dependent helpers for the main unrolled-by-4 loop.

   With __ARC_LL64__ each LOADX/STOREX moves 8 bytes (ldd/std), so one loop
   iteration copies 32 bytes: ZOLSHFT = 5 turns a byte count into an
   iteration count and ZOLAND = 0x1F masks the bytes left over.  Without it
   each access moves 4 bytes, one iteration copies 16 bytes, and the
   constants are 4 and 0xF.  The prefetch offsets differ between the two
   configurations in the same proportion.  */
#ifdef __ARC_LL64__
# define PREFETCH_READ(RX)      prefetch  [RX, 56]
# define PREFETCH_WRITE(RX)     prefetchw [RX, 64]
# define LOADX(DST,RX)          ldd.ab  DST, [RX, 8]
# define STOREX(SRC,RX)         std.ab  SRC, [RX, 8]
# define ZOLSHFT                5
# define ZOLAND                 0x1F
#else
# define PREFETCH_READ(RX)      prefetch  [RX, 28]
# define PREFETCH_WRITE(RX)     prefetchw [RX, 32]
# define LOADX(DST,RX)          ld.ab   DST, [RX, 4]
# define STOREX(SRC,RX)         st.ab   SRC, [RX, 4]
# define ZOLSHFT                4
# define ZOLAND                 0xF
#endif

/* memcpy: copy r2 bytes from [r1] to [r0]; r0 is returned unchanged.

   Register roles, as used by both variants below:
     r0        destination on entry; never written, so it is still the
               return value at every "j [blink]"
     r1        source pointer, post-incremented by every load (.ab)
     r2        number of bytes still to be read
     r3        working copy of the destination, post-incremented by stores
     r4-r10    scratch (with ldd/std the second register of each pair is
               presumably used as well -- confirm against the ISA manual)
     lp_count  trip count of the current zero-overhead loop

   Conventions worth keeping in mind when editing:
     - An instruction that follows a ".d" branch or jump sits in its delay
       slot and runs whether or not the branch is taken.
     - "lpnz @label" starts a zero-overhead loop whose body ends just
       before label; it is skipped when Z is set.  Nothing inside the loop
       bodies here updates the flags.  */

#ifdef __ARC_ALIGNED_ACCESS__
/* Variant for __ARC_ALIGNED_ACCESS__: the destination is first brought to
   4-byte alignment, then the code dispatches on the source misalignment so
   that halfword and word accesses are only made once the pointer involved
   has been suitably aligned.  */
ENTRY (memcpy)
        prefetch  [r1]          ; Prefetch the read location
        prefetchw [r0]          ; Prefetch the write location
        mov.f   0, r2           ; set the flags from the size, result discarded
; if size is zero
        jz.d    [blink]
        mov     r3, r0          ; don't clobber ret val

; if size <= 8
        cmp     r2, 8
        bls.d   @.Lsmallchunk
        mov.f   lp_count, r2    ; delay slot: byte loop count, NZ for lpnz

; Align the destination: copy 4 - (dst & 3) bytes unless dst & 3 is zero.
; rsub leaves the flags from and.f untouched, so lpnz tests dst & 3.
        and.f   r4, r0, 0x03
        rsub    lp_count, r4, 4
        lpnz    @.Laligndestination
        ; LOOP BEGIN
        ldb.ab  r5, [r1,1]
        sub     r2, r2, 1
        stb.ab  r5, [r3,1]
.Laligndestination:

; Check the alignment of the source
        and.f   r4, r1, 0x03
        bnz.d   @.Lsourceunaligned

; CASE 0: Both source and destination are 32bit aligned
; Convert len to Dwords, unfold x4
; (the lsr.f below is the delay slot of the bnz.d above; on the unaligned
; paths its results are simply overwritten)
        lsr.f   lp_count, r2, ZOLSHFT
        lpnz    @.Lcopy32_64bytes
        ; LOOP START
        LOADX (r6, r1)
        PREFETCH_READ (r1)
        PREFETCH_WRITE (r3)
        LOADX (r8, r1)
        LOADX (r10, r1)
        LOADX (r4, r1)
        STOREX (r6, r3)
        STOREX (r8, r3)
        STOREX (r10, r3)
        STOREX (r4, r3)
.Lcopy32_64bytes:

        and.f   lp_count, r2, ZOLAND    ;Last remaining 31 bytes
.Lsmallchunk:
        ; Reached either by fall-through or from the size <= 8 test, in
        ; both cases with lp_count and the flags already set up.
        lpnz    @.Lcopyremainingbytes
        ; LOOP START
        ldb.ab  r5, [r1,1]
        stb.ab  r5, [r3,1]
.Lcopyremainingbytes:

        j       [blink]
; END CASE 0

.Lsourceunaligned:
        ; r4 = src & 3 (1, 2 or 3).  The sub below is a delay slot and runs
        ; for all three cases; it does not touch the flags, so bhi.d still
        ; tests the cmp.  The ldb in the second delay slot runs for the
        ; off-by-1 and off-by-3 cases only.
        cmp     r4, 2
        beq.d   @.LunalignedOffby2
        sub     r2, r2, 1

        bhi.d   @.LunalignedOffby3
        ldb.ab  r5, [r1, 1]

; CASE 1: The source is unaligned, off by 1
        ; Hence I need to read 1 byte for a 16bit alignment
        ; and 2bytes to reach 32bit alignment
        ; (3 bytes read in total, r2 reduced by 1 + 2 to match)
        ldh.ab  r6, [r1, 2]
        sub     r2, r2, 2
        ; Convert to words, unfold x2
        lsr.f   lp_count, r2, 3
        MERGE_1 (r6, r6, 8)
        MERGE_2 (r5, r5, 24)
        or      r5, r5, r6      ; r5 = 3-byte carry

        ; Both src and dst are aligned
        lpnz    @.Lcopy8bytes_1
        ; LOOP START
        ld.ab   r6, [r1, 4]
        prefetch  [r1, 28]      ;Prefetch the next read location
        ld.ab   r8, [r1,4]
        prefetchw [r3, 32]      ;Prefetch the next write location

        SHIFT_1 (r7, r6, 24)
        or      r7, r7, r5
        SHIFT_2 (r5, r6, 8)

        SHIFT_1 (r9, r8, 24)
        or      r9, r9, r5
        SHIFT_2 (r5, r8, 8)

        st.ab   r7, [r3, 4]
        st.ab   r9, [r3, 4]
.Lcopy8bytes_1:

        ; Write back the remaining 16bits
        EXTRACT_1 (r6, r5, 16)
        sth.ab  r6, [r3, 2]
        ; Write back the remaining 8bits
        EXTRACT_2 (r5, r5, 16)
        stb.ab  r5, [r3, 1]

        and.f   lp_count, r2, 0x07      ;Last 8bytes
        lpnz    @.Lcopybytewise_1
        ; LOOP START
        ldb.ab  r6, [r1,1]
        stb.ab  r6, [r3,1]
.Lcopybytewise_1:
        j       [blink]

.LunalignedOffby2:
; CASE 2: The source is unaligned, off by 2
        ; One halfword brings the source to 32bit alignment; together with
        ; the delay-slot sub above, r2 is reduced by 2.
        ldh.ab  r5, [r1, 2]
        sub     r2, r2, 1

        ; Both src and dst are aligned
        ; Convert to words, unfold x2
        lsr.f   lp_count, r2, 3
#ifdef __BIG_ENDIAN__
        ; Only when the loop will run: put the carry where the loop
        ; expects it.  Undone by the matching lsr.nz after the loop.
        asl.nz  r5, r5, 16
#endif
        lpnz    @.Lcopy8bytes_2
        ; LOOP START
        ld.ab   r6, [r1, 4]
        prefetch  [r1, 28]      ;Prefetch the next read location
        ld.ab   r8, [r1,4]
        prefetchw [r3, 32]      ;Prefetch the next write location

        SHIFT_1 (r7, r6, 16)
        or      r7, r7, r5
        SHIFT_2 (r5, r6, 16)

        SHIFT_1 (r9, r8, 16)
        or      r9, r9, r5
        SHIFT_2 (r5, r8, 16)

        st.ab   r7, [r3, 4]
        st.ab   r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
        ; Flags are still those of the lsr.f before the loop.
        lsr.nz  r5, r5, 16
#endif
        sth.ab  r5, [r3, 2]     ; store the 2-byte carry

        and.f   lp_count, r2, 0x07      ;Last 8bytes
        lpnz    @.Lcopybytewise_2
        ; LOOP START
        ldb.ab  r6, [r1,1]
        stb.ab  r6, [r3,1]
.Lcopybytewise_2:
        j       [blink]

.LunalignedOffby3:
; CASE 3: The source is unaligned, off by 3
; Hence, I need to read 1 byte to achieve the 32bit alignment
; (that byte is already in r5: it was loaded in the bhi.d delay slot)

        ; Both src and dst are aligned
        ; Convert to words, unfold x2
        lsr.f   lp_count, r2, 3
#ifdef __BIG_ENDIAN__
        ; Only when the loop will run; undone by the lsr.nz after the loop.
        asl.ne  r5, r5, 24
#endif
        lpnz    @.Lcopy8bytes_3
        ; LOOP START
        ld.ab   r6, [r1, 4]
        prefetch  [r1, 28]      ;Prefetch the next read location
        ld.ab   r8, [r1,4]
        prefetchw [r3, 32]      ;Prefetch the next write location

        SHIFT_1 (r7, r6, 8)
        or      r7, r7, r5
        SHIFT_2 (r5, r6, 24)

        SHIFT_1 (r9, r8, 8)
        or      r9, r9, r5
        SHIFT_2 (r5, r8, 24)

        st.ab   r7, [r3, 4]
        st.ab   r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
        ; Flags are still those of the lsr.f before the loop.
        lsr.nz  r5, r5, 24
#endif
        stb.ab  r5, [r3, 1]     ; store the 1-byte carry

        and.f   lp_count, r2, 0x07      ;Last 8bytes
        lpnz    @.Lcopybytewise_3
        ; LOOP START
        ldb.ab  r6, [r1,1]
        stb.ab  r6, [r3,1]
.Lcopybytewise_3:
        j       [blink]

ENDFUNC (memcpy)

#else

/* Variant used when __ARC_ALIGNED_ACCESS__ is not defined: no alignment
   handling at all, the wide loads and stores are issued on the pointers
   exactly as they were passed in.  */
ENTRY(memcpy)
        prefetch  [r1]          ; Prefetch the read location
        prefetchw [r0]          ; Prefetch the write location
        mov.f   0, r2           ; set the flags from the size, result discarded
;;; if size is zero
        jz.d    [blink]
        mov     r3, r0          ; don't clobber ret val

;;; if size <= 8
        cmp     r2, 8
        bls.d   @.Lsmallchunk
        mov.f   lp_count, r2    ; delay slot: byte loop count, NZ for lpnz

;;; Convert len to Dwords, unfold x4
        lsr.f   lp_count, r2, ZOLSHFT
        lpnz    @.Lcopyfast
        ;; LOOP START
        LOADX (r6, r1)
        PREFETCH_READ (r1)
        PREFETCH_WRITE (r3)
        LOADX (r8, r1)
        LOADX (r10, r1)
        LOADX (r4, r1)
        STOREX (r6, r3)
        STOREX (r8, r3)
        STOREX (r10, r3)
        STOREX (r4, r3)
.Lcopyfast:

#ifdef __ARC_LL64__
        ;; Up to 31 bytes are left: move them 8 at a time first.
        and     r2, r2, ZOLAND  ;Remaining 31 bytes
        lsr.f   lp_count, r2, 3 ;Convert to 64-bit words.
        lpnz    @.Lcopy64b
        ;; LOOP START
        ldd.ab  r6,[r1,8]
        std.ab  r6,[r3,8]
.Lcopy64b:

        and.f   lp_count, r2, 0x07 ; Last 7 bytes
#else
        and.f   lp_count, r2, ZOLAND
#endif

.Lsmallchunk:
        ;; Reached either by fall-through or from the size <= 8 test, in
        ;; both cases with lp_count and the flags already set up.
        lpnz    @.Lcopyremainingbytes
        ;; LOOP START
        ldb.ab  r5, [r1,1]
        stb.ab  r5, [r3,1]
.Lcopyremainingbytes:

        j       [blink]

ENDFUNC(memcpy)
#endif

#endif /* __ARCHS__ */

#endif /* !__OPTIMIZE_SIZE__ && !PREFER_SIZE_OVER_SPEED */