/* Copyright (c) 2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 */

#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
/* See memmove-stub.c  */
#else

/* def_fn f [p2align]
   Open a global function symbol named \f in .text, aligned to
   2^p2align bytes (default 0, i.e. no extra alignment), and emit its
   entry label.  The matching .size directive is written by the user of
   the macro.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm

/* Parameters and result.  */
#define dstin	x0
#define src	x1
#define count	x2
/* Scratch registers.  srcend/dstend start as one-past-the-end pointers
   and are walked downwards; tmp1 holds short-lived intermediates.  */
#define srcend	x3
#define dstend	x4
#define tmp1	x5
/* Four 16-byte register pairs carrying one 64-byte block of data.  */
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13
/* Fifth pair, used only in the tail (label 2).  It aliases count and
   tmp1, which are no longer needed by then.  */
#define E_l	count
#define E_h	tmp1

/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
   Larger backwards copies are also handled by memcpy.  The only remaining
   case is forward large copies.  The destination is aligned, and an
   unrolled loop processes 64 bytes per iteration.

   memmove (dstin, src, count)

   In:   x0 = dstin, x1 = src, x2 = count.
   Out:  x0 = dstin on the paths implemented here (x0 is never written).
	 On the memcpy path the result is whatever memcpy leaves in x0.
   Uses: x2-x13 and the condition flags; the stack is not touched.

   Dispatch, with all comparisons unsigned:
     count <= 96             -> tail-branch to memcpy
     dstin - src >= count    -> tail-branch to memcpy
     dstin == src            -> return immediately
     otherwise               -> copy here.  In this case dstin lies inside
				[src, src + count), so the data is moved
				starting at the END of the buffers and
				working down towards the start.  */

def_fn memmove, 6
	sub	tmp1, dstin, src
	cmp	count, 96
	/* If count > 96 the flags become those of "cmp tmp1, count";
	   otherwise NZCV is forced to 2 (C set) so that b.hs is taken.  */
	ccmp	tmp1, count, 2, hi
	/* Plain branch, not a call: memcpy returns directly to our caller.  */
	b.hs	memcpy

	/* dstin == src: nothing to move.  */
	cbz	tmp1, 3f
	add	dstend, dstin, count
	add	srcend, src, count

	/* Align dstend to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.  There are at least 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.  The loop
	   copies 64 bytes per iteration and prefetches one iteration ahead.  */

	/* tmp1 = number of bytes by which dstend exceeds a 16-byte boundary.  */
	and	tmp1, dstend, 15
	/* Last 16 source bytes, read before any store is issued.  */
	ldp	D_l, D_h, [srcend, -16]
	/* Step srcend and count back by the same amount as dstend (below).  */
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, -16]
	/* Unaligned store of the final 16 bytes of the destination.  */
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	/* Pre-index with writeback: srcend moves down by 64.  */
	ldp	D_l, D_h, [srcend, -64]!
	/* dstend is now 16-byte aligned.  */
	sub	dstend, dstend, tmp1
	/* Bias count by 128: 64 bytes are already held in A-D and the tail
	   at label 2 copies a further 64 bytes from the start.  If no more
	   than that remains, skip the loop.  */
	subs	count, count, 128
	b.ls	2f
	/* NOTE(review): presumably padding to position the loop entry below;
	   the source does not state the reason - confirm before removing.  */
	nop
1:
	/* Store the 64 bytes loaded on the previous pass while loading the
	   next 64.  Each register pair is stored before it is reloaded.  */
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	/* Writeback on the last pair moves both pointers down by 64.  */
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.  The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the start even if
	   there is just 1 byte left.  */
2:
	/* The first 64 source bytes are loaded into E, A, B, C interleaved
	   with the stores of the block still pending in A-D.  Writing E
	   overwrites count and tmp1.  The head of the destination is
	   stored last.  */
	ldp	E_l, E_h, [src, 48]
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	E_l, E_h, [dstin, 48]
	stp	A_l, A_h, [dstin, 32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
3:	ret

	.size	memmove, . - memmove
#endif