1/* Copyright (c) 2013, Linaro Limited
2   All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6       * Redistributions of source code must retain the above copyright
7         notice, this list of conditions and the following disclaimer.
8       * Redistributions in binary form must reproduce the above copyright
9         notice, this list of conditions and the following disclaimer in the
10         documentation and/or other materials provided with the distribution.
11       * Neither the name of the Linaro nor the
12         names of its contributors may be used to endorse or promote products
13         derived from this software without specific prior written permission.
14
15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
26
27/*
28 * Copyright (c) 2015 ARM Ltd
29 * All rights reserved.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 *    notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 *    notice, this list of conditions and the following disclaimer in the
38 *    documentation and/or other materials provided with the distribution.
39 * 3. The name of the company may not be used to endorse or promote
40 *    products derived from this software without specific prior written
41 *    permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
44 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
45 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
46 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
48 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
49 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
50 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
51 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
52 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 */
54
55/* Assumptions:
56 *
57 * ARMv8-a, AArch64, unaligned accesses
58 */
59
60#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED)) || !defined(__LP64__)
61/* See memmove-stub.c  */
62#else
63
64	.macro def_fn f p2align=0
65	.text
66	.p2align \p2align
67	.global \f
68	.type \f, %function
69\f:
70	.endm
71
/* Parameters and result.  */
#define dstin	x0	/* In: destination pointer; also the return value.  */
#define src	x1	/* In: source pointer.  */
#define count	x2	/* In: byte count; decremented as the copy proceeds.  */
#define srcend	x3	/* src + count; walked downwards by the backward copy.  */
#define dstend	x4	/* dstin + count; walked downwards by the backward copy.  */
#define tmp1	x5	/* Scratch: dst - src, then the dstend alignment offset.  */
#define A_l	x6	/* A..D: four 16-byte register pairs used to software-  */
#define A_h	x7	/*       pipeline the 64-byte-per-iteration main loop.  */
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	count	/* E reuses count/tmp1: both are dead by the time the  */
#define E_h	tmp1	/* final 64-byte tail block is copied.  */
89
/* All memmoves up to 96 bytes are done by memcpy, which supports such
   overlaps.  Larger copies with dst < src (or with no overlap) are also
   safe for memcpy's forward copy.  The only remaining case is a large
   overlapping copy with dst > src, which must be copied backwards from
   the end.  The end of the destination is aligned to 16 bytes, and an
   unrolled loop processes 64 bytes per iteration.
*/
95
def_fn memmove, 6
	/* Classify the copy.  tmp1 = dst - src; interpreted unsigned,
	   tmp1 >= count means either dst < src (subtraction wrapped) or
	   the buffers do not overlap — both safe for memcpy's forward
	   copy.  The ccmp only performs that comparison when count > 96
	   (cond "hi"); otherwise it sets NZCV = 2 (C=1), which also makes
	   the b.hs taken, since memcpy handles all overlaps up to 96
	   bytes.  */
	sub	tmp1, dstin, src
	cmp	count, 96
	ccmp	tmp1, count, 2, hi
	b.hs	memcpy

	/* Remaining case: count > 96 and 0 <= dst - src < count, i.e. an
	   overlapping forward copy that must be performed backwards.  */
	cbz	tmp1, 3f		/* dst == src: nothing to do.  */
	add	dstend, dstin, count
	add	srcend, src, count

	/* Align dstend to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.	 There are at least 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.	The loop
	   copies 64 bytes per iteration and prefetches one iteration ahead.  */

	and	tmp1, dstend, 15	/* tmp1 = misalignment of dstend (0..15).  */
	ldp	D_l, D_h, [srcend, -16]	/* Copy the (possibly unaligned) last 16  */
	sub	srcend, srcend, tmp1	/* bytes first, then back both end	  */
	sub	count, count, tmp1	/* pointers up to the aligned position.	  */
	ldp	A_l, A_h, [srcend, -16]	/* Preload A..D: the next 64 source bytes. */
	stp	D_l, D_h, [dstend, -16]
	ldp	B_l, B_h, [srcend, -32]
	ldp	C_l, C_h, [srcend, -48]
	ldp	D_l, D_h, [srcend, -64]!
	sub	dstend, dstend, tmp1
	subs	count, count, 128	/* Reserve 64 in-flight + 64 for the tail. */
	b.ls	2f			/* <= 128 left: no main-loop iterations.   */
	nop				/* Pad so label 1 starts a new fetch block. */

	/* Software-pipelined backward loop: store the 64 bytes loaded on
	   the previous iteration while loading the next 64.  srcend and
	   dstend are pre-decremented by 64 each pass.  */
1:
	stp	A_l, A_h, [dstend, -16]
	ldp	A_l, A_h, [srcend, -16]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [srcend, -48]
	stp	D_l, D_h, [dstend, -64]!
	ldp	D_l, D_h, [srcend, -64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.	 The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the start even if
	   there is just 1 byte left.  The tail loads from the start of the
	   buffer all complete before the stores to the start are issued, so
	   the remaining overlap is handled correctly.  */
2:
	ldp	E_l, E_h, [src, 48]	/* E reuses count/tmp1, both dead now.	  */
	stp	A_l, A_h, [dstend, -16]	/* Drain the pipelined A..D registers.	  */
	ldp	A_l, A_h, [src, 32]
	stp	B_l, B_h, [dstend, -32]
	ldp	B_l, B_h, [src, 16]
	stp	C_l, C_h, [dstend, -48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, -64]
	stp	E_l, E_h, [dstin, 48]	/* Store the first 64 destination bytes	  */
	stp	A_l, A_h, [dstin, 32]	/* last, highest address first.		  */
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin]
3:	ret

	.size	memmove, . - memmove
155#endif
156