Lines Matching +full:src +full:- +full:2

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
6 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
8 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
40 addib,<> -1,%r25,$lclu_loop
47 2: b $lclu_done
50 ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
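The 1:/2: labels registered by ASM_EXCEPTIONTABLE_ENTRY(1b,2b) are the usual user-access fixup: if the store inside $lclu_loop faults, execution resumes at label 2, which branches to $lclu_done so the routine can still report how many bytes were left unwritten. A minimal C sketch of just that return convention follows; the fault_at parameter is an illustrative stand-in for the exception-table machinery and is not part of the real interface.

/*
 * Sketch only: models the lclear_user return value (bytes NOT cleared,
 * 0 on success).  A simulated fault replaces the exception-table fixup
 * used by the assembly.
 */
static unsigned long clear_user_sketch(unsigned char *to, unsigned long n,
				       unsigned long fault_at)
{
	while (n) {
		if (fault_at-- == 0)	/* simulated fault: take the fixup */
			break;
		*to++ = 0;
		n--;
	}
	return n;			/* what the caller sees in ret0 */
}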
58 * - sr1 already contains space of source region
59 * - sr2 already contains space of destination region
62 * - number of bytes that could not be copied.
65 * This code is based on a C-implementation of a copy routine written by
69 * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
71 * aligning the destination and then using shift-and-write method, or in a few
72 * cases by falling back to a byte-at-a-time copy.
75 * often >10x faster than a simple byte-at-a-time copy, even for strangely
78 * it by 30-40% for aligned copies because of the loop unrolling, but in some
83 * - add cache prefetching
84 * - try not to use the post-increment address modifiers; they may create
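The comment fragments above give pa_memcpy's contract: the caller has already loaded the source and destination space registers (sr1/sr2), and the return value is 0 on success or the number of bytes that could not be copied. A hedged sketch of that contract in C follows (the names are illustrative, not the kernel API); the individual copy strategies the comment mentions are sketched after the corresponding fragments further down.

#include <string.h>

/*
 * Illustrative stand-in with the contract described above: return the
 * number of bytes that could NOT be copied, 0 on success.  This sketch
 * never faults, so it always reports full success.
 */
static unsigned long pa_memcpy_sketch(void *dst, const void *src,
				      unsigned long len)
{
	memcpy(dst, src, len);
	return 0;
}

/* A caller turns the remainder into "bytes actually transferred". */
static unsigned long bytes_copied_sketch(void *dst, const void *src,
					 unsigned long len)
{
	return len - pa_memcpy_sketch(dst, src, len);
}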
90 src = arg1
118 xor src,dst,t0
119 extru t0,31,2,t1
123 /* only do 64-bit copies if we can get aligned. */
127 /* loop until we are 64-bit aligned */
131 20: ldb,ma 1(srcspc,src),t1
134 ldo -1(len),len
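The xor/extru pair above is the alignment dispatch: if src and dst differ in their low two address bits, no number of leading byte copies can ever make both pointers word aligned at once, so the routine takes the shift-and-write path instead; otherwise it copies single bytes (the ldb,ma / ldo -1(len),len loop) until dst reaches the wider boundary. A C sketch of both steps, assuming plain pointers with no space registers:

#include <stdint.h>

/* Nonzero when src and dst can be brought to a common word boundary by
 * copying a few leading bytes, i.e. the xor/extru test above. */
static int mutually_alignable(const void *src, const void *dst)
{
	return (((uintptr_t)src ^ (uintptr_t)dst) & 3) == 0;
}

/* The "loop until we are 64-bit aligned" prologue: copy single bytes
 * until dst sits on an 8-byte boundary (or len runs out). */
static void align_dst_to_8(unsigned char **dst, const unsigned char **src,
			   unsigned long *len)
{
	while (*len && ((uintptr_t)*dst & 7)) {
		*(*dst)++ = *(*src)++;
		(*len)--;
	}
}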
144 10: ldd 0(srcspc,src),t1
145 11: ldd 8(srcspc,src),t2
146 ldo 16(src),src
149 14: ldd 0(srcspc,src),t1
150 15: ldd 8(srcspc,src),t2
151 ldo 16(src),src
165 ldo -32(len),len
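The ldd pairs above are the body of the unrolled aligned loop: each iteration issues four doubleword loads through t1/t2 (the matching stores sit on lines this search did not pick up) and then drops len by 32. A hedged C equivalent, with uint64_t standing in for the 64-bit general registers:

#include <stdint.h>

/* Sketch of the unrolled 64-bit copy loop: 32 bytes per iteration, with
 * loads and stores interleaved like the t1/t2 register pairs above.
 * Both pointers are assumed 8-byte aligned, as on this assembly path;
 * the sub-32-byte remainder is left for the cleanup tail. */
static void copy_loop_64_sketch(uint64_t *dst, const uint64_t *src,
				unsigned long len)
{
	while (len >= 32) {
		uint64_t t1 = src[0];
		uint64_t t2 = src[1];

		dst[0] = t1;
		dst[1] = t2;
		t1 = src[2];
		t2 = src[3];
		dst[2] = t1;
		dst[3] = t2;
		src += 4;
		dst += 4;
		len -= 32;
	}
}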
169 20: ldw,ma 4(srcspc,src),t1
172 ldo -4(len),len
179 /* loop until we are 32-bit aligned */
181 extru dst,31,2,t1
183 20: ldb,ma 1(srcspc,src),t1
186 ldo -1(len),len
195 10: ldw 0(srcspc,src),t1
196 11: ldw 4(srcspc,src),t2
199 14: ldw 8(srcspc,src),t1
200 15: ldw 12(srcspc,src),t2
201 ldo 16(src),src
215 ldo -16(len),len
220 20: ldb 0(srcspc,src),t1
221 ldo 1(src),src
224 ldo -1(len),len
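Once the unrolled loops run out of full blocks, the remainder is finished with a word-at-a-time loop (the ldw,ma / ldo -4(len),len fragment) and finally a byte-at-a-time loop (the ldb / ldo 1(src),src / ldo -1(len),len fragment); the 32-bit aligned path seen in between is simply the word-width variant of the same unrolled scheme. A compact sketch of that cleanup tail, assuming both pointers are already word aligned here:

#include <stdint.h>

/* Cleanup tail: finish the remaining aligned words, then the odd bytes. */
static void copy_tail_sketch(unsigned char *dst, const unsigned char *src,
			     unsigned long len)
{
	while (len >= 4) {		/* ldw/stw,ma style word loop */
		*(uint32_t *)dst = *(const uint32_t *)src;
		src += 4;
		dst += 4;
		len -= 4;
	}
	while (len) {			/* ldb/stb byte loop */
		*dst++ = *src++;
		len--;
	}
}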
234 /* src and dst are not aligned the same way. */
237 /* align until dst is 32bit-word-aligned */
238 extru dst,31,2,t1
240 20: ldb 0(srcspc,src),t1
241 ldo 1(src),src
244 ldo -1(len),len
251 /* store src, dst and len in safe place */
252 copy src,save_src
257 SHRREG len,2,len
260 * Copy from a not-aligned src to an aligned dst using shifts.
264 depw,z src,28,2,t0
267 extru len,31,2,t0
268 cmpib,= 2,t0,.Lcase2
269 /* Make src aligned by rounding it down. */
270 depi 0,31,2,src
280 1: ldw,ma 4(srcspc,src), a3
282 1: ldw,ma 4(srcspc,src), a0
286 1: ldw,ma 4(srcspc,src), a2
288 1: ldw,ma 4(srcspc,src), a3
290 ldo -1(len),len
293 1: ldw,ma 4(srcspc,src), a0
299 1: ldw,ma 4(srcspc,src), a1
305 1: ldw,ma 4(srcspc,src), a2
311 1: ldw,ma 4(srcspc,src), a3
316 ldo -4(len),len
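This is the core of the unaligned strategy the header comment calls shift-and-write: dst has already been word aligned and len converted to a word count (the copy src,save_src / SHRREG len,2,len fragments above), src is rounded down to a word boundary, and each destination word is assembled from two adjacent source words with shifts, four words per trip through the rotating a0..a3 registers. A hedged single-word-window C model, assuming a big-endian word layout like PA-RISC and a genuinely misaligned src (shift of 8, 16 or 24 bits):

#include <stdint.h>

/*
 * Shift-and-write sketch.  Preconditions mirror the assembly: dst is word
 * aligned, src is NOT word aligned, words = len / 4.  Like the assembly it
 * reads whole source words containing the wanted bytes, and it keeps only
 * one word of lookbehind where the real loop keeps four (a0..a3).
 */
static void shift_copy_words_sketch(uint32_t *dst, const unsigned char *src,
				    unsigned long words)
{
	const uint32_t *wsrc = (const uint32_t *)((uintptr_t)src & ~3UL);
	unsigned int sh = ((uintptr_t)src & 3) * 8;	/* 8, 16 or 24 */
	uint32_t a = *wsrc++;				/* preloaded word */

	while (words--) {
		uint32_t b = *wsrc++;
		*dst++ = (a << sh) | (b >> (32 - sh));	/* big-endian merge */
		a = b;					/* slide the window */
	}
}

The len % 4 leftover bytes, and anything after a faulting load, are finished by the byte-copy loop via the .Lcopy_done path shown next.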
326 /* calculate new src, dst and len and jump to byte-copy loop */
328 add save_src,t0,src
333 1: ldw,ma 4(srcspc,src), a0
335 1: ldw,ma 4(srcspc,src), a1
340 1: ldw,ma 4(srcspc,src), a1
342 1: ldw,ma 4(srcspc,src), a2
345 ldo 2(len),len
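Two pieces of bookkeeping close the routine. First, .Lcopy_done recomputes src, dst and len from the saved copies (save_src/save_dst/save_len) so that a faulting load, or the len % 4 leftover, can be finished in the plain byte-copy loop. Second, the extru len,31,2,t0 / cmpib,= 2,t0,.Lcase2 dispatch and the .Lcase1/.Lcase2 entry points appear to jump into the middle of the 4-word unrolled loop according to len mod 4, Duff's-device style. A C sketch of that computed entry, with plain word copies standing in for the shift-merge so the control flow stays visible:

#include <stdint.h>

/* Duff's-device style entry into a 4-way unrolled word copy, chosen by
 * words % 4, as the .LcaseN labels do for the shift loop above. */
static void copy_words_duff_sketch(uint32_t *dst, const uint32_t *src,
				   unsigned long words)
{
	unsigned long trips = (words + 3) / 4;

	if (!words)
		return;
	switch (words % 4) {
	case 0: do {	*dst++ = *src++;
	case 3:		*dst++ = *src++;
	case 2:		*dst++ = *src++;
	case 1:		*dst++ = *src++;
		} while (--trips);
	}
}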