Lines Matching +full:src +full:- +full:2

23  * dma-coherent systems.
36 #include <asm/asm-offsets.h>
41 #define src a1
47 * memcpy copies len bytes from src to dst and sets v0 to dst.
49 * - src and dst don't overlap
50 * - src is readable
51 * - dst is writable
54 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
56 * __copy_user assumes that src and dst don't overlap, and that the call is
59 * - src is readable (no exceptions when reading src)
61 * - dst is writable (no exceptions when writing dst)
62 * __copy_user uses a non-standard calling convention; see
63 * include/asm-mips/uaccess.h
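Lines 47-63 spell out the contracts of memcpy and __copy_user. Below is a minimal C sketch of the __copy_user contract only, assuming nothing beyond what the comment states: the function name and the fault_after parameter are invented for illustration, and the real routine reports the uncopied count in register a2 rather than as a C return value.

    #include <stddef.h>
    #include <string.h>

    /*
     * Illustrative model of the __copy_user contract: copy up to len bytes
     * and report how many bytes were NOT copied.  fault_after is a
     * hypothetical stand-in for a faulting access cutting the copy short.
     */
    static size_t copy_user_model(void *dst, const void *src, size_t len,
                                  size_t fault_after)
    {
        size_t n = len < fault_after ? len : fault_after;

        memcpy(dst, src, n);    /* the portion that copies successfully */
        return len - n;         /* uncopied bytes, reported in a2 by the real code */
    }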
76 * 1- AT contains the address of the byte just past the end of the source
78 * 2- src_entry <= src < AT, and
79 * 3- (dst - src) == (dst_entry - src_entry),
83 * (2) is met by incrementing src by the number of bytes copied
84 * (3) is met by not doing loads between a pair of increments of dst and src
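Invariants 1-3 (lines 76-84) exist so the load-fault fixup can recover the remaining work with plain pointer arithmetic. A sketch of that arithmetic with illustrative names; the real code reads the faulting address from THREAD_BUADDR and writes the result back into len.

    /*
     * at_end models AT (one byte past the end of the source); bad_addr
     * models the faulting load address.  Invariant 2 keeps bad_addr below
     * at_end, so the subtraction cannot underflow.
     */
    static unsigned long bytes_left_uncopied(unsigned long at_end,
                                             unsigned long bad_addr)
    {
        return at_end - bad_addr;
    }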
95 #define ST_INSN 2
98 #define DST_PREFETCH 2
100 #define EVA_MODE 2
102 #define KERNELOP 2
140 * Only on the 64-bit kernel can we make use of 64-bit registers.
200 #define LOG_NBYTES 2
250 #define REST(unit) (FIRST(unit)+NBYTES-1)
253 #define ADDRMASK (NBYTES-1)
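ADDRMASK (line 253) and the rem computations further down (lines 319, 356, 443, 479) all rely on NBYTES being a power of two, so that masking with size minus one is the same as taking the remainder. A self-contained check of that identity; the NBYTES value here is only an example for the 64-bit case.

    #include <assert.h>

    #define NBYTES   8              /* example: 64-bit kernel, LD/SD move 8 bytes */
    #define ADDRMASK (NBYTES - 1)

    int main(void)
    {
        unsigned long len = 1234;

        /* rem = len % (8*NBYTES), as in the main-loop setup */
        assert((len & (8 * NBYTES - 1)) == len % (8 * NBYTES));
        /* rem = len % NBYTES, as in the cleanup loops and the alignment test */
        assert((len & ADDRMASK) == len % NBYTES);
        return 0;
    }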
281 * Note: dst & src may be unaligned, len may be 0
294 PREFS( 0, 0(src) )
298 PREFS( 0, 1*32(src) )
301 and t0, src, ADDRMASK
302 PREFS( 0, 2*32(src) )
303 PREFD( 1, 2*32(dst) )
313 * use delay slot for fall-through
314 * src and dst are aligned; need to compute rem
319 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES)
320 PREFS( 0, 3*32(src) )
325 LOAD(t0, UNIT(0)(src), .Ll_exc\@)
326 LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
327 LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
328 LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
330 LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
331 LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
334 LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
335 LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
336 ADD src, src, 8*NBYTES
338 STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
339 STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
340 STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
341 STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
342 STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
343 STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
344 PREFS( 0, 8*32(src) )
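The unrolled loop at lines 325-344 is easier to read as C. The sketch below is simplified: the real loop re-uses t0/t1 for units 6 and 7 and interleaves two of the stores between the load groups, but the overall shape (eight word-sized units per iteration, loads issued well ahead of the matching stores) is the same.

    #include <stddef.h>

    /* Simplified model of the both-aligned, 8-units-per-iteration main loop. */
    static void copy_aligned_model(unsigned long *dst, const unsigned long *src,
                                   size_t units /* len / NBYTES */)
    {
        while (units >= 8) {
            unsigned long t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
            unsigned long t4 = src[4], t5 = src[5], t6 = src[6], t7 = src[7];

            dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
            dst[4] = t4; dst[5] = t5; dst[6] = t6; dst[7] = t7;

            src += 8;
            dst += 8;
            units -= 8;
        }
        /* leftover units and any sub-word tail are handled by the later loops */
    }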
356 and rem, len, (NBYTES-1) # rem = len % NBYTES
360 LOAD( t0, UNIT(0)(src), .Ll_exc\@)
361 LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
362 LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
363 LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
365 ADD src, src, 4*NBYTES
369 STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
383 LOAD(t0, 0(src), .Ll_exc\@)
384 ADD src, src, NBYTES
394 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
397 * because we can't assume read access to dst. Instead, use
401 * wide-issue mips processors because the code has fewer branches and
402 * more instruction-level parallelism.
409 LOAD(t0, 0(src), .Ll_exc\@)
412 STREST(t0, -1(t1), .Ls_exc\@)
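Lines 394-412 finish the aligned case: one full-width load from src and a single partial store (STREST) that writes only the last rem bytes of dst, so dst never has to be readable. Below is a rough little-endian model of that partial store; the names are invented for the example, and the real swr/sdr-style instruction does this without reading dst at all.

    #include <stddef.h>
    #include <string.h>

    /*
     * Little-endian model of STREST(t0, -1(t1)): reg was loaded whole from
     * the aligned src, and only its low rem bytes are written, ending at
     * dst_end (which is dst + len in the code above).
     */
    static void strest_model(unsigned char *dst_end, unsigned long reg, size_t rem)
    {
        memcpy(dst_end - rem, &reg, rem);   /* writes [dst_end - rem, dst_end) */
    }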
418 * t0 = src & ADDRMASK
423 * Set match = (src and dst have same alignment)
426 LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
428 LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
437 ADD src, src, t2
440 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
441 PREFS( 0, 3*32(src) )
443 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
450 * are to the same unit (unless src is aligned, but it's not).
453 LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
454 LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
456 LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
457 LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
458 LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
459 LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
460 LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
461 LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
462 PREFS( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed)
463 ADD src, src, 4*NBYTES
469 STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
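FIRST(n) and REST(n) (line 250) are the offsets of the first and last byte of the n-th NBYTES-wide unit, and the LDFIRST/LDREST pairs above (lwl/lwr or ldl/ldr) assemble one unaligned unit between exactly those two offsets. Portable C can only approximate the pair with an ordinary unaligned load; the sketch below, using an assumed FIRST() definition consistent with the REST() shown at line 250, is meant to show which bytes each unit covers rather than how the hardware does it.

    #include <stddef.h>
    #include <string.h>

    #define NBYTES      sizeof(unsigned long)
    #define FIRST(unit) ((unit) * NBYTES)              /* assumed definition */
    #define REST(unit)  (FIRST(unit) + NBYTES - 1)     /* last byte of the unit */

    /* Approximation of LDFIRST/LDREST for unit n of a possibly unaligned src:
     * the pair touches exactly bytes FIRST(n)..REST(n). */
    static unsigned long load_unit(const unsigned char *src, int unit)
    {
        unsigned long v;

        memcpy(&v, src + FIRST(unit), NBYTES);
        return v;
    }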
479 and rem, len, NBYTES-1 # rem = len % NBYTES
484 LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
485 LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
486 ADD src, src, NBYTES
502 LOADB(t0, N(src), .Ll_exc\@); \
510 COPY_BYTE(2)
515 LOADB(t0, NBYTES-2(src), .Ll_exc\@)
518 STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
528 COPY_BYTE(2)
534 ADD src, src, 8
546 * Copy bytes from src until faulting load address (or until a
553 * Assumes src < THREAD_BUADDR($28)
559 LOADB(t1, 0(src), .Ll_exc\@)
560 ADD src, src, 1
561 sb t1, 0(dst) # can't fault -- we're copy_from_user
564 bne src, t0, 1b
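Lines 546-564 are the slow path taken after a wide load faults: copy the bytes that are still known to be good one at a time, stopping at the faulting address. A sketch of that loop follows; bad_addr stands in for THREAD_BUADDR and the names are illustrative.

    /*
     * Assumes, as the comment above says, that src < bad_addr on entry.
     * The stores cannot fault because this path is only used for
     * copy_from_user, where dst is a kernel buffer.
     */
    static unsigned char *copy_until_fault(unsigned char *dst,
                                           const unsigned char *src,
                                           const unsigned char *bad_addr)
    {
        do {
            *dst++ = *src++;
        } while (src != bad_addr);
        return dst;
    }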
588 SEXC(2)
607 sltu t0, a1, t0 # dst + len <= src -> memcpy
608 sltu t1, a0, t1 # dst >= src + len -> memcpy
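The two sltu tests above (lines 607-608) are memmove's overlap check: when dst + len <= src or dst >= src + len the regions cannot overlap and plain memcpy is used, otherwise control falls through to __rmemcpy. A C sketch of that dispatch; rmemcpy_model is a placeholder that is sketched after the __rmemcpy lines below.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void rmemcpy_model(void *dst, const void *src, size_t len);

    /* Model of the overlap test encoded by the two sltu instructions. */
    static void *memmove_model(void *dst, const void *src, size_t len)
    {
        uintptr_t d = (uintptr_t)dst, s = (uintptr_t)src;

        if (d + len <= s || d >= s + len)   /* no overlap: memcpy is safe */
            memcpy(dst, src, len);
        else
            rmemcpy_model(dst, src, len);   /* overlap: use the careful copy */
        return dst;
    }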
616 LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
618 beqz t0, .Lr_end_bytes_up # src >= dst
621 ADD a1, a2 # src = src + len
625 lb t0, -1(a1)
627 sb t0, -1(a0)
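__rmemcpy (lines 616-627) handles the overlapping case with a byte loop that runs backwards when src < dst and forwards otherwise. A C equivalent, matching the placeholder used in the memmove sketch above:

    #include <stddef.h>

    /* Byte-at-a-time copy that is safe for overlapping buffers. */
    static void rmemcpy_model(void *dst, const void *src, size_t len)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (s < d) {                /* copy from the end, like .Lr_end_bytes */
            while (len--)
                d[len] = s[len];
        } else {                    /* copy from the start, like .Lr_end_bytes_up */
            while (len--)
                *d++ = *s++;
        }
    }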
660 LEAF(memcpy) /* a0=dst a1=src a2=len */
670 /* Legacy Mode, user <-> user */
680 * virtual <-> physical translation when a virtual address is actually in user