copy_user.S - OpenGrok cross reference for /Linux-v6.1/arch/ia64/lib/copy

Lines Matching +full:byte +full:- +full:len
1 /* SPDX-License-Identifier: GPL-2.0 */
23  * Copyright (C) 2000-2001 Hewlett-Packard Co
27  *	- handle the case where we have more than 16 bytes and the alignment
29  *	- more benchmarking
30  *	- fix extraneous stop bit introduced by the EX() macro.
39 #define COPY_BREAK	16	// we do byte copy below (must be >=16)
42 #define EPI		p[PIPE_DEPTH-1]
49 #define len		in2  macro
83 	adds len2=-1,len	// br.ctop is repeat/until
86 	;;			// RAW of cfm when len=0
87 	cmp.eq p8,p0=r0,len	// check for zero length
92 	add enddst=dst,len	// first byte after end of destination
93 	add endsrc=src,len	// first byte after end of source
105 	cmp.lt p10,p7=COPY_BREAK,len	// if len > COPY_BREAK then long copy
111 	// Now we do the byte by byte loop with software pipeline
116 	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
125 	// Not 8-byte aligned
133 	// The basic idea is that we copy byte-by-byte at the head so
134 	// that we can reach 8-byte alignment for both src1 and dst1.
135 	// Then copy the body using software pipelined 8-byte copy,
136 	// shifting the two back-to-back words right and left, then copy
137 	// the tail by copying byte-by-byte.
139 	// Fault handling. If the byte-by-byte at the head fails on the
142 	// If 8-byte software pipeline fails on the load, do the same as
143 	// failure_in3 does. If the byte-by-byte at the tail fails, it is
153 	// Optimization. If dst1 is 8-byte aligned (quite common), we don't need
154 	// to copy the head to dst1, to start 8-byte copy software pipeline.
155 	// We know src1 is not 8-byte aligned in this case.
164 	sub len1=len,t1					// set len1
175 (p14)	sub word1=8,src2				// (8 - src offset)
177 (p15)	sub word1=8,dst2				// (8 - dst offset)
184 	sub len1=len,word1				// resulting len
189 	adds cnt=-1,word1
198 	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
205 (p9)	br.cond.spnt 4f			// if (16 > len1) skip 8-byte copy
207 	shr.u cnt=len1,3		// number of 64-bit words
209 	adds cnt=-1,cnt
215 	// Now both src1 and dst1 point to an 8-byte aligned address. And
230 	// because we need 2 back-to-back val1[] to get tmp.
234 #define EPI_1		p[PIPE_DEPTH-2]
242 (EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
250 (EPI_1)	shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift;	\
259 	// Since the instruction 'shrp' requires a fixed 128-bit value
290 (p14)	adds dst1=-8,dst1
298 	// To fix that, we simply copy the tail byte by byte.
309 	EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
325 	mov len1=len		// copy because of rotation
331 	// forward slowly until we reach 16byte alignment: no need to
334 	EX(.failure_in1,(p6) ld1 val1[0]=[src1],1)	// 1-byte aligned
335 (p6)	adds len1=-1,len1;;
338 	EX(.failure_in1,(p7) ld2 val1[1]=[src1],2)	// 2-byte aligned
339 (p7)	adds len1=-2,len1;;
346 	EX(.failure_in1,(p8) ld4 val2[0]=[src1],4)	// 4-byte aligned
355 	EX(.failure_in1,(p9) ld8 val2[1]=[src1],8)	// 8-byte aligned
357 (p8)	adds len1=-4,len1
360 (p9)	adds len1=-8,len1;;
361 	shr.u cnt=len1,4		// number of 128-bit (2x64bit) words
366 	adds tmp=-1,cnt			// br.ctop is repeat/until
380 	EX(.failure_out, (EPI)	st8 [dst1]=val1[PIPE_DEPTH-1],16)
381 (EPI)	st8 [dst2]=val2[PIPE_DEPTH-1],16
385 	// Tail correction based on len only
388 	// is 16 byte aligned AND we have less than 16 bytes to copy.
402 	EX(.failure_in1,(p9) ld1 val2[1]=[src1])	// only 1 byte left
416 	// Here we handle the case where the byte by byte copy fails
420 	//	- the pipeline: loads/stores are not in sync (pipeline)
425 	//	- pipeline effect
430 	//	- single/multi dispersal independence.
433 	//	- we don't disrupt the pipeline, i.e. data in transit in
443 (EPI)	st1 [dst1]=val1[PIPE_DEPTH-1],1
452 	// This is the case where the byte by byte copy fails on the load
460 (EPI)	st1 [dst1]=val1[PIPE_DEPTH-1],1
463 	sub len=enddst,dst1,1		// precompute len
481 	//  ---------|-----
495 	// As we move towards eight byte alignment we may encounter faults.
499 	//	- if you fail on 1, 2, 4 then you have never executed any smaller
510 	//	- if you fail on the ld8 in the head, it means you went straight
511 	//	  to it, i.e. 8-byte alignment within a nonexistent page.
513 	// you are 8-byte aligned but also 16-byte aligned, therefore you would
514 	// either go for the 16byte copy loop OR the ld8 in the tail part.
517 	// would have defaulted to the byte by byte copy.
521 	// Here we know we have less than 16 bytes AND we are either 8 or 16 byte
526 	//		- are right on a page boundary
528 	//		- are at more than 16 bytes from a page boundary with
541 	sub len=endsrc,src1,1
551 	mov ar.lc=len		// Continue with a stupid byte store.
578 (EPI)	st8 [dst1]=val1[PIPE_DEPTH-1],16
579 (EPI)	st8 [dst2]=val2[PIPE_DEPTH-1],16
583 	sub len=enddst,dst1,1		// precompute len
594 	sub len=enddst,dst1,1		// precompute len