memset.S - OpenGrok cross reference for /Linux-v5.10/arch/ia64/lib/memset.S

Lines Matching +full:many +full:- +full:to +full:- +full:one
1 /* SPDX-License-Identifier: GPL-2.0 */
4    Copyright (c) 2002 Hewlett-Packard Co/CERN
15    we get to a 16B-aligned address, then loop on 128 B chunks using an
18    Since a stf.spill f0 can store 16B in one go, we use this instruction
19    to get peak speed when value = 0.  */
42 // This routine uses only scratch predicate registers (p6 - p15)
43 #define p_scr		p6			// default register for same-cycle branches
72 	and	ptr2 = -(MIN1+1), dest		// aligned address
73 	and	tmp = MIN1, dest		// prepare to check for correct alignment
78 (p_scr)	br.ret.dpnt.many rp			// return immediately if count = 0
83 	sub	bytecnt = (MIN1+1), tmp		// NB: # of bytes to move is 1 higher than loopcnt
85 (p_scr)	br.cond.dptk.many .move_bytes_unaligned	// go move just a few (M_B_U)
93 (p_y)	add	cnt = -8, cnt			//
96 (p_y)	st8	[ptr2] = value,-4		//
100 (p_yy)	add	cnt = -4, cnt			//
103 (p_yy)	st4	[ptr2] = value,-2		//
108 (p_y)	add	cnt = -2, cnt			//
111 	setf.sig fvalue=value			// transfer value to FLP side
112 (p_y)	st2	[ptr2] = value,-1		//
120 (p_yy)	add	cnt = -1, cnt			//
121 (p_scr)	br.cond.dpnt.many .fraction_of_line	// go move just a few
127 (p_zr)	br.cond.dptk.many .l1b			// Jump to use stf.spill
130 	TEXT_ALIGN(32) // --------------------- //  L1A: store ahead into cache lines; fill later
132 	and	tmp = -(LINE_SIZE), cnt		// compute end of range
134 	and	cnt = (LINE_SIZE-1), cnt	// remainder
136 	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop
140 (p_scr)	add	loopcnt = -1, linecnt		//
145 	add	tmp = -1, linecnt		// next loop count
150 	stf8 [ptr9] = fvalue, 128		// Do stores one cache line apart
195 (p_scr) br.cond.dpnt.many  .fraction_of_line	// Branch no. 2
196 	br.cond.dpnt.many  .move_bytes_from_alignment	// Branch no. 3
200 .l1b:	// ------------------------------------ //  L1B: store ahead into cache lines; fill later
202 	and	tmp = -(LINE_SIZE), cnt		// compute end of range
204 	and	cnt = (LINE_SIZE-1), cnt	// remainder
206 	mov	loopcnt = PREF_AHEAD-1		// default prefetch loop
210 (p_scr)	add	loopcnt = -1, linecnt
215 	add	tmp = -1, linecnt		// next loop count
220 	stf.spill [ptr9] = f0, 128		// Do stores one cache line apart
249 (p_scr)	br.cond.dpnt.many  .move_bytes_from_alignment	//
259 	add	loopcnt = -1, loopcnt
260 (p_scr)	br.cond.dpnt.many .store_words
267 .l2:	// ------------------------------------ //  L2A:  store 32B in 2 cycles
274 	br.cloop.dptk.many .l2
279 (p_scr)	br.cond.dpnt.many .move_bytes_from_alignment	// Branch
285 	add	cnt = -8, cnt			// subtract
290 (p_y)	add	cnt = -8, cnt			// subtract
294 (p_yy)	add	cnt = -8, cnt			// subtract
319 	br.ret.sptk.many rp
331 (p_y)	st1	[ptr1] = value, 1		// fill 1 (odd-aligned) byte [15, 14 (or less) left]
332 (p_y)	add	cnt = -1, cnt
341 (p_yy)	add	cnt = -4, cnt
345 	add	ptr3 = -1, ptr3			// last store
350 (p_y)	add	cnt = -4, cnt
357 (p_yy)	add	cnt = -4, cnt
362 	br.ret.sptk.many rp