Lines Matching +full:src +full:- +full:2 (arch/mips/lib/csum_partial.S)
15 #include <asm/asm-offsets.h>
75 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
76 LOAD _t0, (offset + UNIT(0))(src); \
77 LOAD _t1, (offset + UNIT(1))(src); \
78 LOAD _t2, (offset + UNIT(2))(src); \
79 LOAD _t3, (offset + UNIT(3))(src); \
86 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
87 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
89 #define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
90 CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3); \
91 CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
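The two CSUM_BIGCHUNK definitions above are alternates: in the kernel source they sit under #ifdef USE_DOUBLE (64-bit loads, so one CSUM_BIGCHUNK1 pass covers 32 bytes) and #else (32-bit loads, hence two passes); the match filter dropped the conditional lines. A minimal C model of one CSUM_BIGCHUNK1 pass for the 32-bit build, where addc32() is a stand-in for the assembly's ADDC macro (an add that folds the carry bit back into the sum):

    #include <stdint.h>

    /* End-around-carry add: C model of the assembly's ADDC macro. */
    static uint32_t addc32(uint32_t sum, uint32_t v)
    {
        uint64_t t = (uint64_t)sum + v;
        return (uint32_t)t + (uint32_t)(t >> 32);
    }

    /* One CSUM_BIGCHUNK1 pass: fold four consecutive words into sum.
     * The pairwise adds mirror the assembly's scheduling, letting the
     * loads overlap with the carry-adds. */
    static uint32_t csum_bigchunk1(const uint32_t *src, uint32_t sum)
    {
        uint32_t t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];

        t0  = addc32(t0, t1);
        t2  = addc32(t2, t3);
        sum = addc32(sum, t0);
        return addc32(sum, t2);
    }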
100 #define src a0
115 andi t7, src, 0x1 /* odd buffer? */
119 andi t8, src, 0x2
121 lbu t0, (src)
127 PTR_ADDU src, src, 0x1
128 andi t8, src, 0x2
134 lhu t0, (src)
138 PTR_ADDU src, src, 0x2
144 andi t8, src, 0x4
146 andi t8, src, 0x8
148 LOAD32 t0, 0x00(src)
151 PTR_ADDU src, src, 0x4
152 andi t8, src, 0x8
156 andi t8, src, 0x10
159 ld t0, 0x00(src)
163 lw t0, 0x00(src)
164 lw t1, 0x04(src)
169 PTR_ADDU src, src, 0x8
170 andi t8, src, 0x10
177 ld t0, 0x00(src)
178 ld t1, 0x08(src)
182 CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
185 PTR_ADDU src, src, 0x10
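Lines 115 through 185 are the alignment prologue: csum_partial peels off 1, 2, 4, 8, then 16 bytes as needed so src reaches a 32-byte boundary before the unrolled loops, recording in t7 whether the buffer started on an odd address (the final sum is byte-swapped in that case). A hedged C sketch of the first two peel steps, assuming a little-endian build for the odd-byte shift; peel_align() and the pointer-in/pointer-out shape are illustrative, not the kernel's interface:

    #include <stdint.h>
    #include <string.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* Peel the odd byte and the odd halfword so src becomes 4-byte
     * aligned; the real code continues in the same pattern through
     * the 8- and 16-byte steps until src is 32-byte aligned. */
    static uint32_t peel_align(const uint8_t **srcp, size_t *lenp,
                               uint32_t sum, int *odd)
    {
        const uint8_t *src = *srcp;
        size_t len = *lenp;

        *odd = (uintptr_t)src & 0x1;               /* andi t7, src, 0x1 */
        if (*odd && len) {                         /* lbu t0, (src)     */
            sum = addc32(sum, (uint32_t)*src << 8);/* sll 8 on __MIPSEL__ */
            src++; len--;
        }
        if (((uintptr_t)src & 0x2) && len >= 2) {  /* lhu t0, (src)     */
            uint16_t h;
            memcpy(&h, src, sizeof h);
            sum = addc32(sum, h);
            src += 2; len -= 2;
        }
        *srcp = src; *lenp = len;
        return sum;
    }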
193 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
194 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
195 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
196 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
199 PTR_ADDU src, src, 0x80
208 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
209 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
210 PTR_ADDU src, src, 0x40
217 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
219 PTR_ADDU src, src, 0x20
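Lines 193 through 219 are the steady state: 128 bytes per iteration via four CSUM_BIGCHUNKs, then at most one 64-byte and one 32-byte block on the way out. A C model of that control flow; csum_chunk32() is a stand-in for CSUM_BIGCHUNK summing one 32-byte block, and csum_blocks() is an illustrative name:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* Stand-in for CSUM_BIGCHUNK: fold one 32-byte block into sum. */
    static uint32_t csum_chunk32(const uint8_t *p, uint32_t sum)
    {
        for (int i = 0; i < 8; i++) {
            uint32_t w;
            memcpy(&w, p + 4 * i, sizeof w);
            sum = addc32(sum, w);
        }
        return sum;
    }

    static uint32_t csum_blocks(const uint8_t **srcp, size_t *lenp,
                                uint32_t sum)
    {
        const uint8_t *src = *srcp;
        size_t len = *lenp;

        while (len >= 0x80) {                    /* 128-byte main loop */
            for (int off = 0; off < 0x80; off += 0x20)
                sum = csum_chunk32(src + off, sum);
            src += 0x80; len -= 0x80;
        }
        if (len >= 0x40) {                       /* one 64-byte block  */
            sum = csum_chunk32(src, sum);
            sum = csum_chunk32(src + 0x20, sum);
            src += 0x40; len -= 0x40;
        }
        if (len >= 0x20) {                       /* one 32-byte block  */
            sum = csum_chunk32(src, sum);
            src += 0x20; len -= 0x20;
        }
        *srcp = src; *lenp = len;
        return sum;
    }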
227 LOAD32 t0, (src)
231 PTR_ADDU src, src, 0x4
235 /* unknown src alignment and < 8 bytes to go */
241 andi t0, a1, 2
244 ulw t1, (src)
245 PTR_ADDIU src, 4
256 ulhu t1, (src)
257 PTR_ADDIU src, 2
262 lbu t2, (src)
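The tail (lines 227 through 262) drains any remaining aligned words, then handles the final, possibly misaligned, sub-8-byte remainder with the unaligned-load instructions ulw and ulhu. A C model, assuming a little-endian host to match the __MIPSEL__ lane placement; memcpy stands in for the unaligned loads and csum_tail() is an illustrative name:

    #include <stdint.h>
    #include <string.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* Final < 8 bytes at unknown alignment: word, halfword, byte. */
    static uint32_t csum_tail(const uint8_t *src, unsigned len, uint32_t sum)
    {
        if (len & 4) {                          /* ulw t1, (src)  */
            uint32_t w;
            memcpy(&w, src, sizeof w);
            sum = addc32(sum, w);
            src += 4;
        }
        if (len & 2) {                          /* ulhu t1, (src) */
            uint16_t h;
            memcpy(&h, src, sizeof h);
            sum = addc32(sum, h);
            src += 2;
        }
        if (len & 1)                            /* lbu t2, (src)  */
            sum = addc32(sum, *src);  /* big-endian builds shift by 8 */
        return sum;
    }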
311 * csum_partial_copy_nocheck(src, dst, len)
312 * __csum_partial_copy_kernel(src, dst, len)
318 #define src a0
330 #define ST_INSN 2
332 #define EVA_MODE 2
334 #define KERNELOP 2
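These constants parameterize the load/store macros so the one routine body can expand to kernel, user, or EVA (lwe/swe-style) memory instructions; their counterparts (LD_INSN, LEGACY_MODE, USEROP, presumably defined as 1) fell outside the match filter. A rough C legend of the three selector axes, with the 1-valued names assumed:

    /* Hypothetical C rendering of the selector constants: each memory
     * macro is instantiated with (insn kind, addressing mode, privilege
     * side) and expands to the matching instruction flavor. */
    enum insn_kind { LD_INSN = 1, ST_INSN = 2 };      /* load vs. store  */
    enum addr_mode { LEGACY_MODE = 1, EVA_MODE = 2 }; /* classic vs. EVA */
    enum priv_side { USEROP = 1, KERNELOP = 2 };      /* user vs. kernel */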
407 #define LOG_NBYTES 2
428 #define REST(unit) (FIRST(unit)+NBYTES-1)
430 #define ADDRMASK (NBYTES-1)
440 li sum, -1
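The copy flavors seed sum with -1 rather than 0. In one's-complement arithmetic ~0 is negative zero, so the seed does not change the checksum, and, assuming the modern calling convention where the user-copy variants return 0 on a fault, a nonzero seed guarantees a successful copy can never be mistaken for a fault. A quick check of the negative-zero identity:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    int main(void)
    {
        uint32_t x = 0x12345678;
        /* 0xffffffff is -0 in one's complement: adding it to a nonzero
         * value under end-around carry leaves the value unchanged. */
        assert(addc32(0xffffffffu, x) == x);
        return 0;
    }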
443 * Note: dst & src may be unaligned, len may be 0
456 and t0, src, ADDRMASK
462 * use delay slot for fall-through
463 * src and dst are aligned; need to compute rem
472 LOAD(t0, UNIT(0)(src))
473 LOAD(t1, UNIT(1)(src))
474 LOAD(t2, UNIT(2)(src))
475 LOAD(t3, UNIT(3)(src))
476 LOAD(t4, UNIT(4)(src))
477 LOAD(t5, UNIT(5)(src))
478 LOAD(t6, UNIT(6)(src))
479 LOAD(t7, UNIT(7)(src))
481 ADD src, src, 8*NBYTES
486 STORE(t2, UNIT(2)(dst))
512 and rem, len, (NBYTES-1) # rem = len % NBYTES
516 LOAD(t0, UNIT(0)(src))
517 LOAD(t1, UNIT(1)(src))
518 LOAD(t2, UNIT(2)(src))
519 LOAD(t3, UNIT(3)(src))
521 ADD src, src, 4*NBYTES
526 STORE(t2, UNIT(2)(dst))
541 LOAD(t0, 0(src))
542 ADD src, src, NBYTES
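Lines 472 through 542 are the aligned copy loops: eight units per iteration, then four, then one, each unit loaded, folded into the checksum, and stored. The deep unroll exists to keep several loads in flight at once. A C model of one unit of work, with copy_sum_aligned() as an illustrative name:

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* Both pointers NBYTES-aligned: copy and checksum unit by unit.
     * The assembly unrolls this 8x (then 4x) so the carry-add and
     * store of earlier units hide the latency of later loads. */
    static uint32_t copy_sum_aligned(uint32_t *dst, const uint32_t *src,
                                     size_t nunits, uint32_t sum)
    {
        while (nunits--) {
            uint32_t t = *src++;     /* LOAD(t0, UNIT(n)(src))  */
            sum = addc32(sum, t);    /* ADDC(sum, t0)           */
            *dst++ = t;              /* STORE(t0, UNIT(n)(dst)) */
        }
        return sum;
    }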
552 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
555 * because can't assume read-access to dst. Instead, use
559 * wide-issue mips processors because the code has fewer branches and
560 * more instruction-level parallelism.
567 LOAD(t0, 0(src))
570 STREST(t0, -1(t1))
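Lines 552 through 570 finish an aligned copy when rem < NBYTES bytes remain: one full word is loaded from src (safe, because src is aligned the read cannot cross into the next word), the unwanted bits are shifted out, and a single partial-store instruction (swr/sdr via STREST) writes just rem bytes to dst, avoiding both a byte loop and any read of dst. A C stand-in, since C has no partial store; copy_sum_rem() is an illustrative name and memcpy models both partial accesses:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* rem < NBYTES, src aligned: the assembly loads a whole word,
     * shift-discards the unwanted bytes, stores the kept bytes with
     * one SWR/SDR, then folds only the kept bytes into the sum. */
    static uint32_t copy_sum_rem(uint8_t *dst, const uint8_t *src,
                                 size_t rem, uint32_t sum)
    {
        uint32_t t = 0;
        memcpy(&t, src, rem);    /* asm: full-word LOAD + shift-discard */
        memcpy(dst, &t, rem);    /* asm: one partial store, STREST      */
        return addc32(sum, t);   /* only the kept bytes are summed      */
    }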
579 * t0 = src & ADDRMASK
584 * Set match = (src and dst have same alignment)
587 LDFIRST(t3, FIRST(0)(src))
589 LDREST(t3, REST(0)(src))
601 ADD src, src, t2
604 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
606 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
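The iteration count comes straight from len: each pass moves four units of NBYTES bytes, hence the shift by LOG_NBYTES + 2 and the mask of 4*NBYTES - 1. A worked example with NBYTES == 4:

    #include <assert.h>

    int main(void)
    {
        unsigned len = 57;
        /* 4 units/iteration * 4 bytes/unit = 16 bytes per pass */
        assert((len >> (2 + 2)) == 3);    /* SRL t0, len, LOG_NBYTES+2 */
        assert((len & (4*4 - 1)) == 9);   /* rem = len % (4*NBYTES)    */
        return 0;
    }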
612 * are to the same unit (unless src is aligned, but it's not).
614 LDFIRST(t0, FIRST(0)(src))
615 LDFIRST(t1, FIRST(1)(src))
617 LDREST(t0, REST(0)(src))
618 LDREST(t1, REST(1)(src))
619 LDFIRST(t2, FIRST(2)(src))
620 LDFIRST(t3, FIRST(3)(src))
621 LDREST(t2, REST(2)(src))
622 LDREST(t3, REST(3)(src))
623 ADD src, src, 4*NBYTES
631 STORE(t2, UNIT(2)(dst))
642 and rem, len, NBYTES-1 # rem = len % NBYTES
646 LDFIRST(t0, FIRST(0)(src))
647 LDREST(t0, REST(0)(src))
648 ADD src, src, NBYTES
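In the src-unaligned path each unit is fetched with an LDFIRST/LDREST pair (lwl/lwr-style partial loads); after a first partial unit aligns dst, the loop runs with dst aligned and src not. The comment at line 612 matters for fault handling: both halves of each pair touch the same unit. In C a plain memcpy expresses the same misaligned read; copy_sum_unaligned() is an illustrative name:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* dst aligned, src at an arbitrary offset: every unit needs the
     * two-instruction LDFIRST/LDREST pair in assembly; memcpy lets
     * the compiler emit the equivalent sequence. */
    static uint32_t copy_sum_unaligned(uint32_t *dst, const uint8_t *src,
                                       size_t nunits, uint32_t sum)
    {
        while (nunits--) {
            uint32_t t;
            memcpy(&t, src, sizeof t);   /* LDFIRST + LDREST        */
            sum = addc32(sum, t);
            *dst++ = t;                  /* STORE(tn, UNIT(n)(dst)) */
            src += sizeof t;
        }
        return sum;
    }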
666 #define SHIFT_START 8*(NBYTES-1)
667 #define SHIFT_INC -8
672 LOADBU(t0, N(src)); \
683 COPY_BYTE(2)
688 LOADBU(t0, NBYTES-2(src))
690 STOREB(t0, NBYTES-2(dst))
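The COPY_BYTE epilogue moves the last rem < NBYTES bytes one at a time, shifting each byte into its own lane (starting at SHIFT_START and stepping by SHIFT_INC) before the carry-add, so the bytes are summed exactly as if they had arrived as one aligned word. The values matched here (8*(NBYTES-1), -8) are the big-endian flavor; little-endian builds start at 0 and step by +8. A C model assuming NBYTES == 4, with copy_sum_bytes() as an illustrative name:

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t addc32(uint32_t s, uint32_t v)
    { uint64_t t = (uint64_t)s + v; return (uint32_t)t + (uint32_t)(t >> 32); }

    /* Copy the final rem < NBYTES bytes, folding each shifted byte
     * into the checksum; the lanes are disjoint, so the per-byte
     * carry-adds equal one add of the packed word. */
    static uint32_t copy_sum_bytes(uint8_t *dst, const uint8_t *src,
                                   size_t rem, uint32_t sum)
    {
        int shift = 8 * (4 - 1);                     /* SHIFT_START */
        while (rem--) {
            uint8_t b = *src++;                      /* LOADBU      */
            *dst++ = b;                              /* STOREB      */
            sum = addc32(sum, (uint32_t)b << shift); /* SLLV + ADDC */
            shift += -8;                             /* SHIFT_INC   */
        }
        return sum;
    }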