Lines Matching +full:64 +full:- +full:byte

15  * Fast assembler language version of the following C-program for memcpy
16 * which represents the `standard' for the C-library.
26 * } while (--n != 0);
49 * if dst on byte 1
51 * if dst on byte 3
52 * load words, shift 1 byte, store words; branch to finish_up
60 * copy with ldx/stx in 8-way unrolled loop;
61 * copy final 0-63 bytes; exit with dst addr
63 * align dst on 64 byte boundary; for main data movement:
66 * lines from memory. But pre-store first element of each cache line
74 * move words in 8-way unrolled loop
75 * move final 0-31 bytes; exit with dst addr
77 * use alignaddr/faligndata combined with ldd/std in 8-way
82 * align dst on 64 byte boundary; prefetch src data to L1 cache
83 * loadx8, falign, block-store, prefetch loop
84 * (only use block-init-store when src/dst on 8 byte boundaries.)
165 #define BLOCK_SIZE 64
168 #define SHORT_LONG 64 /* max copy for short longword-aligned case */
169 /* must be at least 64 */
171 #define MED_UMAX 1024 /* max copy for medium un-aligned case */
172 #define MED_WMAX 1024 /* max copy for medium word-aligned case */
173 #define MED_MAX 1024 /* max copy for medium longword-aligned case */
174 #define ST_CHUNK 24 /* ST_CHUNK - block of values for BIS Store */
201 andcc %o5, 7, %o5 ! bytes till DST 8 byte aligned
209 EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5) ! load one byte
233 ble,pn %xcc, .Lmedl63 ! skip big loop if less than 64 bytes
237 subcc %o2, 64, %o2 ! decrement length count
239 EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56) ! a block of 64
247 EX_LD(LOAD(ldx, %o1+40, %o3), memcpy_retl_o2_plus_63_24)! a block of 64
248 add %o1, 64, %o1 ! increase src ptr by 64
250 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16)
251 add %o0, 64, %o0 ! increase dst ptr by 64
252 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16)
253 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8)
254 bgu,pt %xcc, .Lmedl64 ! repeat if at least 64 bytes left
255 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8)
266 EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
268 EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16)
269 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8)
270 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8)
279 EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8)
281 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8)
293 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8) ! and store 8
299 ! DST is 8-byte aligned, src is not
333 EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
335 EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16)
339 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8)
341 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8)
344 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8)
360 EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8)
363 EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8)
365 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8)
375 EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4) ! load 4 bytes
377 EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
388 EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
393 .Llarge_align8_copy: ! Src and dst share 8 byte alignment
394 ! align dst to 64 byte boundary
395 andcc %o0, 0x3f, %o3 ! %o3 == 0 means dst is 64 byte aligned
404 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
413 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
415 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
428 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
430 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
458 cmp %o5, ST_CHUNK*64
465 add %o1, 64, %o1
472 sllx %o3, 6, %o4 ! ST_CHUNK*64
494 add %o1, 64, %o1
498 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5)
499 sub %o5, 64, %o5
504 cmp %o5, ST_CHUNK*64
518 subcc %o5, 64, %o5
526 add %o1, 64, %o1
528 add %o0, 64, %o0
529 EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64)
541 ! Dst is on 8 byte boundary; src is not; remaining count > SMALL_MAX
561 add %o2, 64, %o2 ! adjust to leave loop
562 sub %o5, 64, %o5 ! early if necessary
601 andcc %o0, 0x3f, %o3 ! is dst 64-byte block aligned?
603 sub %o3, 64, %o3 ! %o3 will be multiple of 8
604 neg %o3 ! bytes until dest is 64 byte aligned
608 bnz %xcc, .Lunalignbyte ! check for byte alignment
619 EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4
622 EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4)
626 ! Src is half-word aligned
643 ! Src is Byte aligned
668 ! Destination is now block (64 byte aligned)
672 add %o2, 64, %o2 ! Ensure we do not load beyond
673 sub %o5, 64, %o5 ! end of source buffer
689 subcc %o5, 64, %o5
702 add %o4, 64, %o4
703 EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40)
707 add %o0, 64, %o0
708 EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40)
714 ! Handle trailing bytes, 64 to 127
764 andncc %o2, 0x20 - 1, %o5
787 EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
797 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4)
801 /* First get dest 8 byte aligned. */
812 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
817 mov 64, %g2
822 andn %o2, 0x08 - 1, %o5
856 andn %o2, 0x4 - 1, %o5
864 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
875 EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1)
886 EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_3)! read byte
888 EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_7)! write byte & repeat
892 EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5)
894 EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5)
895 EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4)
897 EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4)
902 EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_1) ! load one byte
904 EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_1) ! store one byte
905 EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2) ! load second byte
908 EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_1)! store second byte
909 EX_LD(LOAD(ldub, %o1+2, %o3), memcpy_retl_o2) ! load third byte
910 EX_ST(STORE(stb, %o3, %o0+2), memcpy_retl_o2) ! store third byte
923 .size FUNC_NAME, .-FUNC_NAME