Lines matching "32" (full-word match)
51 32-way camellia
193 vpxor 5 * 32(mem_cd), x1, x1; \
200 vpxor 4 * 32(mem_cd), x0, x0; \
203 vpxor 6 * 32(mem_cd), x2, x2; \
206 vpxor 7 * 32(mem_cd), x3, x3; \
209 vpxor 0 * 32(mem_cd), x4, x4; \
212 vpxor 1 * 32(mem_cd), x5, x5; \
215 vpxor 2 * 32(mem_cd), x6, x6; \
218 vpxor 3 * 32(mem_cd), x7, x7;
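The eight vpxor matches above fold a 256-byte scratch area back into the register bank with the two four-lane halves exchanged: register xi picks up lane (i+4) mod 8 of mem_cd. A minimal C-intrinsics sketch of that data movement; the function name and the flat-buffer view of mem_cd are illustrative assumptions, not taken from the source:

    #include <immintrin.h>

    /* Sketch only: XOR each of eight 32-byte lanes of a scratch buffer
     * into the register bank, with the four-lane halves swapped
     * (x[i] ^= lane (i+4) mod 8), mirroring the vpxor lines above. */
    static void xor_swapped_halves(__m256i x[8], const unsigned char *mem_cd)
    {
        for (int i = 0; i < 8; i++) {
            const __m256i *lane =
                (const __m256i *)(mem_cd + ((i + 4) % 8) * 32);
            x[i] = _mm256_xor_si256(x[i], _mm256_loadu_si256(lane));
        }
    }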
251 vmovdqu x0, 4 * 32(mem_cd); \
252 vmovdqu x1, 5 * 32(mem_cd); \
253 vmovdqu x2, 6 * 32(mem_cd); \
254 vmovdqu x3, 7 * 32(mem_cd); \
255 vmovdqu x4, 0 * 32(mem_cd); \
256 vmovdqu x5, 1 * 32(mem_cd); \
257 vmovdqu x6, 2 * 32(mem_cd); \
258 vmovdqu x7, 3 * 32(mem_cd); \
269 vmovdqu x4, 4 * 32(mem_ab); \
270 vmovdqu x5, 5 * 32(mem_ab); \
271 vmovdqu x6, 6 * 32(mem_ab); \
272 vmovdqu x7, 7 * 32(mem_ab); \
273 vmovdqu x0, 0 * 32(mem_ab); \
274 vmovdqu x1, 1 * 32(mem_ab); \
275 vmovdqu x2, 2 * 32(mem_ab); \
276 vmovdqu x3, 3 * 32(mem_ab);
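The two store groups above write the same eight registers out twice: mem_cd receives them with the halves exchanged (x0..x3 at lanes 4..7, x4..x7 at lanes 0..3), while mem_ab gets natural lane order. A hedged sketch of the combined effect, with hypothetical names:

    #include <immintrin.h>

    /* Sketch only: spill the register bank to both scratch areas,
     * half-swapped into mem_cd, in natural lane order into mem_ab. */
    static void spill_states(const __m256i x[8],
                             unsigned char *mem_ab, unsigned char *mem_cd)
    {
        for (int i = 0; i < 8; i++) {
            _mm256_storeu_si256((__m256i *)(mem_cd + ((i + 4) % 8) * 32), x[i]);
            _mm256_storeu_si256((__m256i *)(mem_ab + i * 32), x[i]);
        }
    }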
298 * v0..3: byte-sliced 32-bit integers
339 vpbroadcastd kll, t0; /* only lowest 32 bits used */ \
357 vpbroadcastd krr, t0; /* only lowest 32 bits used */ \
358 vmovdqu l4, 4 * 32(l); \
360 vmovdqu l5, 5 * 32(l); \
362 vmovdqu l6, 6 * 32(l); \
364 vmovdqu l7, 7 * 32(l); \
380 vpor 4 * 32(r), t0, t0; \
381 vpor 5 * 32(r), t1, t1; \
382 vpor 6 * 32(r), t2, t2; \
383 vpor 7 * 32(r), t3, t3; \
385 vpxor 0 * 32(r), t0, t0; \
386 vpxor 1 * 32(r), t1, t1; \
387 vpxor 2 * 32(r), t2, t2; \
388 vpxor 3 * 32(r), t3, t3; \
389 vmovdqu t0, 0 * 32(r); \
390 vpbroadcastd krl, t0; /* only lowest 32 bits used */ \
391 vmovdqu t1, 1 * 32(r); \
392 vmovdqu t2, 2 * 32(r); \
393 vmovdqu t3, 3 * 32(r); \
408 vpand 0 * 32(r), t0, t0; \
409 vpand 1 * 32(r), t1, t1; \
410 vpand 2 * 32(r), t2, t2; \
411 vpand 3 * 32(r), t3, t3; \
415 vpxor 4 * 32(r), t0, t0; \
416 vpxor 5 * 32(r), t1, t1; \
417 vpxor 6 * 32(r), t2, t2; \
418 vpxor 7 * 32(r), t3, t3; \
419 vmovdqu t0, 4 * 32(r); \
420 vpbroadcastd klr, t0; /* only lowest 32 bits used */ \
421 vmovdqu t1, 5 * 32(r); \
422 vmovdqu t2, 6 * 32(r); \
423 vmovdqu t3, 7 * 32(r); \
445 vmovdqu l0, 0 * 32(l); \
447 vmovdqu l1, 1 * 32(l); \
449 vmovdqu l2, 2 * 32(l); \
451 vmovdqu l3, 3 * 32(l);
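The vpbroadcastd matches replicate one 32-bit subkey word (kll, krr, krl, klr) into every dword of a YMM register, and the vpand/vpor/vpxor groups then combine it with four lanes of the left or right half. Below is a simplified sketch of the AND-then-XOR shape seen at file lines 408-423; it deliberately omits the byte-slicing shifts and the rotate that Camellia's FL function applies, so treat it as an outline of the instruction pattern, not as FL itself:

    #include <immintrin.h>
    #include <stdint.h>

    /* Pattern sketch only (not the full FL function): broadcast a 32-bit
     * subkey word, AND it with the low four lanes of r, XOR the result
     * into the high four lanes, and store the high lanes back. */
    static void fl_like_half(uint32_t krl, unsigned char *r)
    {
        __m256i t = _mm256_set1_epi32((int)krl);   /* vpbroadcastd */
        for (int i = 0; i < 4; i++) {
            __m256i lo = _mm256_loadu_si256((const __m256i *)(r + i * 32));
            __m256i hi = _mm256_loadu_si256((const __m256i *)(r + (i + 4) * 32));
            __m256i m  = _mm256_and_si256(t, lo);      /* vpand n*32(r)     */
            hi = _mm256_xor_si256(m, hi);              /* vpxor (n+4)*32(r) */
            _mm256_storeu_si256((__m256i *)(r + (i + 4) * 32), hi);
        }
    }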
521 vpxor 0 * 32(rio), x0, y7; \
522 vpxor 1 * 32(rio), x0, y6; \
523 vpxor 2 * 32(rio), x0, y5; \
524 vpxor 3 * 32(rio), x0, y4; \
525 vpxor 4 * 32(rio), x0, y3; \
526 vpxor 5 * 32(rio), x0, y2; \
527 vpxor 6 * 32(rio), x0, y1; \
528 vpxor 7 * 32(rio), x0, y0; \
529 vpxor 8 * 32(rio), x0, x7; \
530 vpxor 9 * 32(rio), x0, x6; \
531 vpxor 10 * 32(rio), x0, x5; \
532 vpxor 11 * 32(rio), x0, x4; \
533 vpxor 12 * 32(rio), x0, x3; \
534 vpxor 13 * 32(rio), x0, x2; \
535 vpxor 14 * 32(rio), x0, x1; \
536 vpxor 15 * 32(rio), x0, x0;
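These sixteen vpxor matches whiten the whole input in one pass: every 32-byte lane of rio is XORed with x0 (which earlier, non-matching lines fill with the replicated pre-whitening key), and the results land in the registers in reversed order, lane 0 in y7 down to lane 15 in x0. A sketch under those assumptions:

    #include <immintrin.h>

    /* Sketch only: XOR one key-filled register against sixteen consecutive
     * 32-byte input lanes, reversing the order so lane 0 lands in the
     * last output slot (y7 in the listing) and lane 15 in the first (x0). */
    static void whiten_inputs(__m256i key, const unsigned char *rio,
                              __m256i out[16])
    {
        for (int i = 0; i < 16; i++) {
            __m256i in = _mm256_loadu_si256((const __m256i *)(rio + i * 32));
            out[15 - i] = _mm256_xor_si256(key, in);
        }
    }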
544 vmovdqu x0, 0 * 32(mem_ab); \
545 vmovdqu x1, 1 * 32(mem_ab); \
546 vmovdqu x2, 2 * 32(mem_ab); \
547 vmovdqu x3, 3 * 32(mem_ab); \
548 vmovdqu x4, 4 * 32(mem_ab); \
549 vmovdqu x5, 5 * 32(mem_ab); \
550 vmovdqu x6, 6 * 32(mem_ab); \
551 vmovdqu x7, 7 * 32(mem_ab); \
552 vmovdqu y0, 0 * 32(mem_cd); \
553 vmovdqu y1, 1 * 32(mem_cd); \
554 vmovdqu y2, 2 * 32(mem_cd); \
555 vmovdqu y3, 3 * 32(mem_cd); \
556 vmovdqu y4, 4 * 32(mem_cd); \
557 vmovdqu y5, 5 * 32(mem_cd); \
558 vmovdqu y6, 6 * 32(mem_cd); \
559 vmovdqu y7, 7 * 32(mem_cd);
591 vmovdqu x0, 0 * 32(rio); \
592 vmovdqu x1, 1 * 32(rio); \
593 vmovdqu x2, 2 * 32(rio); \
594 vmovdqu x3, 3 * 32(rio); \
595 vmovdqu x4, 4 * 32(rio); \
596 vmovdqu x5, 5 * 32(rio); \
597 vmovdqu x6, 6 * 32(rio); \
598 vmovdqu x7, 7 * 32(rio); \
599 vmovdqu y0, 8 * 32(rio); \
600 vmovdqu y1, 9 * 32(rio); \
601 vmovdqu y2, 10 * 32(rio); \
602 vmovdqu y3, 11 * 32(rio); \
603 vmovdqu y4, 12 * 32(rio); \
604 vmovdqu y5, 13 * 32(rio); \
605 vmovdqu y6, 14 * 32(rio); \
606 vmovdqu y7, 15 * 32(rio);
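The sixteen stores above mirror the mem_ab/mem_cd spills at file lines 544-559, writing the x bank to the first eight 32-byte lanes of the output and the y bank to the next eight. In sketch form:

    #include <immintrin.h>

    /* Sketch only: write both register banks to sixteen consecutive
     * 32-byte lanes of the output buffer, x bank first, then y bank. */
    static void store_outputs(const __m256i x[8], const __m256i y[8],
                              unsigned char *rio)
    {
        for (int i = 0; i < 8; i++) {
            _mm256_storeu_si256((__m256i *)(rio + i * 32), x[i]);
            _mm256_storeu_si256((__m256i *)(rio + (8 + i) * 32), y[i]);
        }
    }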
609 .section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
610 .align 32
617 .section .rodata.cst32.pack_bswap, "aM", @progbits, 32
618 .align 32
756 * %ymm0..%ymm15: 32 plaintext blocks
758 * %ymm0..%ymm15: 32 encrypted blocks, order swapped:
763 leaq 8 * 32(%rax), %rcx;
803 vmovdqu 0 * 32(%rcx), %ymm8;
804 vmovdqu 1 * 32(%rcx), %ymm9;
805 vmovdqu 2 * 32(%rcx), %ymm10;
806 vmovdqu 3 * 32(%rcx), %ymm11;
807 vmovdqu 4 * 32(%rcx), %ymm12;
808 vmovdqu 5 * 32(%rcx), %ymm13;
809 vmovdqu 6 * 32(%rcx), %ymm14;
810 vmovdqu 7 * 32(%rcx), %ymm15;
814 %ymm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 32(%rax));
821 movl $32, %r8d;
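In this block, leaq 8 * 32(%rax), %rcx points %rcx 256 bytes past %rax, i.e. at the second (cd) half of the scratch area, and (key_table)(CTX, %r8, 8) indexes 64-bit subkeys with %r8 scaled by 8. A hedged C rendering of that addressing follows; the struct layout and the table size are assumptions about the context structure, not something the listing states:

    #include <stdint.h>

    /* Assumed context layout: a table of 64-bit round keys at the start
     * of the structure (34 entries is an assumption, not from the listing). */
    struct camellia_ctx {
        uint64_t key_table[34];
    };

    /* (key_table)(CTX, %r8, 8): base address plus r8 * 8 bytes,
     * i.e. the r8-th 64-bit subkey. */
    static const uint64_t *round_subkey(const struct camellia_ctx *ctx,
                                        unsigned r8)
    {
        return &ctx->key_table[r8];
    }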
843 * %r8d: 24 for 16-byte key, 32 for larger
851 leaq 8 * 32(%rax), %rcx;
857 cmpl $32, %r8d;
890 vmovdqu 0 * 32(%rcx), %ymm8;
891 vmovdqu 1 * 32(%rcx), %ymm9;
892 vmovdqu 2 * 32(%rcx), %ymm10;
893 vmovdqu 3 * 32(%rcx), %ymm11;
894 vmovdqu 4 * 32(%rcx), %ymm12;
895 vmovdqu 5 * 32(%rcx), %ymm13;
896 vmovdqu 6 * 32(%rcx), %ymm14;
897 vmovdqu 7 * 32(%rcx), %ymm15;
901 %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
926 * %rsi: dst (32 blocks)
927 * %rdx: src (32 blocks)
955 * %rsi: dst (32 blocks)
956 * %rdx: src (32 blocks)
963 movl $32, %r8d;
989 * %rsi: dst (32 blocks)
990 * %rdx: src (32 blocks)
993 subq $(16 * 32), %rsp;
998 movl $32, %r8d;
1027 vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
1028 vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
1029 vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
1030 vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3;
1031 vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2;
1032 vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1;
1033 vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0;
1034 vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15;
1035 vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14;
1036 vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13;
1037 vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12;
1038 vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11;
1039 vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10;
1040 vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9;
1041 vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8;
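The fifteen vpxor matches above implement the CBC-decryption XOR: each decrypted 32-byte lane is combined with the ciphertext that starts 16 bytes, one Camellia block, earlier in the source buffer, hence the n * 32 + 16 offsets. The very first block's XOR against the IV is not part of this excerpt, and the subq/addq $(16 * 32), %rsp pair around this region reserves and releases the 512-byte, 16-lane stack scratch the round macros use. A sketch under that reading:

    #include <immintrin.h>

    /* Sketch only: CBC-decrypt chaining for all but the first block of
     * the batch. Each 32-byte lane is XORed with the ciphertext one
     * 16-byte block earlier, i.e. at source offset n * 32 + 16. */
    static void cbc_xor_prev_blocks(__m256i blk[15], const unsigned char *src)
    {
        for (int n = 0; n < 15; n++) {
            __m256i prev =
                _mm256_loadu_si256((const __m256i *)(src + n * 32 + 16));
            blk[n] = _mm256_xor_si256(blk[n], prev);
        }
    }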
1048 addq $(16 * 32), %rsp;