Lines Matching +full:phase +full:- +full:shift
48 ## Vinodh Gopal et al. Optimized Galois-Counter-Mode Implementation
51 ## Erdinc Ozturk et al. Enabling High-Performance Galois-Counter-Mode
61 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
63 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
68 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
82 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
84 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
85 ## | 32-bit Sequence Number (A0) |
86 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
88 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
90 ## AAD Format with 32-bit Sequence Number
98 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
101 ## | 64-bit Extended Sequence Number {A1,A0} |
103 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107 ## AAD Format with 64-bit Extended Sequence Number
300 and $-16, %r13 # r13 = r13 - (r13 mod 16)
377 cmp $(255-8), %r15d
440 # able to shift 16-r13 bytes (r13 is the
456 # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
461 # shift right 16-r13 bytes
468 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
469 # mask out top 16-r13 bytes of xmm9
470 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
478 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
479 # mask out top 16-r13 bytes of xmm9
480 vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
515 # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
647 /* finalize: shift out the extra bytes we read, and align
648 left. since pslldq can only shift by an immediate, we use
727 mov -1(\DPTR, \DLEN, 1), %al
738 # Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
764 # adjust the shuffle mask pointer to be able to shift r13 bytes
765 # (r13 is the number of bytes in plaintext mod 16)
768 vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes
779 # shift mask accordingly
784 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
815 # shift mask accordingly
820 vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
880 # Input: A and B (128-bits each, bit-reflected)
898 vpslldq $8, \T2,\T3 # shift-L T3 2 DWs
899 vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs
903 #first phase of the reduction
905 vpslld $30, \GH, \T3 # packed right shifting << 30
906 vpslld $25, \GH, \T4 # packed right shifting << 25
911 vpsrldq $4, \T2, \T5 # shift-R T5 1 DW
913 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
914 vpxor \T2, \GH, \GH # first phase of the reduction complete
916 #second phase of the reduction
992 i = (8-\num_initial_blocks)
999 i = (9-\num_initial_blocks)
1010 i = (9-\num_initial_blocks)
1022 i = (9-\num_initial_blocks)
1035 i = (9-\num_initial_blocks)
1043 i = (9-\num_initial_blocks)
1059 i = (8-\num_initial_blocks)
1060 j = (9-\num_initial_blocks)
1494 .rep (\REP-9)
1529 vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
1530 vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs
1537 #first phase of the reduction
1540 vpslld $30, \T7, \T3 # packed right shifting << 30
1541 vpslld $25, \T7, \T4 # packed right shifting << 25
1546 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
1548 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
1549 vpxor \T2, \T7, \T7 # first phase of the reduction complete
1563 #second phase of the reduction
1723 # the accumulated carry-less multiplications
1726 #first phase of the reduction
1728 vpslld $30, \T7, \T3 # packed right shifting << 30
1729 vpslld $25, \T7, \T4 # packed right shifting << 25
1734 vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
1736 vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
1737 vpxor \T2, \T7, \T7 # first phase of the reduction complete
1741 #second phase of the reduction
1758 # u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
1759 # u8 *iv, /* Pre-counter block j0: 4 byte salt
1762 # concatenated with 0x00000001. 16-byte aligned pointer. */
1777 # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
1806 # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
1862 # Input: A and B (128-bits each, bit-reflected)
1876 vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs
1877 vpslldq $8 , \GH, \GH # shift-L GH 2 DWs
1883 #first phase of the reduction
1887 vpslldq $8, \T2, \T2 # shift-L T2 2 DWs
1889 vpxor \T2, \GH, \GH # first phase of the reduction complete
1891 #second phase of the reduction
1893 …vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-…
1896 …vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no sh…
1898 vpxor \T2, \GH, \GH # second phase of the reduction complete
1940 i = (8-\num_initial_blocks)
1947 i = (9-\num_initial_blocks)
1958 i = (9-\num_initial_blocks)
1970 i = (9-\num_initial_blocks)
1984 i = (9-\num_initial_blocks)
1992 i = (9-\num_initial_blocks)
2009 i = (8-\num_initial_blocks)
2010 j = (9-\num_initial_blocks)
2452 .rep (\REP-9)
2486 vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
2487 vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs
2494 #first phase of the reduction
2498 vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
2500 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2514 #second phase of the reduction
2516 vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
2519 vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts)
2521 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2695 # accumulated carry-less multiplications
2698 #first phase of the reduction
2702 vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
2704 vpxor \T2, \T7, \T7 # first phase of the reduction complete
2708 #second phase of the reduction
2710 …vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs sh…
2713 …vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with n…
2715 vpxor \T2, \T4, \T4 # second phase of the reduction complete
2726 # u8 *iv, /* Pre-counter block j0: 4 byte salt
2729 # concatenated with 0x00000001. 16-byte aligned pointer. */
2730 # u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
2745 # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
2774 # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */