Lines Matching +full:6 +full:e +full:- +full:7
2 # Implement fast SHA-512 with SSSE3 instructions. (x86_64)
22 # - Redistributions of source code must retain the above
26 # - Redistributions in binary form must reproduce the above
42 # This code is described in an Intel White-Paper:
43 # "Fast SHA-512 Implementations on Intel Architecture Processors"
118 mov e_64, tmp0 # tmp = e
120 ror $23, tmp0 # 41 # tmp = e ror 23
121 and e_64, T1 # T1 = (f ^ g) & e
122 xor e_64, tmp0 # tmp = (e ror 23) ^ e
123 xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g)
126 ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4
127 xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e
129 add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h
130 ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e)
131 add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e)
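The interleaved ror/xor steps above compute S1(e) without ever producing the three rotations (e ror 14), (e ror 18), (e ror 41) separately: rotating by 23, xoring in e, rotating by 4, xoring in e again, and rotating by 14 yields the same xor of rotations because 23+4+14 = 41 and 4+14 = 18. A minimal C sketch checking that identity and the ((f^g)&e)^g form of CH used above (the helper names ror64, big_sigma1, ch are mine, and the inputs are arbitrary test values):

    #include <assert.h>
    #include <stdint.h>

    /* Rotate right by n bits, 0 < n < 64. */
    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 definitions. */
    static uint64_t big_sigma1(uint64_t e) { return ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41); }
    static uint64_t ch(uint64_t e, uint64_t f, uint64_t g) { return (e & f) ^ (~e & g); }

    int main(void)
    {
        uint64_t e = 0x510e527fade682d1ULL, f = 0x9b05688c2b3e6c1fULL, g = 0x1f83d9abfb41bd6bULL;

        /* ror 23, xor e, ror 4, xor e, ror 14: one register walks through all
         * three rotations because 23+4+14 = 41, 4+14 = 18 and 14 = 14. */
        uint64_t tmp = ror64(ror64(ror64(e, 23) ^ e, 4) ^ e, 14);
        assert(tmp == big_sigma1(e));

        /* ((f ^ g) & e) ^ g selects f where e is 1 and g where e is 0,
         * i.e. the same truth table as (e & f) ^ (~e & g). */
        uint64_t t1 = ((f ^ g) & e) ^ g;
        assert(t1 == ch(e, f, g));
        return 0;
    }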
140 add T1, d_64 # e(next_state) = d + T1
141 ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6
142 xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a
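The comments on the two lines above show the same nested-rotate trick applied to a: rotate by 5, xor in a, rotate by 6, xor in a. One final rotate by 28 (a step outside the matched lines, hinted at by the running "# 34" annotation) turns this into S0(a) = (a ror 28) ^ (a ror 34) ^ (a ror 39), since 5+6+28 = 39 and 6+28 = 34. A minimal C check (helper names are mine, the input is an arbitrary test value):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 definition. */
    static uint64_t big_sigma0(uint64_t a) { return ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39); }

    int main(void)
    {
        uint64_t a = 0x6a09e667f3bcc908ULL;

        /* ror 5, xor a, ror 6, xor a, then a final ror 28. */
        uint64_t tmp = ror64(ror64(ror64(a, 5) ^ a, 6) ^ a, 28);
        assert(tmp == big_sigma0(a));
        return 0;
    }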
151 # Compute rounds t-2 and t-1
154 # Two rounds are computed based on the values for K[t-2]+W[t-2] and
155 # K[t-1]+W[t-1] which were previously stored at WK_2 by the message scheduler.
162 # E.g. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]}
164 # stitched to take advantage of instruction-level parallelism.
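The {W[t-2]|W[t-1]} convention above means every XMM register in the scheduler carries two consecutive schedule qwords, and the psrlq/psllq/pxor/paddq instructions below act on each 64-bit lane independently, so one vector sequence produces two new schedule words. A minimal SSE2 intrinsics sketch of that dual-lane behaviour (the array name w and the values are mine):

    #include <emmintrin.h>   /* SSE2 intrinsics */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Two consecutive schedule words, standing in for W[t-2] and W[t-1]. */
        uint64_t w[2] = { 0x1122334455667788ULL, 0x99aabbccddeeff00ULL };

        __m128i x = _mm_loadu_si128((const __m128i *)w);  /* x = {W[t-2] | W[t-1]} */
        __m128i r = _mm_srli_epi64(x, 6);                 /* both lanes shifted at once */

        uint64_t out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d\n", out[0] == w[0] >> 6, out[1] == w[1] >> 6);  /* prints "1 1" */
        return 0;
    }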
170 idx = \rnd - 2
171 movdqa W_t(idx), %xmm2 # XMM2 = W[t-2]
174 movdqa %xmm2, %xmm0 # XMM0 = W[t-2]
178 idx = \rnd - 15
179 movdqu W_t(idx), %xmm5 # XMM5 = W[t-15]
182 movdqa %xmm5, %xmm3 # XMM3 = W[t-15]
185 psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42
188 psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1
191 pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2]
194 pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15]
197 psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13
200 psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6
203 pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2]
205 ror $6, tmp0 # 34
206 pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]
209 psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6
212 psrlq $1, %xmm3 # XMM3 = ((((W[t-15]>>1)^W[t-15])>>6)^W[t-15])>>1
215 movdqa %xmm2, %xmm1 # XMM1 = W[t-2]
218 movdqa %xmm5, %xmm4 # XMM4 = W[t-15]
221 psllq $(64-19)-(64-61), %xmm1 # XMM1 = W[t-2] << 42
225 psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7
228 pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2]
231 pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15]
234 psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3
237 psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56
240 pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2])
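SSE2 has no 64-bit rotate, so s1 is assembled from plain shifts, and the shifts are chained so later stages reuse earlier ones: the right-shift chain (42, then 13, then 6) expands to (x>>61) ^ (x>>19) ^ (x>>6), the left-shift chain (42, then 3) to (x<<45) ^ (x<<3), and xoring the two chains pairs the shifts up into rotations. A C check of that identity (helper names are mine):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 s1. */
    static uint64_t s1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }

    int main(void)
    {
        uint64_t x = 0xdeadbeefcafef00dULL;

        /* Right-shift chain (psrlq 42, 13, 6): (x>>61) ^ (x>>19) ^ (x>>6). */
        uint64_t r = ((((x >> 42) ^ x) >> 13) ^ x) >> 6;

        /* Left-shift chain (psllq 42, then 3): (x<<45) ^ (x<<3). */
        uint64_t l = ((x << 42) ^ x) << 3;

        /* x>>61 ^ x<<3 = ror 61, x>>19 ^ x<<45 = ror 19, x>>6 stays a shift. */
        assert((r ^ l) == s1(x));
        return 0;
    }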
243 idx = \rnd - 7
244 movdqu W_t(idx), %xmm1 # XMM1 = W[t-7]
246 pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15])
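s0 is built the same way: its right-shift chain (1, then 6, then 1) expands to (x>>8) ^ (x>>7) ^ (x>>1), its left-shift chain (7, then 56) to (x<<63) ^ (x<<56), and the xor of the two gives (x ror 1) ^ (x ror 8) ^ (x>>7). A matching C check (helper names are mine):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 s0. */
    static uint64_t s0(uint64_t x) { return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); }

    int main(void)
    {
        uint64_t x = 0x0123456789abcdefULL;

        /* Right-shift chain (psrlq 1, 6, 1): (x>>8) ^ (x>>7) ^ (x>>1). */
        uint64_t r = ((((x >> 1) ^ x) >> 6) ^ x) >> 1;

        /* Left-shift chain (psllq 7, then 56): (x<<63) ^ (x<<56). */
        uint64_t l = ((x << 7) ^ x) << 56;

        /* x>>1 ^ x<<63 = ror 1, x>>8 ^ x<<56 = ror 8, x>>7 stays a shift. */
        assert((r ^ l) == s0(x));
        return 0;
    }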
248 paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15])
250 idx = \rnd - 16
251 paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16]
253 paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
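The sum spelled out in the comment on the last paddq is the SHA-512 message-schedule recurrence, which the vector code evaluates two words per iteration. A scalar sketch of the same recurrence (helper names and placeholder message words are mine):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }
    static uint64_t s0(uint64_t x) { return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); }
    static uint64_t s1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }

    int main(void)
    {
        uint64_t w[80];
        for (int t = 0; t < 16; t++)
            w[t] = t;    /* placeholder message words */

        /* Extend the 16 message words to the full 80-word schedule. */
        for (int t = 16; t < 80; t++)
            w[t] = s1(w[t - 2]) + w[t - 7] + s0(w[t - 15]) + w[t - 16];

        printf("%016llx\n", (unsigned long long)w[16]);
        return 0;
    }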
254 ror $6, tmp0 # 34
293 and $~(0x20 - 1), %rsp
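Masking %rsp with ~(0x20 - 1) clears its low five bits, rounding the stack pointer down to a 32-byte boundary. A tiny C sketch of the same align-down mask applied to an ordinary address (variable names are mine):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        char buf[64];
        uintptr_t p = (uintptr_t)&buf[37];            /* some unaligned address */

        /* Same trick as `and $~(0x20 - 1), %rsp`: clearing the low 5 bits
         * rounds the address down to the previous 32-byte boundary. */
        uintptr_t aligned = p & ~(uintptr_t)(0x20 - 1);

        printf("%d\n", (int)(aligned % 32));          /* prints 0 */
        return 0;
    }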
304 mov DIGEST(6), g_64
305 mov DIGEST(7), h_64
323 SHA512_Round t-2 # Round t-2
326 SHA512_Round t-1 # Round t-1
333 SHA512_Round t-2
334 SHA512_Round t-1
346 add g_64, DIGEST(6)
347 add h_64, DIGEST(7)
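These adds (together with the matching ones for the other six state registers, which fall outside the matched lines) fold the working variables back into the digest kept in memory, the feed-forward that closes each block of a SHA-2 compression function. A minimal C sketch with placeholder values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Running digest H0..H7 and working variables a..h after 80 rounds
         * of one block; the values here are placeholders. */
        uint64_t digest[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint64_t work[8]   = { 10, 20, 30, 40, 50, 60, 70, 80 };

        /* The `add x_64, DIGEST(i)` pattern above: each working variable is
         * added (mod 2^64) into the stored digest word. */
        for (int i = 0; i < 8; i++)
            digest[i] += work[i];

        printf("%llu\n", (unsigned long long)digest[6]);  /* 7 + 70 = 77 */
        return 0;
    }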
374 # Mask for byte-swapping the two qwords in an XMM register using (v)pshufb.
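SHA-512 treats message words as big-endian, so every qword loaded from the input block has to be byte-swapped, and a single (v)pshufb with a suitable control mask swaps both qwords of an XMM register at once. A hedged SSSE3 intrinsics sketch of that idea (the shuffle-control bytes below are my reconstruction of what such a mask looks like, not copied from the constant this comment introduces; __builtin_bswap64 is the GCC/Clang builtin, used only as the reference):

    #include <tmmintrin.h>   /* SSSE3: _mm_shuffle_epi8 */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t in[2] = { 0x0102030405060708ULL, 0x1112131415161718ULL };

        /* Control vector that reverses the bytes inside each 64-bit lane:
         * destination byte i takes source byte mask[i], so lane 0 reads
         * bytes 7..0 and lane 1 reads 15..8 (_mm_set_epi8 lists high to low). */
        const __m128i bswap_qwords = _mm_set_epi8( 8,  9, 10, 11, 12, 13, 14, 15,
                                                   0,  1,  2,  3,  4,  5,  6,  7);

        __m128i x = _mm_loadu_si128((const __m128i *)in);
        x = _mm_shuffle_epi8(x, bswap_qwords);        /* the (v)pshufb step */

        uint64_t out[2];
        _mm_storeu_si128((__m128i *)out, x);
        printf("%d %d\n", out[0] == __builtin_bswap64(in[0]),
                          out[1] == __builtin_bswap64(in[1]));  /* prints "1 1" */
        return 0;
    }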
378 # Mergeable 640-byte rodata section. This allows the linker to merge the table
379 # with another, identical 640-byte fragment of some other rodata section