Lines Matching +full:6 +full:e +full:- +full:7
2 # Implement fast SHA-512 with SSSE3 instructions. (x86_64)
22 # - Redistributions of source code must retain the above
26 # - Redistributions in binary form must reproduce the above
42 # This code is described in an Intel White-Paper:
43 # "Fast SHA-512 Implementations on Intel Architecture Processors"
118 mov e_64, tmp0 # tmp = e
120 ror $23, tmp0 # 41 # tmp = e ror 23
121 and e_64, T1 # T1 = (f ^ g) & e
122 xor e_64, tmp0 # tmp = (e ror 23) ^ e
123 xor g_64, T1 # T1 = ((f ^ g) & e) ^ g = CH(e,f,g)
126 ror $4, tmp0 # 18 # tmp = ((e ror 23) ^ e) ror 4
127 xor e_64, tmp0 # tmp = (((e ror 23) ^ e) ror 4) ^ e
129 add h_64, T1 # T1 = CH(e,f,g) + W[t] + K[t] + h
130 ror $14, tmp0 # 14 # tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e)
131 add tmp0, T1 # T1 = CH(e,f,g) + W[t] + K[t] + S1(e)
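The interleaved ror/xor steps above compute S1(e) without ever producing the three rotations (e ror 14), (e ror 18), (e ror 41) separately: rotating by 23, xoring in e, rotating by 4, xoring in e again, and rotating by 14 yields the same xor of rotations because 23+4+14 = 41 and 4+14 = 18. A minimal C sketch checking that identity and the ((f^g)&e)^g form of CH used above (the helper names ror64, big_sigma1, ch are mine, and the inputs are arbitrary test values):

    #include <assert.h>
    #include <stdint.h>

    /* Rotate right by n bits, 0 < n < 64. */
    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 definitions. */
    static uint64_t big_sigma1(uint64_t e) { return ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41); }
    static uint64_t ch(uint64_t e, uint64_t f, uint64_t g) { return (e & f) ^ (~e & g); }

    int main(void)
    {
        uint64_t e = 0x510e527fade682d1ULL, f = 0x9b05688c2b3e6c1fULL, g = 0x1f83d9abfb41bd6bULL;

        /* ror 23, xor e, ror 4, xor e, ror 14: one register walks through all
         * three rotations because 23+4+14 = 41, 4+14 = 18 and 14 = 14. */
        uint64_t tmp = ror64(ror64(ror64(e, 23) ^ e, 4) ^ e, 14);
        assert(tmp == big_sigma1(e));

        /* ((f ^ g) & e) ^ g selects f where e is 1 and g where e is 0,
         * i.e. the same truth table as (e & f) ^ (~e & g). */
        uint64_t t1 = ((f ^ g) & e) ^ g;
        assert(t1 == ch(e, f, g));
        return 0;
    }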
140 add T1, d_64 # e(next_state) = d + T1
141 ror $6, tmp0 # 34 # tmp = ((a ror 5) ^ a) ror 6
142 xor a_64, tmp0 # tmp = (((a ror 5) ^ a) ror 6) ^ a
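The comments on the two lines above show the same nested-rotate trick applied to a: rotate by 5, xor in a, rotate by 6, xor in a. One final rotate by 28 (a step outside the matched lines, hinted at by the running "# 34" annotation) turns this into S0(a) = (a ror 28) ^ (a ror 34) ^ (a ror 39), since 5+6+28 = 39 and 6+28 = 34. A minimal C check (helper names are mine, the input is an arbitrary test value):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 definition. */
    static uint64_t big_sigma0(uint64_t a) { return ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39); }

    int main(void)
    {
        uint64_t a = 0x6a09e667f3bcc908ULL;

        /* ror 5, xor a, ror 6, xor a, then a final ror 28. */
        uint64_t tmp = ror64(ror64(ror64(a, 5) ^ a, 6) ^ a, 28);
        assert(tmp == big_sigma0(a));
        return 0;
    }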
151 # Compute rounds t-2 and t-1
154 # Two rounds are computed based on the values for K[t-2]+W[t-2] and
155 # K[t-1]+W[t-1] which were previously stored at WK_2 by the message scheduler.
162 # E.g. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]}
164 # stitched to take advantage of instruction-level parallelism.
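The {W[t-2]|W[t-1]} convention above means every XMM register in the scheduler carries two consecutive schedule qwords, and the psrlq/psllq/pxor/paddq instructions below act on each 64-bit lane independently, so one vector sequence produces two new schedule words. A minimal SSE2 intrinsics sketch of that dual-lane behaviour (the array name w and the values are mine):

    #include <emmintrin.h>   /* SSE2 intrinsics */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Two consecutive schedule words, standing in for W[t-2] and W[t-1]. */
        uint64_t w[2] = { 0x1122334455667788ULL, 0x99aabbccddeeff00ULL };

        __m128i x = _mm_loadu_si128((const __m128i *)w);  /* x = {W[t-2] | W[t-1]} */
        __m128i r = _mm_srli_epi64(x, 6);                 /* both lanes shifted at once */

        uint64_t out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d\n", out[0] == w[0] >> 6, out[1] == w[1] >> 6);  /* prints "1 1" */
        return 0;
    }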
170 idx = \rnd - 2
171 movdqa W_t(idx), %xmm2 # XMM2 = W[t-2]
174 movdqa %xmm2, %xmm0 # XMM0 = W[t-2]
178 idx = \rnd - 15
179 movdqu W_t(idx), %xmm5 # XMM5 = W[t-15]
182 movdqa %xmm5, %xmm3 # XMM3 = W[t-15]
185 psrlq $61-19, %xmm0 # XMM0 = W[t-2] >> 42
188 psrlq $(8-7), %xmm3 # XMM3 = W[t-15] >> 1
191 pxor %xmm2, %xmm0 # XMM0 = (W[t-2] >> 42) ^ W[t-2]
194 pxor %xmm5, %xmm3 # XMM3 = (W[t-15] >> 1) ^ W[t-15]
197 psrlq $(19-6), %xmm0 # XMM0 = ((W[t-2]>>42)^W[t-2])>>13
200 psrlq $(7-1), %xmm3 # XMM3 = ((W[t-15]>>1)^W[t-15])>>6
203 pxor %xmm2, %xmm0 # XMM0 = (((W[t-2]>>42)^W[t-2])>>13)^W[t-2]
205 ror $6, tmp0 # 34
206 pxor %xmm5, %xmm3 # XMM3 = (((W[t-15]>>1)^W[t-15])>>6)^W[t-15]
209 psrlq $6, %xmm0 # XMM0 = ((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6
212 psrlq $1, %xmm3 # XMM3 = ((((W[t-15]>>1)^W[t-15])>>6)^W[t-15])>>1
215 movdqa %xmm2, %xmm1 # XMM1 = W[t-2]
218 movdqa %xmm5, %xmm4 # XMM4 = W[t-15]
221 psllq $(64-19)-(64-61), %xmm1 # XMM1 = W[t-2] << 42
225 psllq $(64-1)-(64-8), %xmm4 # XMM4 = W[t-15] << 7
228 pxor %xmm2, %xmm1 # XMM1 = (W[t-2] << 42)^W[t-2]
231 pxor %xmm5, %xmm4 # XMM4 = (W[t-15]<<7)^W[t-15]
234 psllq $(64-61), %xmm1 # XMM1 = ((W[t-2] << 42)^W[t-2])<<3
237 psllq $(64-8), %xmm4 # XMM4 = ((W[t-15]<<7)^W[t-15])<<56
240 pxor %xmm1, %xmm0 # XMM0 = s1(W[t-2])
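SSE2 has no 64-bit rotate, so s1 is assembled from plain shifts, and the shifts are chained so later stages reuse earlier ones: the right-shift chain (42, then 13, then 6) expands to (x>>61) ^ (x>>19) ^ (x>>6), the left-shift chain (42, then 3) to (x<<45) ^ (x<<3), and xoring the two chains pairs the shifts up into rotations. A C check of that identity (helper names are mine):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 s1. */
    static uint64_t s1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }

    int main(void)
    {
        uint64_t x = 0xdeadbeefcafef00dULL;

        /* Right-shift chain (psrlq 42, 13, 6): (x>>61) ^ (x>>19) ^ (x>>6). */
        uint64_t r = ((((x >> 42) ^ x) >> 13) ^ x) >> 6;

        /* Left-shift chain (psllq 42, then 3): (x<<45) ^ (x<<3). */
        uint64_t l = ((x << 42) ^ x) << 3;

        /* x>>61 ^ x<<3 = ror 61, x>>19 ^ x<<45 = ror 19, x>>6 stays a shift. */
        assert((r ^ l) == s1(x));
        return 0;
    }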
243 idx = \rnd - 7
244 movdqu W_t(idx), %xmm1 # XMM1 = W[t-7]
246 pxor %xmm4, %xmm3 # XMM3 = s0(W[t-15])
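s0 is built the same way: its right-shift chain (1, then 6, then 1) expands to (x>>8) ^ (x>>7) ^ (x>>1), its left-shift chain (7, then 56) to (x<<63) ^ (x<<56), and the xor of the two gives (x ror 1) ^ (x ror 8) ^ (x>>7). A matching C check (helper names are mine):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

    /* Textbook SHA-512 s0. */
    static uint64_t s0(uint64_t x) { return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); }

    int main(void)
    {
        uint64_t x = 0x0123456789abcdefULL;

        /* Right-shift chain (psrlq 1, 6, 1): (x>>8) ^ (x>>7) ^ (x>>1). */
        uint64_t r = ((((x >> 1) ^ x) >> 6) ^ x) >> 1;

        /* Left-shift chain (psllq 7, then 56): (x<<63) ^ (x<<56). */
        uint64_t l = ((x << 7) ^ x) << 56;

        /* x>>1 ^ x<<63 = ror 1, x>>8 ^ x<<56 = ror 8, x>>7 stays a shift. */
        assert((r ^ l) == s0(x));
        return 0;
    }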
248 paddq %xmm3, %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15])
250 idx = \rnd - 16
251 paddq W_t(idx), %xmm0 # XMM0 = s1(W[t-2]) + s0(W[t-15]) + W[t-16]
253 paddq %xmm1, %xmm0 # XMM0 = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]
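The sum spelled out in the comment on the last paddq is the SHA-512 message-schedule recurrence, which the vector code evaluates two words per iteration. A scalar sketch of the same recurrence (helper names and placeholder message words are mine):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t ror64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }
    static uint64_t s0(uint64_t x) { return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); }
    static uint64_t s1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }

    int main(void)
    {
        uint64_t w[80];
        for (int t = 0; t < 16; t++)
            w[t] = t;    /* placeholder message words */

        /* Extend the 16 message words to the full 80-word schedule. */
        for (int t = 16; t < 80; t++)
            w[t] = s1(w[t - 2]) + w[t - 7] + s0(w[t - 15]) + w[t - 16];

        printf("%016llx\n", (unsigned long long)w[16]);
        return 0;
    }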
254 ror $6, tmp0 # 34
293 and $~(0x20 - 1), %rsp
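Masking %rsp with ~(0x20 - 1) clears its low five bits, rounding the stack pointer down to a 32-byte boundary. A tiny C sketch of the same align-down mask applied to an ordinary address (variable names are mine):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        char buf[64];
        uintptr_t p = (uintptr_t)&buf[37];            /* some unaligned address */

        /* Same trick as `and $~(0x20 - 1), %rsp`: clearing the low 5 bits
         * rounds the address down to the previous 32-byte boundary. */
        uintptr_t aligned = p & ~(uintptr_t)(0x20 - 1);

        printf("%d\n", (int)(aligned % 32));          /* prints 0 */
        return 0;
    }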
304 mov DIGEST(6), g_64
305 mov DIGEST(7), h_64
323 SHA512_Round t-2 # Round t-2
326 SHA512_Round t-1 # Round t-1
333 SHA512_Round t-2
334 SHA512_Round t-1
346 add g_64, DIGEST(6)
347 add h_64, DIGEST(7)
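These adds (together with the matching ones for the other six state registers, which fall outside the matched lines) fold the working variables back into the digest kept in memory, the feed-forward that closes each block of a SHA-2 compression function. A minimal C sketch with placeholder values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Running digest H0..H7 and working variables a..h after 80 rounds
         * of one block; the values here are placeholders. */
        uint64_t digest[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint64_t work[8]   = { 10, 20, 30, 40, 50, 60, 70, 80 };

        /* The `add x_64, DIGEST(i)` pattern above: each working variable is
         * added (mod 2^64) into the stored digest word. */
        for (int i = 0; i < 8; i++)
            digest[i] += work[i];

        printf("%llu\n", (unsigned long long)digest[6]);  /* 7 + 70 = 77 */
        return 0;
    }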
374 # Mask for byte-swapping the two qwords in an XMM register using (v)pshufb.
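SHA-512 treats message words as big-endian, so every qword loaded from the input block has to be byte-swapped, and a single (v)pshufb with a suitable control mask swaps both qwords of an XMM register at once. A hedged SSSE3 intrinsics sketch of that idea (the shuffle-control bytes below are my reconstruction of what such a mask looks like, not copied from the constant this comment introduces; __builtin_bswap64 is the GCC/Clang builtin, used only as the reference):

    #include <tmmintrin.h>   /* SSSE3: _mm_shuffle_epi8 */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t in[2] = { 0x0102030405060708ULL, 0x1112131415161718ULL };

        /* Control vector that reverses the bytes inside each 64-bit lane:
         * destination byte i takes source byte mask[i], so lane 0 reads
         * bytes 7..0 and lane 1 reads 15..8 (_mm_set_epi8 lists high to low). */
        const __m128i bswap_qwords = _mm_set_epi8( 8,  9, 10, 11, 12, 13, 14, 15,
                                                   0,  1,  2,  3,  4,  5,  6,  7);

        __m128i x = _mm_loadu_si128((const __m128i *)in);
        x = _mm_shuffle_epi8(x, bswap_qwords);        /* the (v)pshufb step */

        uint64_t out[2];
        _mm_storeu_si128((__m128i *)out, x);
        printf("%d %d\n", out[0] == __builtin_bswap64(in[0]),
                          out[1] == __builtin_bswap64(in[1]));  /* prints "1 1" */
        return 0;
    }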
378 # Mergeable 640-byte rodata section. This allows the linker to merge the table
379 # with another, identical 640-byte fragment of some other rodata section