Lines Matching +full:left +full:- +full:shifted
2 # Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
50 # /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
137 # While >= 128 data bytes remain (not counting xmm0-7), fold the 128
138 # bytes xmm0-7 into them, storing the result back into xmm0-7.
148 # Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
168 add $128-16, len
201 movdqu -16(buf, len), %xmm1
204 # xmm2 = high-order part of second chunk: xmm7 left-shifted by 'len' bytes.
210 # xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
214 # xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
215 # then '16-len' bytes from xmm2 (high-order bytes).
226 # Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
232 # x^64. This produces a 128-bit value congruent to x^64 * M(x) and
239 # Fold the high 32 bits into the low 96 bits. This produces a 96-bit
326 # For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
327 # is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
328 # 0x80} XOR the index vector to shift right by '16 - len' bytes.