Lines Matching +full:1 +full:- +full:v0

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
35 ld1 {v16.16b-v19.16b}, [x5], #64; \
36 ld1 {v20.16b-v23.16b}, [x5], #64; \
37 ld1 {v24.16b-v27.16b}, [x5], #64; \
38 ld1 {v28.16b-v31.16b}, [x5];
103 /* sbox, non-linear part */ \
105 tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
107 tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
109 tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
111 tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
117 sri RTMP1.4s, RTMP0.4s, #(32-8); \
118 sri RTMP2.4s, RTMP0.4s, #(32-16); \
119 sri RTMP3.4s, RTMP0.4s, #(32-24); \
126 sri RTMP2.4s, RTMP1.4s, #(32-2); \
135 subs x6, x6, #1; \
138 ROUND4(1, b1, b2, b3, b0); \
172 /* sbox, non-linear part */ \
174 tbl RTMP0.16b, {v16.16b-v19.16b}, RX0.16b; \
175 tbl RTMP1.16b, {v16.16b-v19.16b}, RX1.16b; \
178 tbx RTMP0.16b, {v20.16b-v23.16b}, RX0.16b; \
179 tbx RTMP1.16b, {v20.16b-v23.16b}, RX1.16b; \
182 tbx RTMP0.16b, {v24.16b-v27.16b}, RX0.16b; \
183 tbx RTMP1.16b, {v24.16b-v27.16b}, RX1.16b; \
186 tbx RTMP0.16b, {v28.16b-v31.16b}, RX0.16b; \
187 tbx RTMP1.16b, {v28.16b-v31.16b}, RX1.16b; \
194 sri RX0.4s, RTMP0.4s, #(32 - 8); \
195 sri RX1.4s, RTMP1.4s, #(32 - 8); \
196 sri RTMP2.4s, RTMP0.4s, #(32 - 16); \
197 sri RTMP3.4s, RTMP1.4s, #(32 - 16); \
203 /* RTMP0/1 ^= x ^ rol32(x, 24) ^ rol32(RX, 2) */ \
206 sri RTMP2.4s, RTMP0.4s, #(32 - 24); \
207 sri RTMP3.4s, RTMP1.4s, #(32 - 24); \
212 sri RTMP2.4s, RX0.4s, #(32 - 2); \
213 sri RTMP3.4s, RX1.4s, #(32 - 2); \
216 /* s0/t0 ^= RTMP0/1 */ \
233 subs x6, x6, #1; \
236 ROUND8(1, b1, b2, b3, b0, b5, b6, b7, b4); \
273 ld4 {v0.4s-v3.4s}, [x2], #64
274 ld4 {v4.4s-v7.4s}, [x2], #64
276 SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
278 st1 {v0.16b-v3.16b}, [x1], #64
279 st1 {v4.16b-v7.16b}, [x1], #64
291 ld4 {v0.4s-v3.4s}, [x2], #64
293 SM4_CRYPT_BLK4(v0, v1, v2, v3)
295 st1 {v0.16b-v3.16b}, [x1], #64
301 ld1 {v0.16b}, [x2], #16
308 transpose_4x4(v0, v1, v2, v3)
310 SM4_CRYPT_BLK4(v0, v1, v2, v3)
313 st1 {v0.16b}, [x1], #16
340 ld4 {v0.4s-v3.4s}, [x2], #64
341 ld4 {v4.4s-v7.4s}, [x2]
343 SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7)
346 rotate_clockwise_4x4(v0, v1, v2, v3)
351 eor v0.16b, v0.16b, RIV.16b
353 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
354 ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
366 st1 {v0.16b-v3.16b}, [x1], #64
367 st1 {v4.16b-v7.16b}, [x1], #64
379 ld1 {v0.16b-v3.16b}, [x2], #64
381 rev32 v4.16b, v0.16b
391 eor v5.16b, v5.16b, v0.16b
397 st1 {v4.16b-v7.16b}, [x1], #64
403 ld1 {v0.16b}, [x2], #16
410 rev32 v4.16b, v0.16b
420 mov RIV.16b, v0.16b
424 eor v5.16b, v5.16b, v0.16b
451 ld1 {v0.16b}, [x3]
457 ld1 {v1.16b-v3.16b}, [x2], #48
458 ld4 {v4.4s-v7.4s}, [x2]
460 transpose_4x4(v0, v1, v2, v3)
462 SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
465 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
466 ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
468 eor v0.16b, v0.16b, RTMP0.16b
477 st1 {v0.16b-v3.16b}, [x1], #64
478 st1 {v4.16b-v7.16b}, [x1], #64
480 mov v0.16b, RTMP7.16b
492 ld1 {v4.16b-v7.16b}, [x2], #64
494 rev32 v0.16b, v0.16b /* v0 is IV register */
499 transpose_4x4(v0, v1, v2, v3)
501 SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
503 eor v0.16b, v0.16b, v4.16b
508 st1 {v0.16b-v3.16b}, [x1], #64
510 mov v0.16b, v7.16b
523 rev32 v0.16b, v0.16b /* v0 is IV register */
527 transpose_4x4(v0, v1, v2, v3)
529 SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
532 eor v0.16b, v0.16b, v4.16b
533 st1 {v0.16b}, [x1], #16
534 mov v0.16b, v4.16b
539 mov v0.16b, v5.16b
544 mov v0.16b, v6.16b
548 st1 {v0.16b}, [x3]
573 mov vctr.d[1], x8; \
575 adds x8, x8, #1; \
580 inc_le128(v0) /* +0 */
581 inc_le128(v1) /* +1 */
589 transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7)
591 SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
593 ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64
594 ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64
596 eor v0.16b, v0.16b, RTMP0.16b
605 st1 {v0.16b-v3.16b}, [x1], #64
606 st1 {v4.16b-v7.16b}, [x1], #64
619 inc_le128(v0) /* +0 */
620 inc_le128(v1) /* +1 */
624 ld1 {v4.16b-v7.16b}, [x2], #64
626 transpose_4x4(v0, v1, v2, v3)
628 SM4_CRYPT_BLK4(v0, v1, v2, v3)
630 eor v0.16b, v0.16b, v4.16b
635 st1 {v0.16b-v3.16b}, [x1], #64
642 inc_le128(v0)
655 transpose_4x4(v0, v1, v2, v3)
657 SM4_CRYPT_BLK4(v0, v1, v2, v3)
661 eor v0.16b, v0.16b, v4.16b
662 st1 {v0.16b}, [x1], #16