Lines matching "1-16" in arch/arm64/crypto/ghash-ce-core.S (accelerated GHASH/GCM using ARMv8 PMULL instructions); each line is prefixed with its line number in the source file.
1 /* SPDX-License-Identifier: GPL-2.0-only */
5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
61 .arch armv8-a+crypto
64 pmull \rd\().1q, \rn\().1d, \rm\().1d
68 pmull2 \rd\().1q, \rn\().2d, \rm\().2d
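For reference, PMULL multiplies the low 64-bit lanes and PMULL2 the high lanes, each producing a 128-bit carry-less (polynomial) product. A minimal C sketch of that primitive, written for illustration and not taken from the kernel (clmul64 and u128 are illustrative names):

    #include <stdint.h>

    struct u128 { uint64_t lo, hi; };

    /* Schoolbook carry-less multiply: XOR replaces addition, no carries. */
    static struct u128 clmul64(uint64_t a, uint64_t b)
    {
            struct u128 r = { 0, 0 };

            for (int i = 0; i < 64; i++) {
                    if ((b >> i) & 1) {
                            r.lo ^= a << i;
                            if (i)                  /* avoid undefined a >> 64 */
                                    r.hi ^= a >> (64 - i);
                    }
            }
            return r;
    }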
72 ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
80 tbl t3.16b, {\ad\().16b}, perm1.16b // A1
81 tbl t5.16b, {\ad\().16b}, perm2.16b // A2
82 tbl t7.16b, {\ad\().16b}, perm3.16b // A3
96 __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
109 eor t3.16b, t3.16b, t4.16b // L = E + F
110 eor t5.16b, t5.16b, t6.16b // M = G + H
111 eor t7.16b, t7.16b, t8.16b // N = I + J
119 // t5 = (M) (P2 + P3) << 16
120 eor t4.16b, t4.16b, t3.16b
121 and t3.16b, t3.16b, k32_48.16b
125 eor t6.16b, t6.16b, t7.16b
126 and t7.16b, t7.16b, k00_16.16b
128 eor t4.16b, t4.16b, t3.16b
129 eor t6.16b, t6.16b, t7.16b
136 ext t3.16b, t3.16b, t3.16b, #15
137 ext t5.16b, t5.16b, t5.16b, #14
138 ext t7.16b, t7.16b, t7.16b, #13
139 ext t9.16b, t9.16b, t9.16b, #12
141 eor t3.16b, t3.16b, t5.16b
142 eor t7.16b, t7.16b, t9.16b
143 eor \rq\().16b, \rq\().16b, t3.16b
144 eor \rq\().16b, \rq\().16b, t7.16b
148 add x8, x3, #16
149 ld1 {HH.2d-HH4.2d}, [x8]
153 eor SHASH2.16b, SHASH2.16b, T1.16b
157 eor HH34.16b, HH34.16b, T1.16b
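// (SHASH2 packs the XOR of the high and low 64-bit halves of H and H^2,
// and HH34 does the same for H^3 and H^4; these feed the (a1 + a0)(b1 + b0)
// middle term of the Karatsuba multiplications below.)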
159 movi MASK.16b, #0xe1
164 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
165 eor SHASH2.16b, SHASH2.16b, SHASH.16b
177 eor perm1.16b, perm1.16b, T1.16b
179 ushr perm3.2d, perm1.2d, #16
186 tbl sh1.16b, {SHASH.16b}, perm1.16b
187 tbl sh2.16b, {SHASH.16b}, perm2.16b
188 tbl sh3.16b, {SHASH.16b}, perm3.16b
189 tbl sh4.16b, {SHASH.16b}, T1.16b
190 ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
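// (Fallback-path setup: sh1..sh4 and ss1.. are byte-shifted copies of
// SHASH, built with tbl/ext, so that CPUs lacking the 64-bit PMULL can
// assemble the product from 8-bit polynomial multiplies.)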
197 // PMULL (64x64->128) based reduction for CPUs that can do it in a single instruction.
201 pmull T2.1q, XL.1d, MASK.1d
202 eor XM.16b, XM.16b, T1.16b
204 mov XH.d[0], XM.d[1]
205 mov XM.d[1], XL.d[0]
207 eor XL.16b, XM.16b, T2.16b
208 ext T2.16b, XL.16b, XL.16b, #8
209 pmull XL.1q, XL.1d, MASK.1d
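// (MASK is prepared earlier in the file from 0xe1 plus a shift left by 57,
// giving 0xc200000000000000 in each 64-bit lane: the bit-reflected GHASH
// polynomial x^128 + x^7 + x^2 + x + 1 that the two PMULLs by MASK use to
// fold the 256-bit product back into 128 bits.)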
214 // Alternative reduction for CPUs that lack support for the 64x64->128 PMULL instruction.
217 eor XM.16b, XM.16b, T1.16b
219 mov XL.d[1], XM.d[0]
220 mov XH.d[0], XM.d[1]
224 eor T2.16b, T2.16b, T1.16b
226 eor T2.16b, T2.16b, T1.16b
227 ext T1.16b, XL.16b, XH.16b, #8
228 eor T2.16b, T2.16b, T1.16b
230 mov XL.d[1], T2.d[0]
231 mov XH.d[0], T2.d[1]
233 ushr T2.2d, XL.2d, #1
234 eor XH.16b, XH.16b, XL.16b
235 eor XL.16b, XL.16b, T2.16b
237 ushr XL.2d, XL.2d, #1
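// (Same reduction as above, composed from shifts and XORs for CPUs whose
// PMULL handles only 8-bit polynomials.)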
254 tbnz w0, #1, 2f // skip until #blocks is a round multiple of 4
256 1: ld1 {XM3.16b-TT4.16b}, [x2], #64
260 rev64 T1.16b, XM3.16b
261 rev64 T2.16b, XH3.16b
262 rev64 TT4.16b, TT4.16b
263 rev64 TT3.16b, TT3.16b
265 ext IN1.16b, TT4.16b, TT4.16b, #8
266 ext XL3.16b, TT3.16b, TT3.16b, #8
268 eor TT4.16b, TT4.16b, IN1.16b
269 pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
270 pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
271 pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
273 eor TT3.16b, TT3.16b, XL3.16b
274 pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1
275 pmull XL3.1q, HH.1d, XL3.1d // a0 * b0
276 pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
278 ext IN1.16b, T2.16b, T2.16b, #8
279 eor XL2.16b, XL2.16b, XL3.16b
280 eor XH2.16b, XH2.16b, XH3.16b
281 eor XM2.16b, XM2.16b, XM3.16b
283 eor T2.16b, T2.16b, IN1.16b
284 pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1
285 pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0
286 pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
288 eor XL2.16b, XL2.16b, XL3.16b
289 eor XH2.16b, XH2.16b, XH3.16b
290 eor XM2.16b, XM2.16b, XM3.16b
292 ext IN1.16b, T1.16b, T1.16b, #8
293 ext TT3.16b, XL.16b, XL.16b, #8
294 eor XL.16b, XL.16b, IN1.16b
295 eor T1.16b, T1.16b, TT3.16b
297 pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1
298 eor T1.16b, T1.16b, XL.16b
299 pmull XL.1q, HH4.1d, XL.1d // a0 * b0
300 pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
302 eor XL.16b, XL.16b, XL2.16b
303 eor XH.16b, XH.16b, XH2.16b
304 eor XM.16b, XM.16b, XM2.16b
306 eor T2.16b, XL.16b, XH.16b
307 ext T1.16b, XL.16b, XH.16b, #8
308 eor XM.16b, XM.16b, T2.16b
312 eor T2.16b, T2.16b, XH.16b
313 eor XL.16b, XL.16b, T2.16b
316 b 1b
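The loop above is a 4-way aggregated GHASH: with H^1..H^4 preloaded into SHASH and HH..HH4, the digest is updated as Y = (Y ^ X0)*H^4 ^ X1*H^3 ^ X2*H^2 ^ X3*H, so four blocks share a single modular reduction. A hedged C sketch of that algebra (be128, gf128_mul, xor128 and ghash4 are illustrative names; a naive textbook multiply stands in for the vectorized Karatsuba form the assembly uses):

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } be128;      /* hi = most significant */

    /* Textbook GHASH multiply (NIST SP 800-38D): multiply-by-x is a right
     * shift, reduced by R = 0xe1 << 120, i.e. x^128 + x^7 + x^2 + x + 1. */
    static be128 gf128_mul(be128 x, be128 y)
    {
            be128 z = { 0, 0 }, v = x;

            for (int i = 0; i < 128; i++) {
                    uint64_t bit = i < 64 ? (y.hi >> (63 - i)) & 1
                                          : (y.lo >> (127 - i)) & 1;
                    if (bit) {
                            z.hi ^= v.hi;
                            z.lo ^= v.lo;
                    }
                    uint64_t lsb = v.lo & 1;
                    v.lo = (v.lo >> 1) | (v.hi << 63);
                    v.hi >>= 1;
                    if (lsb)
                            v.hi ^= 0xe100000000000000ULL;
            }
            return z;
    }

    static be128 xor128(be128 a, be128 b)
    {
            return (be128){ a.hi ^ b.hi, a.lo ^ b.lo };
    }

    /* h[0..3] = H^1..H^4; one reduction covers all four blocks. */
    static be128 ghash4(be128 y, const be128 x[4], const be128 h[4])
    {
            y = gf128_mul(xor128(y, x[0]), h[3]);
            y = xor128(y, gf128_mul(x[1], h[2]));
            y = xor128(y, gf128_mul(x[2], h[1]));
            y = xor128(y, gf128_mul(x[3], h[0]));
            return y;
    }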
319 2: ld1 {T1.2d}, [x2], #16
320 sub w0, w0, #1
323 CPU_LE( rev64 T1.16b, T1.16b )
325 ext T2.16b, XL.16b, XL.16b, #8
326 ext IN1.16b, T1.16b, T1.16b, #8
327 eor T1.16b, T1.16b, T2.16b
328 eor XL.16b, XL.16b, IN1.16b
331 eor T1.16b, T1.16b, XL.16b
335 4: eor T2.16b, XL.16b, XH.16b
336 ext T1.16b, XL.16b, XH.16b, #8
337 eor XM.16b, XM.16b, T2.16b
341 eor T2.16b, T2.16b, XH.16b
342 eor XL.16b, XL.16b, T2.16b
388 ld1 {K0.4s-K3.4s}, [\rk]
389 ld1 {K4.4s-K5.4s}, [\tmp]
392 ld1 {KK.4s-KM.4s}, [\tmp]
396 aese \state\().16b, \key\().16b
397 aesmc \state\().16b, \state\().16b
409 ld1 {K6.4s-K7.4s}, [\tmp], #32
421 aese \state\().16b, KL.16b
422 eor \state\().16b, \state\().16b, KM.16b
424 .subsection 1
426 ld1 {K8.4s-K9.4s}, [\tmp], #32
429 ld1 {K6.4s-K7.4s}, [\tmp]
432 tbz \rounds, #1, .Lout192_\@
439 stp x29, x30, [sp, #-32]!
445 ld1 {SHASH.2d}, [x3], #16
446 ld1 {HH.2d-HH4.2d}, [x3]
450 eor SHASH2.16b, SHASH2.16b, T1.16b
454 eor HH34.16b, HH34.16b, T1.16b
471 bmi 1f
472 ld1 {INP0.16b-INP3.16b}, [x2], #64
473 .subsection 1
479 *  1 byte  |          |          |          |x       |
480 * 16 bytes |          |          |          |xxxxxxxx|
488 * (i.e., when the input size is < 16 bytes)
490 1: mov x15, #16
493 adr_l x17, .Lpermute_table + 16
498 ld1 {T1.16b}, [x12]
502 cmp x0, #-16
504 cmp x0, #-32
506 cmp x0, #-48
511 ld1 {INP0.16b}, [x2], x14
512 ld1 {INP1.16b}, [x2], x15
513 ld1 {INP2.16b}, [x2], x16
514 ld1 {INP3.16b}, [x2]
515 tbl INP3.16b, {INP3.16b}, T1.16b
526 st1 {INP0.16b-INP3.16b}, [x1], #64
527 .if \enc == 1
535 ld1 {INP3.16b}, [x10] // load lengths[]
536 mov w9, #1
539 mov w11, #(0x1 << 24) // BE '1U'
540 ld1 {KS0.16b}, [x5]
545 ext XL.16b, XL.16b, XL.16b, #8
546 rev64 XL.16b, XL.16b
547 eor XL.16b, XL.16b, KS0.16b
549 .if \enc == 1
550 st1 {XL.16b}, [x10] // store tag
554 ld1 {KS0.16b}, [x11] // load supplied tag
556 ld1 {KS1.16b}, [x17] // load permute vector
558 cmeq XL.16b, XL.16b, KS0.16b // compare tags
559 mvn XL.16b, XL.16b // -1 for fail, 0 for pass
560 tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only
561 sminv b0, XL.16b // signed minimum across XL
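The cmeq/mvn/tbl/sminv sequence above verifies the tag without a data-dependent branch: bytes that differ become -1, the permute keeps only the authsize bytes, and the signed minimum collapses the vector to 0 (pass) or -1 (fail). A hedged scalar C equivalent (gcm_tag_check is an illustrative name, not the kernel's helper):

    #include <stddef.h>
    #include <stdint.h>

    /* Returns 0 if the first authsize bytes match, -1 otherwise,
     * accumulating differences instead of exiting early. */
    static int gcm_tag_check(const uint8_t *computed, const uint8_t *supplied,
                             size_t authsize)
    {
            uint8_t diff = 0;

            for (size_t i = 0; i < authsize; i++)
                    diff |= computed[i] ^ supplied[i];

            return (diff == 0) - 1;     /* 0 on match, -1 on mismatch */
    }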
574 6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
575 sub x17, x17, x19, lsl #1
577 cmp w9, #1
579 .subsection 1
580 7: ld1 {INP2.16b}, [x1]
581 tbx INP2.16b, {INP3.16b}, T1.16b
582 mov INP3.16b, INP2.16b
586 st1 {INP0.16b}, [x1], x14
587 st1 {INP1.16b}, [x1], x15
588 st1 {INP2.16b}, [x1], x16
589 tbl INP3.16b, {INP3.16b}, T1.16b
590 tbx INP3.16b, {INP2.16b}, T2.16b
591 8: st1 {INP3.16b}, [x1]
593 .if \enc == 1
594 ld1 {T1.16b}, [x17]
595 tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
607 pmull_gcm_do_crypt 1
620 movi MASK.16b, #0xe1
623 rev64 T1.16b, INP0.16b
624 rev64 T2.16b, INP1.16b
625 rev64 TT3.16b, INP2.16b
626 rev64 TT4.16b, INP3.16b
628 ext XL.16b, XL.16b, XL.16b, #8
631 .subsection 1
632 0: movi XH2.16b, #0
633 movi XM2.16b, #0
634 movi XL2.16b, #0
636 tbz w9, #0, 1f // 2 blocks?
637 tbz w9, #1, 2f // 1 block?
639 eor T2.16b, T2.16b, XL.16b
640 ext T1.16b, T2.16b, T2.16b, #8
643 1: eor TT3.16b, TT3.16b, XL.16b
644 ext T2.16b, TT3.16b, TT3.16b, #8
647 2: eor TT4.16b, TT4.16b, XL.16b
648 ext IN1.16b, TT4.16b, TT4.16b, #8
652 eor T1.16b, T1.16b, XL.16b
653 ext IN1.16b, T1.16b, T1.16b, #8
655 pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
656 eor T1.16b, T1.16b, IN1.16b
657 pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
658 pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
660 ext T1.16b, T2.16b, T2.16b, #8
661 .Lgh3: eor T2.16b, T2.16b, T1.16b
662 pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
663 pmull XL.1q, HH3.1d, T1.1d // a0 * b0
664 pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
666 eor XH2.16b, XH2.16b, XH.16b
667 eor XL2.16b, XL2.16b, XL.16b
668 eor XM2.16b, XM2.16b, XM.16b
670 ext T2.16b, TT3.16b, TT3.16b, #8
671 .Lgh2: eor TT3.16b, TT3.16b, T2.16b
672 pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
673 pmull XL.1q, HH.1d, T2.1d // a0 * b0
674 pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
676 eor XH2.16b, XH2.16b, XH.16b
677 eor XL2.16b, XL2.16b, XL.16b
678 eor XM2.16b, XM2.16b, XM.16b
680 ext IN1.16b, TT4.16b, TT4.16b, #8
681 .Lgh1: eor TT4.16b, TT4.16b, IN1.16b
682 pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
683 pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
684 pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
686 eor XH.16b, XH.16b, XH2.16b
687 eor XL.16b, XL.16b, XL2.16b
688 eor XM.16b, XM.16b, XM2.16b
690 eor T2.16b, XL.16b, XH.16b
691 ext T1.16b, XL.16b, XH.16b, #8
692 eor XM.16b, XM.16b, T2.16b
696 eor T2.16b, T2.16b, XH.16b
697 eor XL.16b, XL.16b, T2.16b
703 ld1 {KS0.16b}, [x5] // load upper counter
707 sub w13, w8, #1
712 mov KS1.16b, KS0.16b
713 mov KS2.16b, KS0.16b
714 mov KS3.16b, KS0.16b
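KS1..KS3 start as copies of the same counter block; the surrounding code (not all matched here) then gives each lane its own counter value. GCM increments only the last 32 bits of the block, big-endian. A hedged sketch of that increment (ctr_inc32 is an illustrative name):

    #include <stdint.h>

    /* Add inc to the big-endian 32-bit counter in the last 4 bytes. */
    static void ctr_inc32(uint8_t block[16], uint32_t inc)
    {
            uint32_t ctr = ((uint32_t)block[12] << 24) |
                           ((uint32_t)block[13] << 16) |
                           ((uint32_t)block[14] << 8)  |
                            (uint32_t)block[15];

            ctr += inc;
            block[12] = ctr >> 24;
            block[13] = ctr >> 16;
            block[14] = ctr >> 8;
            block[15] = ctr;
    }

The four keystream blocks would then take offsets 0..3 from the current counter before each is run through the AES rounds.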
721 ld1 {K6.4s-K7.4s}, [x10], #32
727 .subsection 1
729 ld1 {K8.4s-K9.4s}, [x10], #32
733 ld1 {K6.4s-K7.4s}, [x10]
737 tbz x7, #1, .Lout192
749 aese KS0.16b, KL.16b
750 aese KS1.16b, KL.16b
751 aese KS2.16b, KL.16b
752 aese KS3.16b, KL.16b
754 eor KS0.16b, KS0.16b, KM.16b
755 eor KS1.16b, KS1.16b, KM.16b
756 eor KS2.16b, KS2.16b, KM.16b
757 eor KS3.16b, KS3.16b, KM.16b
759 eor INP0.16b, INP0.16b, KS0.16b
760 eor INP1.16b, INP1.16b, KS1.16b
761 eor INP2.16b, INP2.16b, KS2.16b
762 eor INP3.16b, INP3.16b, KS3.16b
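// (The CTR-mode step itself: each input block is XORed with its
// AES-encrypted counter block to produce the output.)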