Lines Matching +full:2 +full:- +full:5
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly using vector/VSX/Scalar
11 # - 26 bits limbs
12 # - Handle multiple 64-byte blocks.
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
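
The "sum of products" the note refers to is the per-block recurrence a = (a + m) * r unrolled four times; in the file's own notation:

    h' = ((((h + m1) * r + m2) * r + m3) * r + m4) * r
       = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r    (mod 2^130 - 5)

which is why the setup below builds vectors of r^4, r^3, r^2 and r.
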
28 # setup r^4, r^3, r^2, r vectors
29 # vs [r^1, r^3, r^2, r^4]
35 # vs5 = [r1*5,...]
36 # vs6 = [r2*5,...]
37 # vs7 = [r3*5,...]
38 # vs8 = [r4*5,...]
42 # r0, r4*5, r3*5, r2*5, r1*5;
43 # r1, r0, r4*5, r3*5, r2*5;
44 # r2, r1, r0, r4*5, r3*5;
45 # r3, r2, r1, r0, r4*5;
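
The *5 columns in this table come from the limb base: with 26-bit limbs, 2^130 = (2^26)^5 ≡ 5 (mod 2^130 - 5), so any partial product that wraps past limb 4 re-enters at limb 0 scaled by 5. A minimal sketch of the vs5..vs8 precomputation (hypothetical helper name, not the kernel code):

    #include <stdint.h>

    /* Fold the modulus into the multiplier: s[i] = r[i] * 5 for i = 1..4,
     * mirroring the vs5..vs8 vectors above. With 26-bit limbs, r[i] * 5
     * stays well inside 32 bits. */
    static void precompute_s(const uint32_t r[5], uint32_t s[5])
    {
        for (int i = 1; i < 5; i++)
            s[i] = r[i] * 5;
    }
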
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
95 stdu 1,-752(1)
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
210 # p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5;
211 # p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5;
212 # p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5;
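
A scalar model of these product rows (the fifth row, p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0, follows the same matrix as above); a sketch assuming 26-bit limbs held in 64-bit words, with mul130 as a hypothetical name:

    #include <stdint.h>

    /* Scalar model of the p[] rows above; a[] and r[] are 26-bit limbs in
     * 64-bit words, sN = rN * 5. Each row sums five products of at most
     * 2^26 * 2^26 * 5 < 2^55, so the totals stay well inside 64 bits. */
    static void mul130(uint64_t p[5], const uint64_t a[5], const uint64_t r[5])
    {
        uint64_t s1 = r[1] * 5, s2 = r[2] * 5, s3 = r[3] * 5, s4 = r[4] * 5;

        p[0] = a[0]*r[0] + a[1]*s4   + a[2]*s3   + a[3]*s2   + a[4]*s1;
        p[1] = a[0]*r[1] + a[1]*r[0] + a[2]*s4   + a[3]*s3   + a[4]*s2;
        p[2] = a[0]*r[2] + a[1]*r[1] + a[2]*r[0] + a[3]*s4   + a[4]*s3;
        p[3] = a[0]*r[3] + a[1]*r[2] + a[2]*r[1] + a[3]*r[0] + a[4]*s4;
        p[4] = a[0]*r[4] + a[1]*r[3] + a[2]*r[2] + a[3]*r[1] + a[4]*r[0];
    }
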
215 # [r^2, r^3, r^1, r^4]
221 vmulouw 10, 5, 3
222 vmulouw 11, 6, 2
228 vmulouw 10, 5, 26
234 vmulouw 12, 7, 2
239 vmulouw 10, 5, 27
244 vmulouw 13, 8, 2
248 vmulouw 10, 5, 28
257 vmulouw 10, 5, 29
269 vmuleuw 10, 5, 3
270 vmuleuw 11, 6, 2
280 vmuleuw 10, 5, 26
282 vmuleuw 12, 7, 2
291 vmuleuw 10, 5, 27
294 vmuleuw 13, 8, 2
302 vmuleuw 10, 5, 28
313 vmuleuw 10, 5, 29
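
For readers unfamiliar with the VMX multiplies used above: vmulouw/vmuleuw multiply the odd-/even-numbered 32-bit words of two vectors, producing two 64-bit products each, so an even/odd pair covers four 32x32->64 partial products per operand pair. A rough scalar model (element numbering follows the ISA's big-endian convention; on little-endian targets the same lanes appear swapped):

    #include <stdint.h>

    static void vmulouw_model(uint64_t d[2], const uint32_t a[4], const uint32_t b[4])
    {
        d[0] = (uint64_t)a[1] * b[1];   /* odd word of first doubleword */
        d[1] = (uint64_t)a[3] * b[3];   /* odd word of second doubleword */
    }

    static void vmuleuw_model(uint64_t d[2], const uint32_t a[4], const uint32_t b[4])
    {
        d[0] = (uint64_t)a[0] * b[0];   /* even word of first doubleword */
        d[1] = (uint64_t)a[2] * b[2];   /* even word of second doubleword */
    }
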
327 # setup r^4, r^3, r^2, r vectors
328 # [r, r^3, r^2, r^4]
334 # vs5 = [r4*5,...]
335 # vs6 = [r3*5,...]
336 # vs7 = [r2*5,...]
337 # vs8 = [r1*5,...]
339 # r0, r4*5, r3*5, r2*5, r1*5;
340 # r1, r0, r4*5, r3*5, r2*5;
341 # r2, r1, r0, r4*5, r3*5;
342 # r3, r2, r1, r0, r4*5;
356 # [r, r^3, r^2, r^4]
357 # compute r^2
359 vmr 5, 27
363 bl do_mul # r^2 r^1
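
The r^2/r^1 staging above squares r once, then reuses the multiply to reach r^3 and r^4. A sketch in terms of the hypothetical mul130() from the earlier sketch (the real code carry-reduces limbs back to 26 bits between multiplies, omitted here):

    static void build_r_powers(uint64_t pw[4][5], const uint64_t r[5])
    {
        for (int i = 0; i < 5; i++)
            pw[0][i] = r[i];            /* r^1 */
        mul130(pw[1], pw[0], r);        /* r^2 = r * r */
        mul130(pw[2], pw[1], r);        /* r^3 = r^2 * r */
        mul130(pw[3], pw[1], pw[1]);    /* r^4 = (r^2)^2, needs reduced limbs */
    }
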
374 vspltisb 13, 2
381 vaddudm 2, 11, 29
386 vmrgow 27, 27, 5
390 vspltisb 13, 2
397 vaddudm 2, 11, 29
400 # r^2 r^4
403 xxlor 2, 60, 60
406 xxlor 5, 32, 32
412 vspltw 10, 26, 2
415 vspltw 10, 27, 2
418 vspltw 10, 28, 2
421 vspltw 10, 29, 2
424 vspltw 10, 30, 2
433 vaddudm 2, 11, 29
442 vspltisb 9, 2
453 vand 5, 15, 25
467 vaddudm 5, 5, 10
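
The vsrd/vand/vaddudm pattern here is the carry pass: bits above 26 in each limb move up one limb, and the carry out of limb 4 returns to limb 0 times 5, computed as c + (c << 2) with the shift count 2 that vspltisb splats. A sketch:

    #include <stdint.h>

    /* One carry pass over the 26-bit limbs. A single pass can leave h[0]
     * above 26 bits; the real code runs further partial passes. */
    static void carry_reduce(uint64_t h[5])
    {
        const uint64_t mask = (1ULL << 26) - 1;     /* the rmask constant */
        uint64_t c = 0;

        for (int i = 0; i < 5; i++) {
            h[i] += c;
            c = h[i] >> 26;
            h[i] &= mask;
        }
        h[0] += c + (c << 2);                       /* carry * 5 wraps around */
    }
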
476 addis 10, 2, rmask@toc@ha
484 addis 10, 2, cnum@toc@ha
486 lvx 25, 0, 10 # v25 - mask
512 # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
513 li 9, 5
517 vmulouw 2, 29, 4 # v2 = rr2
529 .align 5
530 cmpdi 5, 64
585 vaddudm 21, 5, 10
611 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
613 vmrgow 5, 10, 21
619 addi 5, 5, -64 # len -= 64
623 divdu 31, 5, 9
632 # h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) * r^4 + m3 * r^2, (h0 + m2) * r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
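
In other words, two interleaved accumulators reach a steady state where each 64-byte iteration computes (h + m1)*r^4 + m2*r^3 + m3*r^2 + m4*r. A sketch reusing the hypothetical mul130()/carry_reduce() helpers from the earlier sketches:

    static void blocks4(uint64_t h[5], const uint64_t m[4][5],
                        const uint64_t pw[4][5])    /* pw[0..3] = r^1..r^4 */
    {
        uint64_t t[5], prod[5], acc[5] = { 0 };

        for (int i = 0; i < 4; i++) {
            for (int j = 0; j < 5; j++)
                t[j] = m[i][j] + (i == 0 ? h[j] : 0);   /* h folds into m1 only */
            mul130(prod, t, pw[3 - i]);                 /* weight r^(4-i) */
            for (int j = 0; j < 5; j++)
                acc[j] += prod[j];
        }
        for (int j = 0; j < 5; j++)
            h[j] = acc[j];
        carry_reduce(h);                                /* back to 26-bit limbs */
    }
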
644 vspltisb 9, 2
655 vand 5, 15, 25
669 vaddudm 5, 5, 10
716 vaddudm 5, 5, 21
721 # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1]
723 vmrgow 5, 10, 5
729 addi 5, 5, -64 # len -= 64
737 xxlor 60, 2, 2
740 xxlor 32, 5, 5
754 vaddudm 5, 15, 10
767 vspltisb 9, 2
774 vaddudm 5, 5, 10
776 vsrd 11, 5, 31
778 vand 5, 5, 25
792 vaddudm 5, 5, 10
793 vsrd 10, 5, 31
794 vand 5, 5, 25
803 vsld 5, 5, 31
804 vor 20, 4, 5
813 mfvsrld 16, 40 # save last 2 bytes
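
Before the scalar tail takes over, the five 26-bit limbs are shifted and OR-ed back into a flat 2x64-bit value plus two high bits, which is what the vsld/vor sequence (and the mfvsrld spill) accomplishes. A sketch of that repacking:

    #include <stdint.h>

    /* Gather five 26-bit limbs into two 64-bit words plus the top two
     * bits (bits 128..129) of the 130-bit accumulator. */
    static void pack130(const uint64_t h[5], uint64_t *lo, uint64_t *hi, uint64_t *top)
    {
        *lo  = h[0] | (h[1] << 26) | (h[2] << 52);
        *hi  = (h[2] >> 12) | (h[3] << 14) | (h[4] << 40);
        *top = h[4] >> 24;
    }
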
844 addis 10, 2, rmask@toc@ha
856 srdi 21, 10, 2
857 add 19, 21, 10 # s1 in r19: r1 + (r1 >> 2) = (r1 >> 2) * 5
863 mtvsrdd 32+2, 19, 25 # s1
882 vmsumudm 10, 8, 2, 11 # d1 += h2 * s1
907 add 23, 23, 22 # (h2 & 3) * 5
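
This is the standard 64-bit scalar Poly1305 step: clamping zeroes r1's low two bits, so s1 = r1 + (r1 >> 2) equals (r1 >> 2) * 5 exactly, and bits of h at 2^130 and above fold back in multiplied by 5. A sketch (assumes a compiler with unsigned __int128; names are hypothetical):

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* One h *= r (mod 2^130 - 5) step over 64-bit words, matching the
     * s1/vmsumudm comments above; h[2] carries the few bits above 2^128. */
    static void scalar_mul(uint64_t h[3], uint64_t r0, uint64_t r1)
    {
        uint64_t s1 = r1 + (r1 >> 2);   /* == (r1 >> 2) * 5; r1 & 3 == 0 */
        u128 d0 = (u128)h[0] * r0 + (u128)h[1] * s1;
        u128 d1 = (u128)h[0] * r1 + (u128)h[1] * r0 + (u128)h[2] * s1;
        uint64_t h2 = h[2] * r0;        /* h[2] is tiny, no overflow */
        uint64_t c;

        h[0] = (uint64_t)d0;
        d1  += (uint64_t)(d0 >> 64);
        h[1] = (uint64_t)d1;
        h2  += (uint64_t)(d1 >> 64);

        c    = (h2 >> 2) + (h2 & ~3ULL);    /* bits >= 130, times 5 */
        h[2] = h2 & 3;
        h[0] += c;
        h[1] += (h[0] < c);                 /* ripple the carry */
    }
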
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
931 stdu 1,-400(1)
966 divdu 31, 5, 30
1039 # h + 5 + (-p)
1043 addic. 6, 6, 5
1046 srdi 9, 8, 2 # overflow?
1060 std 10, 0(5)
1061 std 11, 8(5)
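
The finale computes h + 5; if the sum reaches 2^130 then h >= p and the answer is h + 5 - 2^130 (i.e. h + (-p)), otherwise h itself, selected by mask just as the addic./srdi "overflow?" check above does. A sketch:

    #include <stdint.h>

    /* Constant-time final reduction: g = h + 5; if g carries into bit 130,
     * the reduced result is g with that bit dropped, else h unchanged.
     * Assumes h[2] <= 3 after full carry propagation. */
    static void final_reduce(const uint64_t h[3], uint64_t out[2])
    {
        uint64_t g0 = h[0] + 5;
        uint64_t c  = (g0 < 5);
        uint64_t g1 = h[1] + c;
        c = (g1 < c);
        uint64_t g2 = h[2] + c;
        uint64_t mask = 0 - (g2 >> 2);      /* all-ones iff bit 130 set */

        out[0] = (g0 & mask) | (h[0] & ~mask);
        out[1] = (g1 & mask) | (h[1] & ~mask);
    }
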
1066 .align 5