Lines Matching +full:2 +full:d

87 	add	x12,x12,x13,lsl#26	// base 2^26 -> base 2^64
97 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
139 add x10,x10,x14,lsr#2
157 ldp x4,x5,[x0] // load hash base 2^64
175 add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
241 add x10,x10,x14,lsr#2
252 and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
260 add w12,w13,w13,lsl#2 // r1*5
262 add w13,w14,w14,lsl#2 // r2*5
263 str w12,[x0,#16*2] // s1
265 add w14,w15,w15,lsl#2 // r3*5
268 add w15,w16,w16,lsl#2 // r4*5
298 ldp w10,w11,[x0] // load hash value base 2^26
307 add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
318 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
330 and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
343 ldp x4,x5,[x0] // load hash value base 2^64
351 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
364 and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
382 add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
388 bl poly1305_mult // r^2
411 ldp x8,x12,[x1,#32] // inp[2:3]
426 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
462 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
481 movi v31.2d,#-1
485 ushr v31.2d,v31.2d,#38
492 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
493 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
495 // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
496 // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
499 // Note that we start with inp[2:3]*r^2. This is because it
509 umull v23.2d,v14.2s,v7.s[2]
511 umull v22.2d,v14.2s,v5.s[2]
512 umull v21.2d,v14.2s,v3.s[2]
513 ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
514 umull v20.2d,v14.2s,v1.s[2]
516 umull v19.2d,v14.2s,v0.s[2]
524 umlal v23.2d,v15.2s,v5.s[2]
525 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
526 umlal v22.2d,v15.2s,v3.s[2]
528 umlal v21.2d,v15.2s,v1.s[2]
530 umlal v20.2d,v15.2s,v0.s[2]
532 umlal v19.2d,v15.2s,v8.s[2]
535 umlal v23.2d,v16.2s,v3.s[2]
537 umlal v22.2d,v16.2s,v1.s[2]
539 umlal v21.2d,v16.2s,v0.s[2]
541 umlal v20.2d,v16.2s,v8.s[2]
543 umlal v19.2d,v16.2s,v6.s[2]
546 umlal v23.2d,v17.2s,v1.s[2]
548 umlal v22.2d,v17.2s,v0.s[2]
550 umlal v21.2d,v17.2s,v8.s[2]
552 umlal v20.2d,v17.2s,v6.s[2]
554 umlal v19.2d,v17.2s,v4.s[2]
557 add v11.2s,v11.2s,v26.2s
559 umlal v23.2d,v18.2s,v0.s[2]
561 umlal v22.2d,v18.2s,v8.s[2]
563 umlal v21.2d,v18.2s,v6.s[2]
565 umlal v20.2d,v18.2s,v4.s[2]
567 umlal v19.2d,v18.2s,v2.s[2]
573 add v9.2s,v9.2s,v24.2s
575 umlal v22.2d,v11.2s,v1.s[0]
577 umlal v19.2d,v11.2s,v6.s[0]
579 umlal v23.2d,v11.2s,v3.s[0]
580 umlal v20.2d,v11.2s,v8.s[0]
581 umlal v21.2d,v11.2s,v0.s[0]
589 add v10.2s,v10.2s,v25.2s
590 umlal v22.2d,v9.2s,v5.s[0]
591 umlal v23.2d,v9.2s,v7.s[0]
592 and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
593 umlal v21.2d,v9.2s,v3.s[0]
595 umlal v19.2d,v9.2s,v0.s[0]
597 umlal v20.2d,v9.2s,v1.s[0]
600 add v12.2s,v12.2s,v27.2s
602 umlal v22.2d,v10.2s,v3.s[0]
604 umlal v23.2d,v10.2s,v5.s[0]
606 umlal v19.2d,v10.2s,v8.s[0]
608 umlal v21.2d,v10.2s,v1.s[0]
610 umlal v20.2d,v10.2s,v0.s[0]
613 add v13.2s,v13.2s,v28.2s
615 umlal v22.2d,v12.2s,v0.s[0]
617 umlal v19.2d,v12.2s,v4.s[0]
619 umlal v23.2d,v12.2s,v1.s[0]
621 umlal v20.2d,v12.2s,v6.s[0]
623 umlal v21.2d,v12.2s,v8.s[0]
626 umlal v22.2d,v13.2s,v8.s[0]
628 umlal v19.2d,v13.2s,v2.s[0]
630 umlal v23.2d,v13.2s,v0.s[0]
632 umlal v20.2d,v13.2s,v4.s[0]
634 umlal v21.2d,v13.2s,v6.s[0]
639 // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
644 ushr v29.2d,v22.2d,#26
645 xtn v27.2s,v22.2d
646 ushr v30.2d,v19.2d,#26
648 add v23.2d,v23.2d,v29.2d // h3 -> h4
649 bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
650 add v20.2d,v20.2d,v30.2d // h0 -> h1
652 ushr v29.2d,v23.2d,#26
653 xtn v28.2s,v23.2d
654 ushr v30.2d,v20.2d,#26
655 xtn v25.2s,v20.2d
656 bic v28.2s,#0xfc,lsl#24
657 add v21.2d,v21.2d,v30.2d // h1 -> h2
659 add v19.2d,v19.2d,v29.2d
660 shl v29.2d,v29.2d,#2
661 shrn v30.2s,v21.2d,#26
662 xtn v26.2s,v21.2d
663 add v19.2d,v19.2d,v29.2d // h4 -> h0
664 bic v25.2s,#0xfc,lsl#24
665 add v27.2s,v27.2s,v30.2s // h2 -> h3
666 bic v26.2s,#0xfc,lsl#24
668 shrn v29.2s,v19.2d,#26
669 xtn v24.2s,v19.2d
670 ushr v30.2s,v27.2s,#26
671 bic v27.2s,#0xfc,lsl#24
672 bic v24.2s,#0xfc,lsl#24
673 add v25.2s,v25.2s,v29.2s // h0 -> h1
674 add v28.2s,v28.2s,v30.2s // h3 -> h4
679 dup v16.2d,v16.d[0]
680 add v11.2s,v11.2s,v26.2s
683 // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
688 dup v16.2d,v11.d[0]
689 add v14.2s,v9.2s,v24.2s
690 add v17.2s,v12.2s,v27.2s
691 add v15.2s,v10.2s,v25.2s
692 add v18.2s,v13.2s,v28.2s
695 dup v14.2d,v14.d[0]
696 umull2 v19.2d,v16.4s,v6.4s
697 umull2 v22.2d,v16.4s,v1.4s
698 umull2 v23.2d,v16.4s,v3.4s
699 umull2 v21.2d,v16.4s,v0.4s
700 umull2 v20.2d,v16.4s,v8.4s
702 dup v15.2d,v15.d[0]
703 umlal2 v19.2d,v14.4s,v0.4s
704 umlal2 v21.2d,v14.4s,v3.4s
705 umlal2 v22.2d,v14.4s,v5.4s
706 umlal2 v23.2d,v14.4s,v7.4s
707 umlal2 v20.2d,v14.4s,v1.4s
709 dup v17.2d,v17.d[0]
710 umlal2 v19.2d,v15.4s,v8.4s
711 umlal2 v22.2d,v15.4s,v3.4s
712 umlal2 v21.2d,v15.4s,v1.4s
713 umlal2 v23.2d,v15.4s,v5.4s
714 umlal2 v20.2d,v15.4s,v0.4s
716 dup v18.2d,v18.d[0]
717 umlal2 v22.2d,v17.4s,v0.4s
718 umlal2 v23.2d,v17.4s,v1.4s
719 umlal2 v19.2d,v17.4s,v4.4s
720 umlal2 v20.2d,v17.4s,v6.4s
721 umlal2 v21.2d,v17.4s,v8.4s
723 umlal2 v22.2d,v18.4s,v8.4s
724 umlal2 v19.2d,v18.4s,v2.4s
725 umlal2 v23.2d,v18.4s,v0.4s
726 umlal2 v20.2d,v18.4s,v4.4s
727 umlal2 v21.2d,v18.4s,v6.4s
734 add v9.2s,v9.2s,v24.2s
735 umlal v22.2d,v11.2s,v1.2s
736 umlal v19.2d,v11.2s,v6.2s
737 umlal v23.2d,v11.2s,v3.2s
738 umlal v20.2d,v11.2s,v8.2s
739 umlal v21.2d,v11.2s,v0.2s
741 add v10.2s,v10.2s,v25.2s
742 umlal v22.2d,v9.2s,v5.2s
743 umlal v19.2d,v9.2s,v0.2s
744 umlal v23.2d,v9.2s,v7.2s
745 umlal v20.2d,v9.2s,v1.2s
746 umlal v21.2d,v9.2s,v3.2s
748 add v12.2s,v12.2s,v27.2s
749 umlal v22.2d,v10.2s,v3.2s
750 umlal v19.2d,v10.2s,v8.2s
751 umlal v23.2d,v10.2s,v5.2s
752 umlal v20.2d,v10.2s,v0.2s
753 umlal v21.2d,v10.2s,v1.2s
755 add v13.2s,v13.2s,v28.2s
756 umlal v22.2d,v12.2s,v0.2s
757 umlal v19.2d,v12.2s,v4.2s
758 umlal v23.2d,v12.2s,v1.2s
759 umlal v20.2d,v12.2s,v6.2s
760 umlal v21.2d,v12.2s,v8.2s
762 umlal v22.2d,v13.2s,v8.2s
763 umlal v19.2d,v13.2s,v2.2s
764 umlal v23.2d,v13.2s,v0.2s
765 umlal v20.2d,v13.2s,v4.2s
766 umlal v21.2d,v13.2s,v6.2s
772 addp v22.2d,v22.2d,v22.2d
774 addp v19.2d,v19.2d,v19.2d
776 addp v23.2d,v23.2d,v23.2d
778 addp v20.2d,v20.2d,v20.2d
780 addp v21.2d,v21.2d,v21.2d
787 ushr v29.2d,v22.2d,#26
789 ushr v30.2d,v19.2d,#26
792 add v23.2d,v23.2d,v29.2d // h3 -> h4
793 add v20.2d,v20.2d,v30.2d // h0 -> h1
795 ushr v29.2d,v23.2d,#26
797 ushr v30.2d,v20.2d,#26
799 add v21.2d,v21.2d,v30.2d // h1 -> h2
801 add v19.2d,v19.2d,v29.2d
802 shl v29.2d,v29.2d,#2
803 ushr v30.2d,v21.2d,#26
805 add v19.2d,v19.2d,v29.2d // h4 -> h0
806 add v22.2d,v22.2d,v30.2d // h2 -> h3
808 ushr v29.2d,v19.2d,#26
810 ushr v30.2d,v22.2d,#26
812 add v20.2d,v20.2d,v29.2d // h0 -> h1
813 add v23.2d,v23.2d,v30.2d // h3 -> h4
831 .align 2