Lines Matching cortex-a8
2 # SPDX-License-Identifier: GPL-2.0
21 # Performance is ~2x better than gcc 3.4 generated code and in
22 # "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
23 # byte [on single-issue XScale PXA250 core].
27 # Rescheduling for dual-issue pipeline resulted in 22% improvement on
28 # Cortex A8 core and ~20 cycles per processed byte.
32 # Profiler-assisted and platform-specific optimization resulted in 16%
33 # improvement on Cortex A8 core and ~15.4 cycles per processed byte.
37 # Add NEON implementation. On Cortex A8 it was measured to process one
38 # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
39 # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
40 # code (meaning that the latter performs sub-optimally, nothing was done
41 # about it).
47 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
80 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
82 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
97 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
99 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
111 eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
129 eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
156 eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
158 eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
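Editorial note: the eor/ror pairs above build the big-Sigma functions of FIPS 180-4 with the first rotation factored out. Rotation distributes over xor, so Sigma1(e) = ROR(e,6) ^ ROR(e,11) ^ ROR(e,25) can be computed as a single xor chain using the deltas 11-6 and 25-6, rotated by 6 only once at the end (that final rotate is folded into the following add). A minimal Perl sketch of the identity; ror32 and the two Sigma1_* subs are hypothetical helpers for illustration, not part of the script:

    # hypothetical helper: 32-bit rotate right
    sub ror32 { my ($x, $n) = @_; (($x >> $n) | ($x << (32 - $n))) & 0xffffffff }

    # direct form, straight from FIPS 180-4
    sub Sigma1_direct   { my $e = shift; ror32($e, 6) ^ ror32($e, 11) ^ ror32($e, 25) }

    # factored form mirroring the eor/ror pairs: the rotate amounts 5 and 19
    # are the `$Sigma1[n]-$Sigma1[0]` deltas; the ror#6 happens once, last
    sub Sigma1_factored { my $e = shift; ror32($e ^ ror32($e, 5) ^ ror32($e, 19), 6) }

The Sigma0(a) lines apply the same factoring with the rotate set (2, 13, 22).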
203 .size K256,.-K256
207 .word OPENSSL_armcap_P-sha256_block_data_order
229 stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
278 ldmia sp!,{r4-r11,pc}
280 ldmia sp!,{r4-r11,lr}
283 bx lr @ interoperable with Thumb ISA:-)
285 .size sha256_block_data_order,.-sha256_block_data_order
296 sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
297 sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
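Editorial note: Dlo()/Dhi() rely on NEON register aliasing, where quad register q<n> overlays the doubleword pair d<2n> (low half) and d<2n+1> (high half). An illustrative call, assuming the two subs above:

    print Dlo("q5"), " ", Dhi("q5"), "\n";   # prints "d10 d11"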
299 sub AUTOLOAD() # thunk [simplified] x86-style perlasm
329 &vsli_32 ($T2,$T0,32-$sigma0[0]);
338 &vsli_32 ($T3,$T0,32-$sigma0[1]);
347 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
362 &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
374 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
389 &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
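Editorial note: NEON has no rotate instruction, so each ROR(x,n) in the message schedule is synthesized as a vshr.u32 by n followed by a vsli.32 (shift left and insert) by 32-n into the same destination; that is why the shift amounts above are written as `32-$sigma0[n]` and `32-$sigma1[n]`. The functions being built are the small-sigma schedule functions of FIPS 180-4. A hedged scalar model in Perl, reusing the hypothetical ror32 helper sketched earlier:

    # the third term of each is a plain logical shift, so it needs only a
    # vshr.u32 with no vsli companion
    sub sigma0 { my $x = shift; ror32($x,  7) ^ ror32($x, 18) ^ ($x >>  3) }
    sub sigma1 { my $x = shift; ror32($x, 17) ^ ror32($x, 19) ^ ($x >> 10) }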
439 '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
442 '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
443 '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
447 '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
462 .arch armv7-a
470 stmdb sp!,{r4-r12,lr}
474 sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
475 bic $H,$H,#15 @ align for 128-bit stores
490 vrev32.8 @X[1],@X[1] @ big-endian
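Editorial note: SHA-256 treats the input block as sixteen big-endian 32-bit words, so on a little-endian core the NEON path byte-swaps each word with vrev32.8 right after loading. A scalar Perl equivalent, as a sketch (load_be32 is a hypothetical helper; "N" is Perl's big-endian 32-bit unpack format):

    sub load_be32 { unpack "N", shift }   # load_be32("\x01\x02\x03\x04") == 0x01020304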
506 ldmia $ctx,{$A-$H}
566 stmia $t1,{$E-$H}
577 ldmia sp!,{r4-r12,pc}
578 .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
607 sub $Ktbl,$Ktbl,#.LARMv8-K256
614 vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
615 vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
652 sub $Ktbl,$Ktbl,#256-16 @ rewind
670 .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
697 if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
701 # since ARMv7 instructions are always encoded little-endian.
703 # assemblers don't implement it:-(
719 s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
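Editorial note: an ARMv4-only assembler rejects the `bx` mnemonic (BX arrived with ARMv4T), so the postprocessor substitutes the raw instruction word; 0xe12fff1e is exactly the encoding of `bx lr`: condition AL, the BX opcode pattern, register operand r14. A quick field split in Perl, purely illustrative:

    printf "cond=%x opcode=%06x Rm=r%d\n",
           0xe12fff1e >> 28, (0xe12fff1e >> 4) & 0xffffff, 0xe12fff1e & 0xf;
    # prints: cond=e opcode=12fff1 Rm=r14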