Lines Matching +full:cortex +full:- +full:a8
2 # SPDX-License-Identifier: GPL-2.0
22 # by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
27 # Rescheduling for dual-issue pipeline resulted in 6% improvement on
28 # Cortex A8 core and ~40 cycles per processed byte.
32 # Profiler-assisted and platform-specific optimization resulted in 7%
33 # improvement on Cortex A8 core and ~38 cycles per byte.
37 # Add NEON implementation. On Cortex A8 it was measured to process
38 # one byte in 23.3 cycles or ~60% faster than integer-only code.
44 # Technical writers asserted that the 3-way S4 pipeline can sustain
46 # not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
47 # for further details. On a side note, Cortex-A15 processes one byte in
53 # h[0-7], namely with most significant dword at *lower* address, which
55 # expected to maintain native byte order for whole 64-bit values.
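The comment at lines 53-55 documents a calling-convention change: originally each 64-bit state word h[0-7] had to be stored with its most significant 32-bit dword at the lower address, whereas the current code expects plain native byte order. A minimal C sketch of the two layouts, with hypothetical helper names:

    #include <stdint.h>

    /* Legacy convention: most significant dword of each 64-bit state
     * word at the *lower* address, regardless of host endianness. */
    static void store_msw_first(uint32_t out[2], uint64_t h)
    {
        out[0] = (uint32_t)(h >> 32);   /* MSW at the lower address */
        out[1] = (uint32_t)h;           /* LSW follows */
    }

    /* Current convention: the whole 64-bit value in native byte
     * order, i.e. an ordinary store. */
    static void store_native(uint64_t *out, uint64_t h)
    {
        *out = h;
    }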
60 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
190 # define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
191 # define VFP_ABI_POP vldmia sp!,{d8-d15}
264 .size K512,.-K512
267 .word OPENSSL_armcap_P-sha512_block_data_order
268 .skip 32-4
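Lines 267-268 embed the link-time offset OPENSSL_armcap_P-sha512_block_data_order in the text section, so the entry point can locate the capability word PC-relatively and branch to the NEON body when NEON is advertised. A C-level sketch of that dispatch decision; the flag's bit position and the two body names are assumptions here:

    #include <stdint.h>
    #include <stddef.h>

    extern unsigned int OPENSSL_armcap_P;   /* runtime CPU capability bits */
    #define ARMV7_NEON (1 << 0)             /* assumed bit position */

    /* Hypothetical prototypes standing in for the two assembly bodies. */
    void sha512_block_integer(uint64_t *ctx, const void *inp, size_t num);
    void sha512_block_neon(uint64_t *ctx, const void *inp, size_t num);

    /* What the capability test and branch at the entry point amount to. */
    void sha512_block_data_order(uint64_t *ctx, const void *inp, size_t num)
    {
        if (OPENSSL_armcap_P & ARMV7_NEON)
            sha512_block_neon(ctx, inp, num);
        else
            sha512_block_integer(ctx, inp, num);
    }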
289 stmdb sp!,{r4-r12,lr}
352 ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
353 ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
360 ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
362 ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
386 ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
389 ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
405 ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0]
406 ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4]
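The loads at lines 352-406 walk a 16-entry ring of 64-bit schedule words kept on the stack, one 32-bit ldr per half (the +0/+4 pairs). The 16-k notation makes the distances back into the window explicit: 8*(16-1) reaches W[t-15], 8*(16-14) reaches W[t-2], and 8*(16-9) reaches W[t-7], while the slot about to be overwritten still holds W[t-16]. A self-contained C sketch of that recurrence:

    #include <stdint.h>

    static inline uint64_t ror64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    /* SHA-512 message schedule over the same 16-entry circular window
     * the assembly keeps on the stack.  Call with t >= 16. */
    static uint64_t next_W(uint64_t X[16], unsigned t)
    {
        uint64_t w15 = X[(t + 1) % 16];   /* W[t-15] */
        uint64_t w2  = X[(t + 14) % 16];  /* W[t-2]  */
        uint64_t s0  = ror64(w15, 1) ^ ror64(w15, 8) ^ (w15 >> 7);
        uint64_t s1  = ror64(w2, 19) ^ ror64(w2, 61) ^ (w2 >> 6);

        /* X[t % 16] still holds W[t-16]; X[(t+9) % 16] is W[t-7]. */
        X[t % 16] += s1 + X[(t + 9) % 16] + s0;
        return X[t % 16];
    }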
482 ldmia sp!,{r4-r12,pc}
484 ldmia sp!,{r4-r12,lr}
487 bx lr @ interoperable with Thumb ISA:-)
489 .size sha512_block_data_order,.-sha512_block_data_order
499 my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch
522 vsli.64 $t0,$e,#`64-@Sigma1[0]`
523 vsli.64 $t1,$e,#`64-@Sigma1[1]`
525 vsli.64 $t2,$e,#`64-@Sigma1[2]`
535 vsli.64 $t0,$a,#`64-@Sigma0[0]`
539 vsli.64 $t1,$a,#`64-@Sigma0[1]`
541 vsli.64 $t2,$a,#`64-@Sigma0[2]`
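NEON has no 64-bit rotate instruction, so each rotation in the big Sigma functions is synthesized from a pair: vshr.u64 by n supplies the low bits and vsli.64 by 64-n inserts the wrapped-around high bits into the same register. The amounts behind the backtick expressions are the standard SHA-512 rotations, @Sigma1=(14,18,41) and @Sigma0=(28,34,39). The scalar equivalent in C:

    #include <stdint.h>

    /* What one vshr.u64/vsli.64 pair composes: a 64-bit rotate right. */
    static inline uint64_t rotr64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    static inline uint64_t Sigma1(uint64_t e)
    {
        return rotr64(e, 14) ^ rotr64(e, 18) ^ rotr64(e, 41);
    }

    static inline uint64_t Sigma0(uint64_t a)
    {
        return rotr64(a, 28) ^ rotr64(a, 34) ^ rotr64(a, 39);
    }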
557 # 2x-vectorized, therefore runs every 2nd round
558 my @X=map("q$_",(0..7)); # view @X as 128-bit vector
568 vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]`
570 vsli.64 $t1,@X[($i+7)%8],#`64-@sigma1[1]`
577 vsli.64 $t0,$s0,#`64-@sigma0[0]`
578 vsli.64 $t1,$s0,#`64-@sigma0[1]`
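The schedule's small sigmas get the same vshr/vsli treatment, but here the code is 2x-vectorized: @X is viewed as eight 128-bit q registers, each carrying two 64-bit schedule words, so the update runs every second round and produces two W values at once. Only two vsli pairs appear per sigma because the third term of each small sigma is a plain shift, not a rotation. In scalar C, under the standard SHA-512 constants:

    #include <stdint.h>

    static inline uint64_t rotr64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    /* Two rotations (the vshr/vsli pairs above) plus one plain right
     * shift, which needs no vsli at all. */
    static inline uint64_t sigma0(uint64_t x)  /* @sigma0=(1,8,7) */
    {
        return rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7);
    }

    static inline uint64_t sigma1(uint64_t x)  /* @sigma1=(19,61,6) */
    {
        return rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6);
    }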
593 .arch armv7-a
601 dmb @ errata #451034 on early Cortex A8
605 sub $Ktbl,$Ktbl,.Lsha512_block_data_order-K512
606 vldmia $ctx,{$A-$H} @ load context
620 vldmia $ctx,{d24-d31} @ load context to temp
625 vstmia $ctx,{$A-$H} @ save context
632 .size sha512_block_data_order_neon,.-sha512_block_data_order_neon
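The vldmia/vstmia lines show the per-block feed-forward: the working variables start from the context (line 606), and after the 80 rounds the previous state is reloaded into d24-d31 (line 620), added in, and the sums are written back (line 625). A C sketch of that pattern, with the round function left as an assumed extern:

    #include <stdint.h>
    #include <string.h>

    /* Assumed prototype; the real 80 rounds live in the assembly. */
    void sha512_do_rounds(uint64_t v[8], const uint64_t W[80]);

    static void sha512_block(uint64_t ctx[8], const uint64_t W[80])
    {
        uint64_t v[8];

        memcpy(v, ctx, sizeof(v));   /* vldmia ctx,{A-H}: load context */
        sha512_do_rounds(v, W);      /* 80 compression rounds */
        for (int i = 0; i < 8; i++)
            ctx[i] += v[i];          /* reload old state, vadd, vstmia */
    }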
645 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 (0xe12fff1e is the encoding of "bx lr")