@ SPDX-License-Identifier: GPL-2.0

@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.

@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte
@ [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cycles per byte on Apple A7.
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

.text
#if __ARM_ARCH__<7
.code	32
#else
.syntax unified
# ifdef __thumb2__
#  define adrl adr
.thumb
# else
.code   32
# endif
#endif

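@ K256 below holds the 64 SHA-256 round constants K[0..63]: the first
@ 32 bits of the fractional parts of the cube roots of the first 64
@ primes (FIPS 180-4, section 4.2.2).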
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha256_block_data_order
#endif
.align	5

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
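	@ In C terms this entry point follows the usual OpenSSL prototype
	@ (a sketch, not part of the original source):
	@   void sha256_block_data_order(u32 state[8], const void *in,
	@                                size_t num_blocks);
	@ r0 = state, r1 = input, r2 = number of 64-byte blocks.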
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,sha256_block_data_order
#endif
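	@ Runtime dispatch: outside the kernel, consult OPENSSL_armcap_P
	@ and branch to the ARMv8 Crypto Extensions or NEON code path when
	@ the corresponding feature bit is set.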
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
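	@ Stack frame (matching the "pull" loads at the end of the loop):
	@   sp+0*4..15*4 = X[16] message schedule
	@   sp+16*4 = ctx, sp+17*4 = inp, sp+18*4 = inp+len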
.Loop:
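	@ Each of the 64 rounds computes
	@   h += Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
	@ followed by d += h and h += Sigma0(a) + Maj(a,b,c); the Maj term
	@ is deferred and only folded into h at the start of the next round
	@ ("from the past"), which helps dual-issue scheduling.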
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
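	@ On the pre-v7 path the input is fetched a byte at a time and
	@ assembled into a big-endian word by hand, presumably to avoid
	@ relying on unaligned word loads and the rev instruction.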
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
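	@ Rounds 16..63 extend the message schedule in place, treating
	@ X[0..15] as a circular buffer:
	@   X[i&15] += sigma1(X[(i+14)&15]) + X[(i+9)&15] + sigma0(X[(i+1)&15])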
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

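@ The NEON path vectorizes the message-schedule expansion (sigma0 and
@ sigma1 over four words at a time, via vext/vshr/vsli/veor) and stores
@ the resulting X[i]+K256[i] values to the stack, where the scalar
@ integer rounds pick them up.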
.global	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	4
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	r11,sp,#16*4+16
	adrl	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
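	@ NEON frame layout (matching the stores below): sp+0..63 holds the
	@ X[i]+K256[i] values for the current 16 rounds; sp+64 = ctx,
	@ sp+68 = inp, sp+72 = inp end, sp+76 = caller's original sp.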

	vld1.8		{q0},[r1]!
	vld1.8		{q1},[r1]!
	vld1.8		{q2},[r1]!
	vld1.8		{q3},[r1]!
	vld1.32		{q8},[r14,:128]!
	vld1.32		{q9},[r14,:128]!
	vld1.32		{q10},[r14,:128]!
	vld1.32		{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str		r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str		r1,[sp,#68]
	mov		r1,sp
	vrev32.8	q2,q2
	str		r2,[sp,#72]
	vrev32.8	q3,q3
	str		r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32		{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32		{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32		{q10},[r1,:128]!
	vst1.32		{q11},[r1,:128]!

	ldmia		r0,{r4-r11}
	sub		r1,r1,#64
	ldr		r2,[sp,#0]
	eor		r12,r12,r12
	eor		r3,r5,r6
	b		.L_00_48

.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
2285	add	r5,r5,r12,ror#6
2286	eor	r12,r6,r7
2287	vshr.u32	d25,d6,#10
2288	eor	r0,r0,r6,ror#20
2289	add	r5,r5,r2
2290	veor	d25,d25,d24
2291	ldr	r2,[sp,#60]
2292	and	r3,r3,r12
2293	vshr.u32	d24,d6,#19
2294	add	r9,r9,r5
2295	add	r5,r5,r0,ror#2
2296	eor	r3,r3,r7
2297	vld1.32	{q8},[r14,:128]!
2298	add	r4,r4,r2
2299	vsli.32	d24,d6,#13
2300	eor	r2,r10,r11
2301	eor	r0,r9,r9,ror#5
2302	veor	d25,d25,d24
2303	add	r5,r5,r3
2304	and	r2,r2,r9
2305	vadd.i32	d7,d7,d25
2306	eor	r3,r0,r9,ror#19
2307	eor	r0,r5,r5,ror#11
2308	vadd.i32	q8,q8,q3
2309	eor	r2,r2,r11
2310	add	r4,r4,r3,ror#6
2311	eor	r3,r5,r6
2312	eor	r0,r0,r5,ror#20
2313	add	r4,r4,r2
2314	ldr	r2,[r14]
2315	and	r12,r12,r3
2316	add	r8,r8,r4
2317	vst1.32	{q8},[r1,:128]!
2318	add	r4,r4,r0,ror#2
2319	eor	r12,r12,r6
2320	teq	r2,#0				@ check for K256 terminator
2321	ldr	r2,[sp,#0]
2322	sub	r1,r1,#64
2323	bne	.L_00_48
2324
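@ Last 16 rounds: the message schedule is complete, so reload the input
@ pointer (saved at sp+#68) and the end-of-input pointer (sp+#72),
@ rewind r14 to the start of K256 and preload the next 64-byte block.
@ On the final block r1 == r0, so the pointer is first stepped back by
@ 64 bytes; the redundant load then stays inside the buffer instead of
@ faulting past its end.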
	ldr		r1,[sp,#68]
	ldr		r0,[sp,#72]
	sub		r14,r14,#256	@ rewind r14
	teq		r1,r0
	it		eq
	subeq		r1,r1,#64		@ avoid SEGV
	vld1.8		{q0},[r1]!		@ load next input block
	vld1.8		{q1},[r1]!
	vld1.8		{q2},[r1]!
	vld1.8		{q3},[r1]!
	it		ne
	strne		r1,[sp,#68]
	mov		r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8-r11}

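@ Nothing since the "teq r1,r0" above writes the flags, so NE here
@ still means "more input left": point r1 back at the expanded
@ schedule on the stack, reload X[0] and clear r12 for another pass
@ through .L_00_48; otherwise restore the original sp and return.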
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

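@ The sha256h/sha256h2/sha256su0/sha256su1 instructions below are
@ emitted as raw opcode bytes via INST() so the file assembles even
@ with toolchains that predate the SHA-256 mnemonics; in Thumb-2 the
@ two halfwords of the 32-bit NEON encoding trade places and the
@ leading opcode byte is adjusted (d|0xc), hence the second variant.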
# ifdef __thumb2__
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
# ifdef __thumb2__
	adr	r3,.LARMv8
	sub	r3,r3,#.LARMv8-K256
# else
	adrl	r3,K256
# endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

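@ Each pass through .Loop_v8 hashes one 64-byte block entirely in NEON
@ registers: q8-q11 hold the byte-reversed message, q0/q1 the state
@ (stashed in q14/q15 for the final accumulation), and every four-round
@ group adds a K256 constant from [r3]! before sha256h/sha256h2 advance
@ the state and sha256su0/sha256su1 extend the schedule.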
.Loop_v8:
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
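
@ Rounds 48-63: the schedule is fully expanded, so the last four round
@ groups only consume the remaining K256 constants, with no further
@ sha256su0/sha256su1 updates.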
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm   OPENSSL_armcap_P,4,4
#endif
