1#*******************************************************************************
2#*******************************************************************************
3#
4# Copyright (c) Freescale Semiconductor, Inc 2011.
5#
6# FILE NAME      : mmcau_sha1_functions.s
7# VERSION        : $Id: mmcau_sha1_functions.s.rca 1.5 Thu Nov 21 14:17:37 2013 b40907 Experimental $
8# TYPE           : Source Cortex-Mx assembly library code
9# DEPARTMENT     : MSG R&D Core and Platforms
10# AUTHOR         : David Schimke
11# AUTHOR'S EMAIL : David.Schimke@freescale.com
12# -----------------------------------------------------------------------------
13# Release history
14# VERSION  Date       AUTHOR           DESCRIPTION
15#          08-2010    David Schimke    Initial Release
16#          12-2010    David Schimke    Remove "global" on data objects
17#          01-2011    David Schimke    Header added
18#          11-2013    Teejay Ciancio   Cleanup
19#
20#*******************************************************************************
21#*******************************************************************************
22
23    .include "cau2_defines.hdr"
24    .equ    MMCAU_PPB_DIRECT,0xe0081000
25    .equ    MMCAU_PPB_INDIRECT,0xe0081800
26    .equ    MMCAU_1_CMD, 0x80000000
27    .equ    MMCAU_2_CMDS, 0x80100000
28
29    .syntax unified
30
#*******************************************************************************
#*******************************************************************************
#
# SHA1: Initializes the SHA1 state variables
#   Copies the five 32-bit words found at sha1_initial_h into the caller's
#   state buffer.
#
#   arguments
#           *sha1_state pointer to 160-bit block of SHA1 state variables:
#                           a,b,c,d,e
#
#   calling convention
#   void    mmcau_sha1_initialize_output (const unsigned int *sha1_state)
#
#   NOTE    the five words at sha1_state ARE written by this routine (see the
#           stmia below) even though the prototype qualifies the pointer
#           as const
#
#   register usage
#           r0      = sha1_state (arg0), left unchanged
#           r1      = address of sha1_initial_h, then word 0 of the table
#           r2-r3   = words 1-2 of the table
#           r4-r5   = words 3-4 of the table; saved on entry, restored on exit
#           stack   = 8 bytes (r4, r5)

    .global _mmcau_sha1_initialize_output
    .global mmcau_sha1_initialize_output
# Both exported names get a function type so that the underscore-prefixed
# alias is not left as an untyped symbol.
# NOTE(review): assumes this file is assembled as Thumb code, where only typed
# function symbols are treated as Thumb entry points -- confirm against build.
    .type   _mmcau_sha1_initialize_output, %function
    .type   mmcau_sha1_initialize_output, %function
    .align  4

_mmcau_sha1_initialize_output:
mmcau_sha1_initialize_output:

    stmdb   sp!, {r4-r5}                            @ save r4, r5 (restored below)

    movw    r1, #:lower16:sha1_initial_h            @ r1 -> initial data
    movt    r1, #:upper16:sha1_initial_h

# copy initial data into hash output buffer
# (r1 is both the base and the first destination; there is no writeback, so
#  the table pointer is simply replaced by word 0 of the table)
    ldmia   r1, {r1-r5}                             @ get sha1[0-4]
    stmia   r0, {r1-r5}                             @ copy to sha1_state[0-4]

    ldmia   sp!, {r4-r5}                            @ restore registers
    bx      lr

    .size   mmcau_sha1_initialize_output, .-mmcau_sha1_initialize_output
    .size   _mmcau_sha1_initialize_output, .-_mmcau_sha1_initialize_output
61
62
63#*******************************************************************************
64#*******************************************************************************
65#
66# SHA1: Perform the hash and generate SHA1 state variables for one or more
67#       input message blocks
68#
69#   arguments
70#           *msg_data   pointer to start of input message data
71#           num_blks    number of 512-bit blocks to process
72#           *sha1_state pointer to 160-bit block of SHA1 state variables:
73#                           a,b,c,d,e
74#
75#   NOTE    Input message and digest output blocks must not overlap
76#
77#   calling convention
78#   void    mmcau_sha1_hash_n (const unsigned char *msg_data,
79#                              const int            num_blks,
80#                              unsigned int        *sha1_state)
81
82    .global _mmcau_sha1_hash_n
83    .global mmcau_sha1_hash_n
84    .type   mmcau_sha1_hash_n, %function
85    .align  4
86
87_mmcau_sha1_hash_n:
88mmcau_sha1_hash_n:
89
90#  register allocation
91# --------------------
92#  r0      = scratch / input pointer (arg0)
93#  r1      = scratch / input num_blks (arg1)
94#  r2      = scratch / output pointer (arg2)
95#  r3      = scratch
96#  r4      = scratch
97#  r5      = scratch / mmcau_1_cmd(SHS)
98#  r6      = scratch / mmcau_2_cmds(HASH+HFC,ADRA+CA4)
99#  r7      = scratch
100#  r8      = scratch / mmcau_2_cmds(HASH+HFP,ADRA+CA4)
101#  r9      = scratch / mmcau_2_cmds(HASH+HFM,ADRA+CA4)
102# r10 (sl) = scratch / pointer to sha1_k
103# r11 (fp) = pointer to MMCAU_PPB_DIRECT
104
105    stmdb   sp!, {r4-fp}                            @ save registers on stack
106
107    sub     sp, $384                                @ reserve stack space
108
109    movw    fp, #:lower16:MMCAU_PPB_DIRECT          @ fp -> MMCAU_PPB_DIRECT
110    movt    fp, #:upper16:MMCAU_PPB_DIRECT
111
112    add     r8, fp, $0x800+((LDR+CA0)<<2)           @ r8 = INDIRECT (LDR+CA0)
113    add     r9, sp, $28                             @ r9 -> sha1_state (stack)
114
115# initialize the CAU data registers with the current contents of sha1_state[]
116    ldmia   r2, {r3-r7}                             @ get sha1_state[0-4]
117    stmia   r8, {r3-r7}                             @ load CA0-CA4
118
119    .align  2
120next_blk:
121    stmia   r9, {r3-r7}                             @ copy sha1_state to stack
122
123    ror     r5, r3, $27                             @ rotate CA0 by 5
124    str     r5, [fp, $0x800+((LDR+CAA)<<2)]         @ load into CAA
125
126    movw    r5, #:lower16:MMCAU_1_CMD+(SHS)<<22
127    movw    r6, #:lower16:MMCAU_2_CMDS+(HASH+HFC)<<22+(ADRA+CA4)<<11
128    movw    sl, #:lower16:sha1_k
129    movt    r5, #:upper16:MMCAU_1_CMD+(SHS)<<22
130    movt    r6, #:upper16:MMCAU_2_CMDS+(HASH+HFC)<<22+(ADRA+CA4)<<11
131    movt    sl, #:upper16:sha1_k
132
133# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
134#   for (j = 0; j < 16; j++, k++)
135#   {
136#       w[i] = byterev(msg_data[k]);                           // m[k] -> w[i]
137#       *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(HASH+HFC,ADRA+CA4); // +Ch(b,c,d),+e
138#       *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha1_k[0];        // add k[0]
139#       *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = w[i++];           // add w[i]
140#       *(MMCAU_PPB_DIRECT) = mmcau_1_cmd(SHS);                // shift regs
141#   }
142# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
143
144# -- (loop unrolled)
145
146    ldr     r7, [sl], $4                            @ get k[0]; sl++
147
148    ldr     r3, [r0], $4                            @ r3 = input[0]
149    rev     r4, r3                                  @ byte reverse
150    str     r6, [fp]                                @ +Ch(b,c,d), +e
151    str     r4, [sp, $64]                           @ w[0] = m[0]
152    add     r4, r7                                  @ add k[0] to w[0]
153    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
154    str     r5, [fp]                                @ shift registers
155
156    ldr     r3, [r0], $4                            @ r3 = input[1]
157    rev     r4, r3                                  @ byte reverse
158    str     r6, [fp]                                @ +Ch(b,c,d), +e
159    str     r4, [sp, $68]                           @ w[1] = m[1]
160    add     r4, r7                                  @ add k[0] to w[1]
161    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
162    str     r5, [fp]                                @ shift registers
163
164    ldr     r3, [r0], $4                            @ r3 = input[2]
165    rev     r4, r3                                  @ byte reverse
166    str     r6, [fp]                                @ +Ch(b,c,d), +e
167    str     r4, [sp, $72]                           @ w[2] = m[2]
168    add     r4, r7                                  @ add k[0] to w[2]
169    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
170    str     r5, [fp]                                @ shift registers
171
172    ldr     r3, [r0], $4                            @ r3 = input[3]
173    rev     r4, r3                                  @ byte reverse
174    str     r6, [fp]                                @ +Ch(b,c,d), +e
175    str     r4, [sp, $76]                           @ w[3] = m[3]
176    add     r4, r7                                  @ add k[0] to w[3]
177    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
178    str     r5, [fp]                                @ shift registers
179
180    ldr     r3, [r0], $4                            @ r3 = input[4]
181    rev     r4, r3                                  @ byte reverse
182    str     r6, [fp]                                @ +Ch(b,c,d), +e
183    str     r4, [sp, $80]                           @ w[4] = m[4]
184    add     r4, r7                                  @ add k[0] to w[4]
185    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
186    str     r5, [fp]                                @ shift registers
187
188    ldr     r3, [r0], $4                            @ r3 = input[5]
189    rev     r4, r3                                  @ byte reverse
190    str     r6, [fp]                                @ +Ch(b,c,d), +e
191    str     r4, [sp, $84]                           @ w[5] = m[5]
192    add     r4, r7                                  @ add k[0] to w[5]
193    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
194    str     r5, [fp]                                @ shift registers
195
196    ldr     r3, [r0], $4                            @ r3 = input[6]
197    rev     r4, r3                                  @ byte reverse
198    str     r6, [fp]                                @ +Ch(b,c,d), +e
199    str     r4, [sp, $88]                           @ w[6] = m[6]
200    add     r4, r7                                  @ add k[0] to w[6]
201    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
202    str     r5, [fp]                                @ shift registers
203
204    ldr     r3, [r0], $4                            @ r3 = input[7]
205    rev     r4, r3                                  @ byte reverse
206    str     r6, [fp]                                @ +Ch(b,c,d), +e
207    str     r4, [sp, $92]                           @ w[7] = m[7]
208    add     r4, r7                                  @ add k[0] to w[7]
209    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
210    str     r5, [fp]                                @ shift registers
211
212    ldr     r3, [r0], $4                            @ r3 = input[8]
213    rev     r4, r3                                  @ byte reverse
214    str     r6, [fp]                                @ +Ch(b,c,d), +e
215    str     r4, [sp, $96]                           @ w[8] = m[8]
216    add     r4, r7                                  @ add k[0] to w[8]
217    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
218    str     r5, [fp]                                @ shift registers
219
220    ldr     r3, [r0], $4                            @ r3 = input[9]
221    rev     r4, r3                                  @ byte reverse
222    str     r6, [fp]                                @ +Ch(b,c,d), +e
223    str     r4, [sp, $100]                          @ w[9] = m[9]
224    add     r4, r7                                  @ add k[0] to w[9]
225    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
226    str     r5, [fp]                                @ shift registers
227
228    ldr     r3, [r0], $4                            @ r3 = input[10]
229    rev     r4, r3                                  @ byte reverse
230    str     r6, [fp]                                @ +Ch(b,c,d), +e
231    str     r4, [sp, $104]                          @ w[10] = m[10]
232    add     r4, r7                                  @ add k[0] to w[10]
233    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
234    str     r5, [fp]                                @ shift registers
235
236    ldr     r3, [r0], $4                            @ r3 = input[11]
237    rev     r4, r3                                  @ byte reverse
238    str     r6, [fp]                                @ +Ch(b,c,d), +e
239    str     r4, [sp, $108]                          @ w[11] = m[11]
240    add     r4, r7                                  @ add k[0] to w[11]
241    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
242    str     r5, [fp]                                @ shift registers
243
244    ldr     r3, [r0], $4                            @ r3 = input[12]
245    rev     r4, r3                                  @ byte reverse
246    str     r6, [fp]                                @ +Ch(b,c,d), +e
247    str     r4, [sp, $112]                          @ w[12] = m[12]
248    add     r4, r7                                  @ add k[0] to w[12]
249    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
250    str     r5, [fp]                                @ shift registers
251
252    ldr     r3, [r0], $4                            @ r3 = input[13]
253    rev     r4, r3                                  @ byte reverse
254    str     r6, [fp]                                @ +Ch(b,c,d), +e
255    str     r4, [sp, $116]                          @ w[13] = m[13]
256    add     r4, r7                                  @ add k[0] to w[13]
257    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
258    str     r5, [fp]                                @ shift registers
259
260    ldr     r3, [r0], $4                            @ r3 = input[14]
261    rev     r4, r3                                  @ byte reverse
262    str     r6, [fp]                                @ +Ch(b,c,d), +e
263    str     r4, [sp, $120]                          @ w[14] = m[14]
264    add     r4, r7                                  @ add k[0] to w[14]
265    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
266    str     r5, [fp]                                @ shift registers
267
268    ldr     r3, [r0], $4                            @ r3 = input[15]
269    rev     r4, r3                                  @ byte reverse
270    str     r6, [fp]                                @ +Ch(b,c,d), +e
271    str     r4, [sp, $124]                          @ w[15] = m[15]
272    add     r4, r7                                  @ add k[0] to w[15]
273    str     r4, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
274    str     r5, [fp]                                @ shift registers
275
276# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
277# for (j = 0; j < 4; j++)
278# {
279#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(HASH+HFC,ADRA+CA4); // +Ch(b,c,d), +e
280#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha1_k[0];        // +k[0]
281#   *(MMCAU_PPB_INDIRECT + (LDR+CA5))  = w[i-16];          // ld w[i-16] -> CA5
282#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-14];          // xor w[i-14]
283#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-8];           // xor w[i-8]
284#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-3];           // xor w[i-3]
285#   *(MMCAU_PPB_INDIRECT + (ROTL+CA5)) = 1;                // rotate by 1
286#   w[i++] = *(MMCAU_PPB_INDIRECT + (STR+CA5));            // store w[i]
287#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(ADRA+CA5,SHS);      // +w[i], shift regs
288# }
289# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
290
291# -- (loop unrolled)
292
293    str     r6, [fp]                                @ +Ch(b,c,d), +e
294    ldr     r4, [sp, $64]                           @ r4 = w[0]
295    ldr     r3, [sp, $72]                           @ r3 = w[2]
296    eor     r4, r3                                  @ XOR w[i-14]
297    ldr     r3, [sp, $96]                           @ r3 = w[8]
298    eor     r4, r3                                  @ XOR w[i-8]
299    ldr     r3, [sp, $116]                          @ r3 = w[13]
300    eor     r4, r3                                  @ XOR w[i-3]
301    ror     r3, r4, $31                             @ rotate left by 1
302    str     r3, [sp, $128]                          @ store w[16]
303    add     r3, r7                                  @ add k[0] to w[16]
304    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
305    str     r5, [fp]                                @ shift regs
306
307    str     r6, [fp]                                @ +Ch(b,c,d), +e
308    ldr     r4, [sp, $68]                           @ r4 = w[1]
309    ldr     r3, [sp, $76]                           @ r3 = w[3]
310    eor     r4, r3                                  @ XOR w[i-14]
311    ldr     r3, [sp, $100]                          @ r3 = w[9]
312    eor     r4, r3                                  @ XOR w[i-8]
313    ldr     r3, [sp, $120]                          @ r3 = w[14]
314    eor     r4, r3                                  @ XOR w[i-3]
315    ror     r3, r4, $31                             @ rotate left by 1
316    str     r3, [sp, $132]                          @ store w[17]
317    add     r3, r7                                  @ add k[0] to w[17]
318    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
319    str     r5, [fp]                                @ shift regs
320
321    str     r6, [fp]                                @ +Ch(b,c,d), +e
322    ldr     r4, [sp, $72]                           @ r4 = w[2]
323    ldr     r3, [sp, $80]                           @ r3 = w[4]
324    eor     r4, r3                                  @ XOR w[i-14]
325    ldr     r3, [sp, $104]                          @ r3 = w[10]
326    eor     r4, r3                                  @ XOR w[i-8]
327    ldr     r3, [sp, $124]                          @ r3 = w[15]
328    eor     r4, r3                                  @ XOR w[i-3]
329    ror     r3, r4, $31                             @ rotate left by 1
330    str     r3, [sp, $136]                          @ store w[18]
331    add     r3, r7                                  @ add k[0] to w[18]
332    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
333    str     r5, [fp]                                @ shift regs
334
335    str     r6, [fp]                                @ +Ch(b,c,d), +e
336    ldr     r4, [sp, $76]                           @ r4 = w[3]
337    ldr     r3, [sp, $84]                           @ r3 = w[5]
338    eor     r4, r3                                  @ XOR w[i-14]
339    ldr     r3, [sp, $108]                          @ r3 = w[11]
340    eor     r4, r3                                  @ XOR w[i-8]
341    ldr     r3, [sp, $128]                          @ r3 = w[16]
342    eor     r4, r3                                  @ XOR w[i-3]
343    ror     r3, r4, $31                             @ rotate left by 1
344    str     r3, [sp, $140]                          @ store w[19]
345    add     r3, r7                                  @ add k[0] to w[19]
346    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
347    str     r5, [fp]                                @ shift regs
348
349
350# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
351# for (j = 0; j < 20; j++)
352# {
353#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(HASH+HFP,ADRA+CA4); // +Par(b,c,d), +e
354#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha1_k[1];        // +k[1]
355#   *(MMCAU_PPB_INDIRECT + (LDR+CA5))  = w[i-16];          // ld w[i-16] -> CA5
356#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-14];          // xor w[i-14]
357#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-8];           // xor w[i-8]
358#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-3];           // xor w[i-3]
359#   *(MMCAU_PPB_INDIRECT + (ROTL+CA5)) = 1;                // rotate by 1
360#   w[i++] = *(MMCAU_PPB_INDIRECT + (STR+CA5));            // store w[i]
361#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(ADRA+CA5,SHS);      // +w[i], shift regs
362# }
363# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
364
365    movw    r8, #:lower16:MMCAU_2_CMDS+(HASH+HFP)<<22+(ADRA+CA4)<<11
366    movt    r8, #:upper16:MMCAU_2_CMDS+(HASH+HFP)<<22+(ADRA+CA4)<<11
367    ldr     r7, [sl], $4                            @ get k[1]; sl++
368
369# -- (loop unrolled)
370
371    str     r8, [fp]                                @ +Par(b,c,d), +e
372    ldr     r4, [sp, $80]                           @ r4 = w[i-16]
373    ldr     r3, [sp, $88]                           @ r3 = w[i-14]
374    eor     r4, r3                                  @ XOR w[i-14]
375    ldr     r3, [sp, $112]                          @ r3 = w[i-8]
376    eor     r4, r3                                  @ XOR w[i-8]
377    ldr     r3, [sp, $132]                          @ r3 = w[i-3]
378    eor     r4, r3                                  @ XOR w[i-3]
379    ror     r3, r4, $31                             @ rotate left by 1
380    str     r3, [sp, $144]                          @ store w[20]
381    add     r3, r7                                  @ add k[1] to w[20]
382    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
383    str     r5, [fp]                                @ shift regs
384
385    str     r8, [fp]                                @ +Par(b,c,d), +e
386    ldr     r4, [sp, $84]                           @ r4 = w[i-16]
387    ldr     r3, [sp, $92]                           @ r3 = w[i-14]
388    eor     r4, r3                                  @ XOR w[i-14]
389    ldr     r3, [sp, $116]                          @ r3 = w[i-8]
390    eor     r4, r3                                  @ XOR w[i-8]
391    ldr     r3, [sp, $136]                          @ r3 = w[i-3]
392    eor     r4, r3                                  @ XOR w[i-3]
393    ror     r3, r4, $31                             @ rotate left by 1
394    str     r3, [sp, $148]                          @ store w[21]
395    add     r3, r7                                  @ add k[1] to w[21]
396    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
397    str     r5, [fp]                                @ shift regs
398
399    str     r8, [fp]                                @ +Par(b,c,d), +e
400    ldr     r4, [sp, $88]                           @ r4 = w[i-16]
401    ldr     r3, [sp, $96]                           @ r3 = w[i-14]
402    eor     r4, r3                                  @ XOR w[i-14]
403    ldr     r3, [sp, $120]                          @ r3 = w[i-8]
404    eor     r4, r3                                  @ XOR w[i-8]
405    ldr     r3, [sp, $140]                          @ r3 = w[i-3]
406    eor     r4, r3                                  @ XOR w[i-3]
407    ror     r3, r4, $31                             @ rotate left by 1
408    str     r3, [sp, $152]                          @ store w[22]
409    add     r3, r7                                  @ add k[1] to w[22]
410    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
411    str     r5, [fp]                                @ shift regs
412
413    str     r8, [fp]                                @ +Par(b,c,d), +e
414    ldr     r4, [sp, $92]                           @ r4 = w[i-16]
415    ldr     r3, [sp, $100]                          @ r3 = w[i-14]
416    eor     r4, r3                                  @ XOR w[i-14]
417    ldr     r3, [sp, $124]                          @ r3 = w[i-8]
418    eor     r4, r3                                  @ XOR w[i-8]
419    ldr     r3, [sp, $144]                          @ r3 = w[i-3]
420    eor     r4, r3                                  @ XOR w[i-3]
421    ror     r3, r4, $31                             @ rotate left by 1
422    str     r3, [sp, $156]                          @ store w[23]
423    add     r3, r7                                  @ add k[1] to w[23]
424    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
425    str     r5, [fp]                                @ shift regs
426
427    str     r8, [fp]                                @ +Par(b,c,d), +e
428    ldr     r4, [sp, $96]                           @ r4 = w[i-16]
429    ldr     r3, [sp, $104]                          @ r3 = w[i-14]
430    eor     r4, r3                                  @ XOR w[i-14]
431    ldr     r3, [sp, $128]                          @ r3 = w[i-8]
432    eor     r4, r3                                  @ XOR w[i-8]
433    ldr     r3, [sp, $148]                          @ r3 = w[i-3]
434    eor     r4, r3                                  @ XOR w[i-3]
435    ror     r3, r4, $31                             @ rotate left by 1
436    str     r3, [sp, $160]                          @ store w[24]
437    add     r3, r7                                  @ add k[1] to w[24]
438    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
439    str     r5, [fp]                                @ shift regs
440
441    str     r8, [fp]                                @ +Par(b,c,d), +e
442    ldr     r4, [sp, $100]                          @ r4 = w[i-16]
443    ldr     r3, [sp, $108]                          @ r3 = w[i-14]
444    eor     r4, r3                                  @ XOR w[i-14]
445    ldr     r3, [sp, $132]                          @ r3 = w[i-8]
446    eor     r4, r3                                  @ XOR w[i-8]
447    ldr     r3, [sp, $152]                          @ r3 = w[i-3]
448    eor     r4, r3                                  @ XOR w[i-3]
449    ror     r3, r4, $31                             @ rotate left by 1
450    str     r3, [sp, $164]                          @ store w[25]
451    add     r3, r7                                  @ add k[1] to w[25]
452    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
453    str     r5, [fp]                                @ shift regs
454
455    str     r8, [fp]                                @ +Par(b,c,d), +e
456    ldr     r4, [sp, $104]                          @ r4 = w[i-16]
457    ldr     r3, [sp, $112]                          @ r3 = w[i-14]
458    eor     r4, r3                                  @ XOR w[i-14]
459    ldr     r3, [sp, $136]                          @ r3 = w[i-8]
460    eor     r4, r3                                  @ XOR w[i-8]
461    ldr     r3, [sp, $156]                          @ r3 = w[i-3]
462    eor     r4, r3                                  @ XOR w[i-3]
463    ror     r3, r4, $31                             @ rotate left by 1
464    str     r3, [sp, $168]                          @ store w[26]
465    add     r3, r7                                  @ add k[1] to w[26]
466    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
467    str     r5, [fp]                                @ shift regs
468
469    str     r8, [fp]                                @ +Par(b,c,d), +e
470    ldr     r4, [sp, $108]                          @ r4 = w[i-16]
471    ldr     r3, [sp, $116]                          @ r3 = w[i-14]
472    eor     r4, r3                                  @ XOR w[i-14]
473    ldr     r3, [sp, $140]                          @ r3 = w[i-8]
474    eor     r4, r3                                  @ XOR w[i-8]
475    ldr     r3, [sp, $160]                          @ r3 = w[i-3]
476    eor     r4, r3                                  @ XOR w[i-3]
477    ror     r3, r4, $31                             @ rotate left by 1
478    str     r3, [sp, $172]                          @ store w[27]
479    add     r3, r7                                  @ add k[1] to w[27]
480    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
481    str     r5, [fp]                                @ shift regs
482
483    str     r8, [fp]                                @ +Par(b,c,d), +e
484    ldr     r4, [sp, $112]                          @ r4 = w[i-16]
485    ldr     r3, [sp, $120]                          @ r3 = w[i-14]
486    eor     r4, r3                                  @ XOR w[i-14]
487    ldr     r3, [sp, $144]                          @ r3 = w[i-8]
488    eor     r4, r3                                  @ XOR w[i-8]
489    ldr     r3, [sp, $164]                          @ r3 = w[i-3]
490    eor     r4, r3                                  @ XOR w[i-3]
491    ror     r3, r4, $31                             @ rotate left by 1
492    str     r3, [sp, $176]                          @ store w[28]
493    add     r3, r7                                  @ add k[1] to w[28]
494    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
495    str     r5, [fp]                                @ shift regs
496
497    str     r8, [fp]                                @ +Par(b,c,d), +e
498    ldr     r4, [sp, $116]                          @ r4 = w[i-16]
499    ldr     r3, [sp, $124]                          @ r3 = w[i-14]
500    eor     r4, r3                                  @ XOR w[i-14]
501    ldr     r3, [sp, $148]                          @ r3 = w[i-8]
502    eor     r4, r3                                  @ XOR w[i-8]
503    ldr     r3, [sp, $168]                          @ r3 = w[i-3]
504    eor     r4, r3                                  @ XOR w[i-3]
505    ror     r3, r4, $31                             @ rotate left by 1
506    str     r3, [sp, $180]                          @ store w[29]
507    add     r3, r7                                  @ add k[1] to w[29]
508    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
509    str     r5, [fp]                                @ shift regs
510
511    str     r8, [fp]                                @ +Par(b,c,d), +e
512    ldr     r4, [sp, $120]                          @ r4 = w[i-16]
513    ldr     r3, [sp, $128]                          @ r3 = w[i-14]
514    eor     r4, r3                                  @ XOR w[i-14]
515    ldr     r3, [sp, $152]                          @ r3 = w[i-8]
516    eor     r4, r3                                  @ XOR w[i-8]
517    ldr     r3, [sp, $172]                          @ r3 = w[i-3]
518    eor     r4, r3                                  @ XOR w[i-3]
519    ror     r3, r4, $31                             @ rotate left by 1
520    str     r3, [sp, $184]                          @ store w[30]
521    add     r3, r7                                  @ add k[1] to w[30]
522    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
523    str     r5, [fp]                                @ shift regs
524
525    str     r8, [fp]                                @ +Par(b,c,d), +e
526    ldr     r4, [sp, $124]                          @ r4 = w[i-16]
527    ldr     r3, [sp, $132]                          @ r3 = w[i-14]
528    eor     r4, r3                                  @ XOR w[i-14]
529    ldr     r3, [sp, $156]                          @ r3 = w[i-8]
530    eor     r4, r3                                  @ XOR w[i-8]
531    ldr     r3, [sp, $176]                          @ r3 = w[i-3]
532    eor     r4, r3                                  @ XOR w[i-3]
533    ror     r3, r4, $31                             @ rotate left by 1
534    str     r3, [sp, $188]                          @ store w[31]
535    add     r3, r7                                  @ add k[1] to w[31]
536    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
537    str     r5, [fp]                                @ shift regs
538
539    str     r8, [fp]                                @ +Par(b,c,d), +e
540    ldr     r4, [sp, $128]                          @ r4 = w[i-16]
541    ldr     r3, [sp, $136]                          @ r3 = w[i-14]
542    eor     r4, r3                                  @ XOR w[i-14]
543    ldr     r3, [sp, $160]                          @ r3 = w[i-8]
544    eor     r4, r3                                  @ XOR w[i-8]
545    ldr     r3, [sp, $180]                          @ r3 = w[i-3]
546    eor     r4, r3                                  @ XOR w[i-3]
547    ror     r3, r4, $31                             @ rotate left by 1
548    str     r3, [sp, $192]                          @ store w[32]
549    add     r3, r7                                  @ add k[1] to w[32]
550    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
551    str     r5, [fp]                                @ shift regs
552
553    str     r8, [fp]                                @ +Par(b,c,d), +e
554    ldr     r4, [sp, $132]                          @ r4 = w[i-16]
555    ldr     r3, [sp, $140]                          @ r3 = w[i-14]
556    eor     r4, r3                                  @ XOR w[i-14]
557    ldr     r3, [sp, $164]                          @ r3 = w[i-8]
558    eor     r4, r3                                  @ XOR w[i-8]
559    ldr     r3, [sp, $184]                          @ r3 = w[i-3]
560    eor     r4, r3                                  @ XOR w[i-3]
561    ror     r3, r4, $31                             @ rotate left by 1
562    str     r3, [sp, $196]                          @ store w[33]
563    add     r3, r7                                  @ add k[1] to w[33]
564    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
565    str     r5, [fp]                                @ shift regs
566
567    str     r8, [fp]                                @ +Par(b,c,d), +e
568    ldr     r4, [sp, $136]                          @ r4 = w[i-16]
569    ldr     r3, [sp, $144]                          @ r3 = w[i-14]
570    eor     r4, r3                                  @ XOR w[i-14]
571    ldr     r3, [sp, $168]                          @ r3 = w[i-8]
572    eor     r4, r3                                  @ XOR w[i-8]
573    ldr     r3, [sp, $188]                          @ r3 = w[i-3]
574    eor     r4, r3                                  @ XOR w[i-3]
575    ror     r3, r4, $31                             @ rotate left by 1
576    str     r3, [sp, $200]                          @ store w[34]
577    add     r3, r7                                  @ add k[1] to w[34]
578    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
579    str     r5, [fp]                                @ shift regs
580
581    str     r8, [fp]                                @ +Par(b,c,d), +e
582    ldr     r4, [sp, $140]                          @ r4 = w[i-16]
583    ldr     r3, [sp, $148]                          @ r3 = w[i-14]
584    eor     r4, r3                                  @ XOR w[i-14]
585    ldr     r3, [sp, $172]                          @ r3 = w[i-8]
586    eor     r4, r3                                  @ XOR w[i-8]
587    ldr     r3, [sp, $192]                          @ r3 = w[i-3]
588    eor     r4, r3                                  @ XOR w[i-3]
589    ror     r3, r4, $31                             @ rotate left by 1
590    str     r3, [sp, $204]                          @ store w[35]
591    add     r3, r7                                  @ add k[1] to w[35]
592    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
593    str     r5, [fp]                                @ shift regs
594
595    str     r8, [fp]                                @ +Par(b,c,d), +e
596    ldr     r4, [sp, $144]                          @ r4 = w[i-16]
597    ldr     r3, [sp, $152]                          @ r3 = w[i-14]
598    eor     r4, r3                                  @ XOR w[i-14]
599    ldr     r3, [sp, $176]                          @ r3 = w[i-8]
600    eor     r4, r3                                  @ XOR w[i-8]
601    ldr     r3, [sp, $196]                          @ r3 = w[i-3]
602    eor     r4, r3                                  @ XOR w[i-3]
603    ror     r3, r4, $31                             @ rotate left by 1
604    str     r3, [sp, $208]                          @ store w[36]
605    add     r3, r7                                  @ add k[1] to w[36]
606    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
607    str     r5, [fp]                                @ shift regs
608
609    str     r8, [fp]                                @ +Par(b,c,d), +e
610    ldr     r4, [sp, $148]                          @ r4 = w[i-16]
611    ldr     r3, [sp, $156]                          @ r3 = w[i-14]
612    eor     r4, r3                                  @ XOR w[i-14]
613    ldr     r3, [sp, $180]                          @ r3 = w[i-8]
614    eor     r4, r3                                  @ XOR w[i-8]
615    ldr     r3, [sp, $200]                          @ r3 = w[i-3]
616    eor     r4, r3                                  @ XOR w[i-3]
617    ror     r3, r4, $31                             @ rotate left by 1
618    str     r3, [sp, $212]                          @ store w[37]
619    add     r3, r7                                  @ add k[1] to w[37]
620    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
621    str     r5, [fp]                                @ shift regs
622
623    str     r8, [fp]                                @ +Par(b,c,d), +e
624    ldr     r4, [sp, $152]                          @ r4 = w[i-16]
625    ldr     r3, [sp, $160]                          @ r3 = w[i-14]
626    eor     r4, r3                                  @ XOR w[i-14]
627    ldr     r3, [sp, $184]                          @ r3 = w[i-8]
628    eor     r4, r3                                  @ XOR w[i-8]
629    ldr     r3, [sp, $204]                          @ r3 = w[i-3]
630    eor     r4, r3                                  @ XOR w[i-3]
631    ror     r3, r4, $31                             @ rotate left by 1
632    str     r3, [sp, $216]                          @ store w[38]
633    add     r3, r7                                  @ add k[1] to w[38]
634    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
635    str     r5, [fp]                                @ shift regs
636
637    str     r8, [fp]                                @ +Par(b,c,d), +e
638    ldr     r4, [sp, $156]                          @ r4 = w[i-16]
639    ldr     r3, [sp, $164]                          @ r3 = w[i-14]
640    eor     r4, r3                                  @ XOR w[i-14]
641    ldr     r3, [sp, $188]                          @ r3 = w[i-8]
642    eor     r4, r3                                  @ XOR w[i-8]
643    ldr     r3, [sp, $208]                          @ r3 = w[i-3]
644    eor     r4, r3                                  @ XOR w[i-3]
645    ror     r3, r4, $31                             @ rotate left by 1
646    str     r3, [sp, $220]                          @ store w[39]
647    add     r3, r7                                  @ add k[1] to w[39]
648    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
649    str     r5, [fp]                                @ shift regs
650
651
652# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
653# for (j = 0; j < 20; j++)
654# {
655#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(HASH+HFM,ADRA+CA4); // +Maj(b,c,d), +e
656#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha1_k[2];        // +k[2]
657#   *(MMCAU_PPB_INDIRECT + (LDR+CA5))  = w[i-16];          // ld w[i-16] -> CA5
658#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-14];          // xor w[i-14]
659#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-8];           // xor w[i-8]
660#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-3];           // xor w[i-3]
661#   *(MMCAU_PPB_INDIRECT + (ROTL+CA5)) = 1;                // rotate by 1
662#   w[i++] = *(MMCAU_PPB_INDIRECT + (STR+CA5));            // store w[i]
663#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(ADRA+CA5,SHS);      // +w[i], shift regs
664# }
665# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
666
667    movw    r9, #:lower16:MMCAU_2_CMDS+(HASH+HFM)<<22+(ADRA+CA4)<<11
668    movt    r9, #:upper16:MMCAU_2_CMDS+(HASH+HFM)<<22+(ADRA+CA4)<<11
669    ldr     r7, [sl], $4                            @ get k[2]; sl++
670
671# -- (loop unrolled; unlike the pseudo-code above, w[i] is formed in r3/r4 and k[2]+w[i] is added to CAA in one write)
672
673    str     r9, [fp]                                @ +Maj(b,c,d), +e
674    ldr     r4, [sp, $160]                          @ r4 = w[i-16]
675    ldr     r3, [sp, $168]                          @ r3 = w[i-14]
676    eor     r4, r3                                  @ XOR w[i-14]
677    ldr     r3, [sp, $192]                          @ r3 = w[i-8]
678    eor     r4, r3                                  @ XOR w[i-8]
679    ldr     r3, [sp, $212]                          @ r3 = w[i-3]
680    eor     r4, r3                                  @ XOR w[i-3]
681    ror     r3, r4, $31                             @ rotate left by 1
682    str     r3, [sp, $224]                          @ store w[40]
683    add     r3, r7                                  @ add k[2] to w[40]
684    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
685    str     r5, [fp]                                @ shift regs
686
687    str     r9, [fp]                                @ +Maj(b,c,d), +e
688    ldr     r4, [sp, $164]                          @ r4 = w[i-16]
689    ldr     r3, [sp, $172]                          @ r3 = w[i-14]
690    eor     r4, r3                                  @ XOR w[i-14]
691    ldr     r3, [sp, $196]                          @ r3 = w[i-8]
692    eor     r4, r3                                  @ XOR w[i-8]
693    ldr     r3, [sp, $216]                          @ r3 = w[i-3]
694    eor     r4, r3                                  @ XOR w[i-3]
695    ror     r3, r4, $31                             @ rotate left by 1
696    str     r3, [sp, $228]                          @ store w[41]
697    add     r3, r7                                  @ add k[2] to w[41]
698    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
699    str     r5, [fp]                                @ shift regs
700
701    str     r9, [fp]                                @ +Maj(b,c,d), +e
702    ldr     r4, [sp, $168]                          @ r4 = w[i-16]
703    ldr     r3, [sp, $176]                          @ r3 = w[i-14]
704    eor     r4, r3                                  @ XOR w[i-14]
705    ldr     r3, [sp, $200]                          @ r3 = w[i-8]
706    eor     r4, r3                                  @ XOR w[i-8]
707    ldr     r3, [sp, $220]                          @ r3 = w[i-3]
708    eor     r4, r3                                  @ XOR w[i-3]
709    ror     r3, r4, $31                             @ rotate left by 1
710    str     r3, [sp, $232]                          @ store w[42]
711    add     r3, r7                                  @ add k[2] to w[42]
712    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
713    str     r5, [fp]                                @ shift regs
714
715    str     r9, [fp]                                @ +Maj(b,c,d), +e
716    ldr     r4, [sp, $172]                          @ r4 = w[i-16]
717    ldr     r3, [sp, $180]                          @ r3 = w[i-14]
718    eor     r4, r3                                  @ XOR w[i-14]
719    ldr     r3, [sp, $204]                          @ r3 = w[i-8]
720    eor     r4, r3                                  @ XOR w[i-8]
721    ldr     r3, [sp, $224]                          @ r3 = w[i-3]
722    eor     r4, r3                                  @ XOR w[i-3]
723    ror     r3, r4, $31                             @ rotate left by 1
724    str     r3, [sp, $236]                          @ store w[43]
725    add     r3, r7                                  @ add k[2] to w[43]
726    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
727    str     r5, [fp]                                @ shift regs
728
729    str     r9, [fp]                                @ +Maj(b,c,d), +e
730    ldr     r4, [sp, $176]                          @ r4 = w[i-16]
731    ldr     r3, [sp, $184]                          @ r3 = w[i-14]
732    eor     r4, r3                                  @ XOR w[i-14]
733    ldr     r3, [sp, $208]                          @ r3 = w[i-8]
734    eor     r4, r3                                  @ XOR w[i-8]
735    ldr     r3, [sp, $228]                          @ r3 = w[i-3]
736    eor     r4, r3                                  @ XOR w[i-3]
737    ror     r3, r4, $31                             @ rotate left by 1
738    str     r3, [sp, $240]                          @ store w[44]
739    add     r3, r7                                  @ add k[2] to w[44]
740    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
741    str     r5, [fp]                                @ shift regs
742
743    str     r9, [fp]                                @ +Maj(b,c,d), +e
744    ldr     r4, [sp, $180]                          @ r4 = w[i-16]
745    ldr     r3, [sp, $188]                          @ r3 = w[i-14]
746    eor     r4, r3                                  @ XOR w[i-14]
747    ldr     r3, [sp, $212]                          @ r3 = w[i-8]
748    eor     r4, r3                                  @ XOR w[i-8]
749    ldr     r3, [sp, $232]                          @ r3 = w[i-3]
750    eor     r4, r3                                  @ XOR w[i-3]
751    ror     r3, r4, $31                             @ rotate left by 1
752    str     r3, [sp, $244]                          @ store w[45]
753    add     r3, r7                                  @ add k[2] to w[45]
754    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
755    str     r5, [fp]                                @ shift regs
756
757    str     r9, [fp]                                @ +Maj(b,c,d), +e
758    ldr     r4, [sp, $184]                          @ r4 = w[i-16]
759    ldr     r3, [sp, $192]                          @ r3 = w[i-14]
760    eor     r4, r3                                  @ XOR w[i-14]
761    ldr     r3, [sp, $216]                          @ r3 = w[i-8]
762    eor     r4, r3                                  @ XOR w[i-8]
763    ldr     r3, [sp, $236]                          @ r3 = w[i-3]
764    eor     r4, r3                                  @ XOR w[i-3]
765    ror     r3, r4, $31                             @ rotate left by 1
766    str     r3, [sp, $248]                          @ store w[46]
767    add     r3, r7                                  @ add k[2] to w[46]
768    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
769    str     r5, [fp]                                @ shift regs
770
771    str     r9, [fp]                                @ +Maj(b,c,d), +e
772    ldr     r4, [sp, $188]                          @ r4 = w[i-16]
773    ldr     r3, [sp, $196]                          @ r3 = w[i-14]
774    eor     r4, r3                                  @ XOR w[i-14]
775    ldr     r3, [sp, $220]                          @ r3 = w[i-8]
776    eor     r4, r3                                  @ XOR w[i-8]
777    ldr     r3, [sp, $240]                          @ r3 = w[i-3]
778    eor     r4, r3                                  @ XOR w[i-3]
779    ror     r3, r4, $31                             @ rotate left by 1
780    str     r3, [sp, $252]                          @ store w[47]
781    add     r3, r7                                  @ add k[2] to w[47]
782    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
783    str     r5, [fp]                                @ shift regs
784
785    str     r9, [fp]                                @ +Maj(b,c,d), +e
786    ldr     r4, [sp, $192]                          @ r4 = w[i-16]
787    ldr     r3, [sp, $200]                          @ r3 = w[i-14]
788    eor     r4, r3                                  @ XOR w[i-14]
789    ldr     r3, [sp, $224]                          @ r3 = w[i-8]
790    eor     r4, r3                                  @ XOR w[i-8]
791    ldr     r3, [sp, $244]                          @ r3 = w[i-3]
792    eor     r4, r3                                  @ XOR w[i-3]
793    ror     r3, r4, $31                             @ rotate left by 1
794    str     r3, [sp, $256]                          @ store w[48]
795    add     r3, r7                                  @ add k[2] to w[48]
796    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
797    str     r5, [fp]                                @ shift regs
798
799    str     r9, [fp]                                @ +Maj(b,c,d), +e
800    ldr     r4, [sp, $196]                          @ r4 = w[i-16]
801    ldr     r3, [sp, $204]                          @ r3 = w[i-14]
802    eor     r4, r3                                  @ XOR w[i-14]
803    ldr     r3, [sp, $228]                          @ r3 = w[i-8]
804    eor     r4, r3                                  @ XOR w[i-8]
805    ldr     r3, [sp, $248]                          @ r3 = w[i-3]
806    eor     r4, r3                                  @ XOR w[i-3]
807    ror     r3, r4, $31                             @ rotate left by 1
808    str     r3, [sp, $260]                          @ store w[49]
809    add     r3, r7                                  @ add k[2] to w[49]
810    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
811    str     r5, [fp]                                @ shift regs
812
813    str     r9, [fp]                                @ +Maj(b,c,d), +e
814    ldr     r4, [sp, $200]                          @ r4 = w[i-16]
815    ldr     r3, [sp, $208]                          @ r3 = w[i-14]
816    eor     r4, r3                                  @ XOR w[i-14]
817    ldr     r3, [sp, $232]                          @ r3 = w[i-8]
818    eor     r4, r3                                  @ XOR w[i-8]
819    ldr     r3, [sp, $252]                          @ r3 = w[i-3]
820    eor     r4, r3                                  @ XOR w[i-3]
821    ror     r3, r4, $31                             @ rotate left by 1
822    str     r3, [sp, $264]                          @ store w[50]
823    add     r3, r7                                  @ add k[2] to w[50]
824    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
825    str     r5, [fp]                                @ shift regs
826
827    str     r9, [fp]                                @ +Maj(b,c,d), +e
828    ldr     r4, [sp, $204]                          @ r4 = w[i-16]
829    ldr     r3, [sp, $212]                          @ r3 = w[i-14]
830    eor     r4, r3                                  @ XOR w[i-14]
831    ldr     r3, [sp, $236]                          @ r3 = w[i-8]
832    eor     r4, r3                                  @ XOR w[i-8]
833    ldr     r3, [sp, $256]                          @ r3 = w[i-3]
834    eor     r4, r3                                  @ XOR w[i-3]
835    ror     r3, r4, $31                             @ rotate left by 1
836    str     r3, [sp, $268]                          @ store w[51]
837    add     r3, r7                                  @ add k[2] to w[51]
838    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
839    str     r5, [fp]                                @ shift regs
840
841    str     r9, [fp]                                @ +Maj(b,c,d), +e
842    ldr     r4, [sp, $208]                          @ r4 = w[i-16]
843    ldr     r3, [sp, $216]                          @ r3 = w[i-14]
844    eor     r4, r3                                  @ XOR w[i-14]
845    ldr     r3, [sp, $240]                          @ r3 = w[i-8]
846    eor     r4, r3                                  @ XOR w[i-8]
847    ldr     r3, [sp, $260]                          @ r3 = w[i-3]
848    eor     r4, r3                                  @ XOR w[i-3]
849    ror     r3, r4, $31                             @ rotate left by 1
850    str     r3, [sp, $272]                          @ store w[52]
851    add     r3, r7                                  @ add k[2] to w[52]
852    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
853    str     r5, [fp]                                @ shift regs
854
855    str     r9, [fp]                                @ +Maj(b,c,d), +e
856    ldr     r4, [sp, $212]                          @ r4 = w[i-16]
857    ldr     r3, [sp, $220]                          @ r3 = w[i-14]
858    eor     r4, r3                                  @ XOR w[i-14]
859    ldr     r3, [sp, $244]                          @ r3 = w[i-8]
860    eor     r4, r3                                  @ XOR w[i-8]
861    ldr     r3, [sp, $264]                          @ r3 = w[i-3]
862    eor     r4, r3                                  @ XOR w[i-3]
863    ror     r3, r4, $31                             @ rotate left by 1
864    str     r3, [sp, $276]                          @ store w[53]
865    add     r3, r7                                  @ add k[2] to w[53]
866    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
867    str     r5, [fp]                                @ shift regs
868
869    str     r9, [fp]                                @ +Maj(b,c,d), +e
870    ldr     r4, [sp, $216]                          @ r4 = w[i-16]
871    ldr     r3, [sp, $224]                          @ r3 = w[i-14]
872    eor     r4, r3                                  @ XOR w[i-14]
873    ldr     r3, [sp, $248]                          @ r3 = w[i-8]
874    eor     r4, r3                                  @ XOR w[i-8]
875    ldr     r3, [sp, $268]                          @ r3 = w[i-3]
876    eor     r4, r3                                  @ XOR w[i-3]
877    ror     r3, r4, $31                             @ rotate left by 1
878    str     r3, [sp, $280]                          @ store w[54]
879    add     r3, r7                                  @ add k[2] to w[54]
880    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
881    str     r5, [fp]                                @ shift regs
882
883    str     r9, [fp]                                @ +Maj(b,c,d), +e
884    ldr     r4, [sp, $220]                          @ r4 = w[i-16]
885    ldr     r3, [sp, $228]                          @ r3 = w[i-14]
886    eor     r4, r3                                  @ XOR w[i-14]
887    ldr     r3, [sp, $252]                          @ r3 = w[i-8]
888    eor     r4, r3                                  @ XOR w[i-8]
889    ldr     r3, [sp, $272]                          @ r3 = w[i-3]
890    eor     r4, r3                                  @ XOR w[i-3]
891    ror     r3, r4, $31                             @ rotate left by 1
892    str     r3, [sp, $284]                          @ store w[55]
893    add     r3, r7                                  @ add k[2] to w[55]
894    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
895    str     r5, [fp]                                @ shift regs
896
897    str     r9, [fp]                                @ +Maj(b,c,d), +e
898    ldr     r4, [sp, $224]                          @ r4 = w[i-16]
899    ldr     r3, [sp, $232]                          @ r3 = w[i-14]
900    eor     r4, r3                                  @ XOR w[i-14]
901    ldr     r3, [sp, $256]                          @ r3 = w[i-8]
902    eor     r4, r3                                  @ XOR w[i-8]
903    ldr     r3, [sp, $276]                          @ r3 = w[i-3]
904    eor     r4, r3                                  @ XOR w[i-3]
905    ror     r3, r4, $31                             @ rotate left by 1
906    str     r3, [sp, $288]                          @ store w[56]
907    add     r3, r7                                  @ add k[2] to w[56]
908    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
909    str     r5, [fp]                                @ shift regs
910
911    str     r9, [fp]                                @ +Maj(b,c,d), +e
912    ldr     r4, [sp, $228]                          @ r4 = w[i-16]
913    ldr     r3, [sp, $236]                          @ r3 = w[i-14]
914    eor     r4, r3                                  @ XOR w[i-14]
915    ldr     r3, [sp, $260]                          @ r3 = w[i-8]
916    eor     r4, r3                                  @ XOR w[i-8]
917    ldr     r3, [sp, $280]                          @ r3 = w[i-3]
918    eor     r4, r3                                  @ XOR w[i-3]
919    ror     r3, r4, $31                             @ rotate left by 1
920    str     r3, [sp, $292]                          @ store w[57]
921    add     r3, r7                                  @ add k[2] to w[57]
922    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
923    str     r5, [fp]                                @ shift regs
924
925    str     r9, [fp]                                @ +Maj(b,c,d), +e
926    ldr     r4, [sp, $232]                          @ r4 = w[i-16]
927    ldr     r3, [sp, $240]                          @ r3 = w[i-14]
928    eor     r4, r3                                  @ XOR w[i-14]
929    ldr     r3, [sp, $264]                          @ r3 = w[i-8]
930    eor     r4, r3                                  @ XOR w[i-8]
931    ldr     r3, [sp, $284]                          @ r3 = w[i-3]
932    eor     r4, r3                                  @ XOR w[i-3]
933    ror     r3, r4, $31                             @ rotate left by 1
934    str     r3, [sp, $296]                          @ store w[58]
935    add     r3, r7                                  @ add k[2] to w[58]
936    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
937    str     r5, [fp]                                @ shift regs
938
939    str     r9, [fp]                                @ +Maj(b,c,d), +e
940    ldr     r4, [sp, $236]                          @ r4 = w[i-16]
941    ldr     r3, [sp, $244]                          @ r3 = w[i-14]
942    eor     r4, r3                                  @ XOR w[i-14]
943    ldr     r3, [sp, $268]                          @ r3 = w[i-8]
944    eor     r4, r3                                  @ XOR w[i-8]
945    ldr     r3, [sp, $288]                          @ r3 = w[i-3]
946    eor     r4, r3                                  @ XOR w[i-3]
947    ror     r3, r4, $31                             @ rotate left by 1
948    str     r3, [sp, $300]                          @ store w[59]
949    add     r3, r7                                  @ add k[2] to w[59]
950    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
951    str     r5, [fp]                                @ shift regs
952
953
954# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
955# for (j = 0; j < 20; j++)
956# {
957#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(HASH+HFP,ADRA+CA4); // +Par(b,c,d), +e
958#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha1_k[3];        // +k[3]
959#   *(MMCAU_PPB_INDIRECT + (LDR+CA5))  = w[i-16];          // ld w[i-16] -> CA5
960#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-14];          // xor w[i-14]
961#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-8];           // xor w[i-8]
962#   *(MMCAU_PPB_INDIRECT + (XOR+CA5))  = w[i-3];           // xor w[i-3]
963#   *(MMCAU_PPB_INDIRECT + (ROTL+CA5)) = 1;                // rotate by 1
964#   w[i++] = *(MMCAU_PPB_INDIRECT + (STR+CA5));            // store w[i]
965#   *(MMCAU_PPB_DIRECT) = mmcau_2_cmds(ADRA+CA5,SHS);      // +w[i], shift regs
966# }
967# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
968
969    ldr     r7, [sl]                                @ get k[3]
970
971# -- (loop unrolled; unlike the pseudo-code above, w[i] is formed in r3/r4 and k[3]+w[i] is added to CAA in one write)
972
973    str     r8, [fp]                                @ +Par(b,c,d), +e
974    ldr     r4, [sp, $240]                          @ r4 = w[i-16]
975    ldr     r3, [sp, $248]                          @ r3 = w[i-14]
976    eor     r4, r3                                  @ XOR w[i-14]
977    ldr     r3, [sp, $272]                          @ r3 = w[i-8]
978    eor     r4, r3                                  @ XOR w[i-8]
979    ldr     r3, [sp, $292]                          @ r3 = w[i-3]
980    eor     r4, r3                                  @ XOR w[i-3]
981    ror     r3, r4, $31                             @ rotate left by 1
982    str     r3, [sp, $304]                          @ store w[60]
983    add     r3, r7                                  @ add k[3] to w[60]
984    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
985    str     r5, [fp]                                @ shift regs
986
987    str     r8, [fp]                                @ +Par(b,c,d), +e
988    ldr     r4, [sp, $244]                          @ r4 = w[i-16]
989    ldr     r3, [sp, $252]                          @ r3 = w[i-14]
990    eor     r4, r3                                  @ XOR w[i-14]
991    ldr     r3, [sp, $276]                          @ r3 = w[i-8]
992    eor     r4, r3                                  @ XOR w[i-8]
993    ldr     r3, [sp, $296]                          @ r3 = w[i-3]
994    eor     r4, r3                                  @ XOR w[i-3]
995    ror     r3, r4, $31                             @ rotate left by 1
996    str     r3, [sp, $308]                          @ store w[61]
997    add     r3, r7                                  @ add k[3] to w[61]
998    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
999    str     r5, [fp]                                @ shift regs
1000
1001    str     r8, [fp]                                @ +Par(b,c,d), +e
1002    ldr     r4, [sp, $248]                          @ r4 = w[i-16]
1003    ldr     r3, [sp, $256]                          @ r3 = w[i-14]
1004    eor     r4, r3                                  @ XOR w[i-14]
1005    ldr     r3, [sp, $280]                          @ r3 = w[i-8]
1006    eor     r4, r3                                  @ XOR w[i-8]
1007    ldr     r3, [sp, $300]                          @ r3 = w[i-3]
1008    eor     r4, r3                                  @ XOR w[i-3]
1009    ror     r3, r4, $31                             @ rotate left by 1
1010    str     r3, [sp, $312]                          @ store w[62]
1011    add     r3, r7                                  @ add k[3] to w[62]
1012    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1013    str     r5, [fp]                                @ shift regs
1014
1015    str     r8, [fp]                                @ +Par(b,c,d), +e
1016    ldr     r4, [sp, $252]                          @ r4 = w[i-16]
1017    ldr     r3, [sp, $260]                          @ r3 = w[i-14]
1018    eor     r4, r3                                  @ XOR w[i-14]
1019    ldr     r3, [sp, $284]                          @ r3 = w[i-8]
1020    eor     r4, r3                                  @ XOR w[i-8]
1021    ldr     r3, [sp, $304]                          @ r3 = w[i-3]
1022    eor     r4, r3                                  @ XOR w[i-3]
1023    ror     r3, r4, $31                             @ rotate left by 1
1024    str     r3, [sp, $316]                          @ store w[63]
1025    add     r3, r7                                  @ add k[3] to w[63]
1026    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1027    str     r5, [fp]                                @ shift regs
1028
1029    str     r8, [fp]                                @ +Par(b,c,d), +e
1030    ldr     r4, [sp, $256]                          @ r4 = w[i-16]
1031    ldr     r3, [sp, $264]                          @ r3 = w[i-14]
1032    eor     r4, r3                                  @ XOR w[i-14]
1033    ldr     r3, [sp, $288]                          @ r3 = w[i-8]
1034    eor     r4, r3                                  @ XOR w[i-8]
1035    ldr     r3, [sp, $308]                          @ r3 = w[i-3]
1036    eor     r4, r3                                  @ XOR w[i-3]
1037    ror     r3, r4, $31                             @ rotate left by 1
1038    str     r3, [sp, $320]                          @ store w[64]
1039    add     r3, r7                                  @ add k[3] to w[64]
1040    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1041    str     r5, [fp]                                @ shift regs
1042
1043    str     r8, [fp]                                @ +Par(b,c,d), +e
1044    ldr     r4, [sp, $260]                          @ r4 = w[i-16]
1045    ldr     r3, [sp, $268]                          @ r3 = w[i-14]
1046    eor     r4, r3                                  @ XOR w[i-14]
1047    ldr     r3, [sp, $292]                          @ r3 = w[i-8]
1048    eor     r4, r3                                  @ XOR w[i-8]
1049    ldr     r3, [sp, $312]                          @ r3 = w[i-3]
1050    eor     r4, r3                                  @ XOR w[i-3]
1051    ror     r3, r4, $31                             @ rotate left by 1
1052    str     r3, [sp, $324]                          @ store w[65]
1053    add     r3, r7                                  @ add k[3] to w[65]
1054    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1055    str     r5, [fp]                                @ shift regs
1056
1057    str     r8, [fp]                                @ +Par(b,c,d), +e
1058    ldr     r4, [sp, $264]                          @ r4 = w[i-16]
1059    ldr     r3, [sp, $272]                          @ r3 = w[i-14]
1060    eor     r4, r3                                  @ XOR w[i-14]
1061    ldr     r3, [sp, $296]                          @ r3 = w[i-8]
1062    eor     r4, r3                                  @ XOR w[i-8]
1063    ldr     r3, [sp, $316]                          @ r3 = w[i-3]
1064    eor     r4, r3                                  @ XOR w[i-3]
1065    ror     r3, r4, $31                             @ rotate left by 1
1066    str     r3, [sp, $328]                          @ store w[66]
1067    add     r3, r7                                  @ add k[3] to w[66]
1068    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1069    str     r5, [fp]                                @ shift regs
1070
1071    str     r8, [fp]                                @ +Par(b,c,d), +e
1072    ldr     r4, [sp, $268]                          @ r4 = w[i-16]
1073    ldr     r3, [sp, $276]                          @ r3 = w[i-14]
1074    eor     r4, r3                                  @ XOR w[i-14]
1075    ldr     r3, [sp, $300]                          @ r3 = w[i-8]
1076    eor     r4, r3                                  @ XOR w[i-8]
1077    ldr     r3, [sp, $320]                          @ r3 = w[i-3]
1078    eor     r4, r3                                  @ XOR w[i-3]
1079    ror     r3, r4, $31                             @ rotate left by 1
1080    str     r3, [sp, $332]                          @ store w[67]
1081    add     r3, r7                                  @ add k[3] to w[67]
1082    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1083    str     r5, [fp]                                @ shift regs
1084
1085    str     r8, [fp]                                @ +Par(b,c,d), +e
1086    ldr     r4, [sp, $272]                          @ r4 = w[i-16]
1087    ldr     r3, [sp, $280]                          @ r3 = w[i-14]
1088    eor     r4, r3                                  @ XOR w[i-14]
1089    ldr     r3, [sp, $304]                          @ r3 = w[i-8]
1090    eor     r4, r3                                  @ XOR w[i-8]
1091    ldr     r3, [sp, $324]                          @ r3 = w[i-3]
1092    eor     r4, r3                                  @ XOR w[i-3]
1093    ror     r3, r4, $31                             @ rotate left by 1
1094    str     r3, [sp, $336]                          @ store w[68]
1095    add     r3, r7                                  @ add k[3] to w[68]
1096    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1097    str     r5, [fp]                                @ shift regs
1098
1099    str     r8, [fp]                                @ +Par(b,c,d), +e
1100    ldr     r4, [sp, $276]                          @ r4 = w[i-16]
1101    ldr     r3, [sp, $284]                          @ r3 = w[i-14]
1102    eor     r4, r3                                  @ XOR w[i-14]
1103    ldr     r3, [sp, $308]                          @ r3 = w[i-8]
1104    eor     r4, r3                                  @ XOR w[i-8]
1105    ldr     r3, [sp, $328]                          @ r3 = w[i-3]
1106    eor     r4, r3                                  @ XOR w[i-3]
1107    ror     r3, r4, $31                             @ rotate left by 1
1108    str     r3, [sp, $340]                          @ store w[69]
1109    add     r3, r7                                  @ add k[3] to w[69]
1110    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1111    str     r5, [fp]                                @ shift regs
1112
1113    str     r8, [fp]                                @ +Par(b,c,d), +e
1114    ldr     r4, [sp, $280]                          @ r4 = w[i-16]
1115    ldr     r3, [sp, $288]                          @ r3 = w[i-14]
1116    eor     r4, r3                                  @ XOR w[i-14]
1117    ldr     r3, [sp, $312]                          @ r3 = w[i-8]
1118    eor     r4, r3                                  @ XOR w[i-8]
1119    ldr     r3, [sp, $332]                          @ r3 = w[i-3]
1120    eor     r4, r3                                  @ XOR w[i-3]
1121    ror     r3, r4, $31                             @ rotate left by 1
1122    str     r3, [sp, $344]                          @ store w[70]
1123    add     r3, r7                                  @ add k[3] to w[70]
1124    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1125    str     r5, [fp]                                @ shift regs
1126
1127    str     r8, [fp]                                @ +Par(b,c,d), +e
1128    ldr     r4, [sp, $284]                          @ r4 = w[i-16]
1129    ldr     r3, [sp, $292]                          @ r3 = w[i-14]
1130    eor     r4, r3                                  @ XOR w[i-14]
1131    ldr     r3, [sp, $316]                          @ r3 = w[i-8]
1132    eor     r4, r3                                  @ XOR w[i-8]
1133    ldr     r3, [sp, $336]                          @ r3 = w[i-3]
1134    eor     r4, r3                                  @ XOR w[i-3]
1135    ror     r3, r4, $31                             @ rotate left by 1
1136    str     r3, [sp, $348]                          @ store w[71]
1137    add     r3, r7                                  @ add k[3] to w[71]
1138    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1139    str     r5, [fp]                                @ shift regs
1140
1141    str     r8, [fp]                                @ +Par(b,c,d), +e
1142    ldr     r4, [sp, $288]                          @ r4 = w[i-16]
1143    ldr     r3, [sp, $296]                          @ r3 = w[i-14]
1144    eor     r4, r3                                  @ XOR w[i-14]
1145    ldr     r3, [sp, $320]                          @ r3 = w[i-8]
1146    eor     r4, r3                                  @ XOR w[i-8]
1147    ldr     r3, [sp, $340]                          @ r3 = w[i-3]
1148    eor     r4, r3                                  @ XOR w[i-3]
1149    ror     r3, r4, $31                             @ rotate left by 1
1150    str     r3, [sp, $352]                          @ store w[72]
1151    add     r3, r7                                  @ add k[3] to w[72]
1152    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1153    str     r5, [fp]                                @ shift regs
1154
1155    str     r8, [fp]                                @ +Par(b,c,d), +e
1156    ldr     r4, [sp, $292]                          @ r4 = w[i-16]
1157    ldr     r3, [sp, $300]                          @ r3 = w[i-14]
1158    eor     r4, r3                                  @ XOR w[i-14]
1159    ldr     r3, [sp, $324]                          @ r3 = w[i-8]
1160    eor     r4, r3                                  @ XOR w[i-8]
1161    ldr     r3, [sp, $344]                          @ r3 = w[i-3]
1162    eor     r4, r3                                  @ XOR w[i-3]
1163    ror     r3, r4, $31                             @ rotate left by 1
1164    str     r3, [sp, $356]                          @ store w[73]
1165    add     r3, r7                                  @ add k[3] to w[73]
1166    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1167    str     r5, [fp]                                @ shift regs
1168
1169    str     r8, [fp]                                @ +Par(b,c,d), +e
1170    ldr     r4, [sp, $296]                          @ r4 = w[i-16]
1171    ldr     r3, [sp, $304]                          @ r3 = w[i-14]
1172    eor     r4, r3                                  @ XOR w[i-14]
1173    ldr     r3, [sp, $328]                          @ r3 = w[i-8]
1174    eor     r4, r3                                  @ XOR w[i-8]
1175    ldr     r3, [sp, $348]                          @ r3 = w[i-3]
1176    eor     r4, r3                                  @ XOR w[i-3]
1177    ror     r3, r4, $31                             @ rotate left by 1
1178    str     r3, [sp, $360]                          @ store w[74]
1179    add     r3, r7                                  @ add k[3] to w[74]
1180    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1181    str     r5, [fp]                                @ shift regs
1182
1183    str     r8, [fp]                                @ +Par(b,c,d), +e
1184    ldr     r4, [sp, $300]                          @ r4 = w[i-16]
1185    ldr     r3, [sp, $308]                          @ r3 = w[i-14]
1186    eor     r4, r3                                  @ XOR w[i-14]
1187    ldr     r3, [sp, $332]                          @ r3 = w[i-8]
1188    eor     r4, r3                                  @ XOR w[i-8]
1189    ldr     r3, [sp, $352]                          @ r3 = w[i-3]
1190    eor     r4, r3                                  @ XOR w[i-3]
1191    ror     r3, r4, $31                             @ rotate left by 1
1192    str     r3, [sp, $364]                          @ store w[75]
1193    add     r3, r7                                  @ add k[3] to w[75]
1194    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1195    str     r5, [fp]                                @ shift regs
1196
1197    str     r8, [fp]                                @ +Par(b,c,d), +e
1198    ldr     r4, [sp, $304]                          @ r4 = w[i-16]
1199    ldr     r3, [sp, $312]                          @ r3 = w[i-14]
1200    eor     r4, r3                                  @ XOR w[i-14]
1201    ldr     r3, [sp, $336]                          @ r3 = w[i-8]
1202    eor     r4, r3                                  @ XOR w[i-8]
1203    ldr     r3, [sp, $356]                          @ r3 = w[i-3]
1204    eor     r4, r3                                  @ XOR w[i-3]
1205    ror     r3, r4, $31                             @ rotate left by 1
1206    str     r3, [sp, $368]                          @ store w[76]
1207    add     r3, r7                                  @ add k[3] to w[76]
1208    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1209    str     r5, [fp]                                @ shift regs
1210
1211    str     r8, [fp]                                @ +Par(b,c,d), +e
1212    ldr     r4, [sp, $308]                          @ r4 = w[i-16]
1213    ldr     r3, [sp, $316]                          @ r3 = w[i-14]
1214    eor     r4, r3                                  @ XOR w[i-14]
1215    ldr     r3, [sp, $340]                          @ r3 = w[i-8]
1216    eor     r4, r3                                  @ XOR w[i-8]
1217    ldr     r3, [sp, $360]                          @ r3 = w[i-3]
1218    eor     r4, r3                                  @ XOR w[i-3]
1219    ror     r3, r4, $31                             @ rotate left by 1
1220    str     r3, [sp, $372]                          @ store w[77]
1221    add     r3, r7                                  @ add k[3] to w[77]
1222    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1223    str     r5, [fp]                                @ shift regs
1224
1225    str     r8, [fp]                                @ +Par(b,c,d), +e
1226    ldr     r4, [sp, $312]                          @ r4 = w[i-16]
1227    ldr     r3, [sp, $320]                          @ r3 = w[i-14]
1228    eor     r4, r3                                  @ XOR w[i-14]
1229    ldr     r3, [sp, $344]                          @ r3 = w[i-8]
1230    eor     r4, r3                                  @ XOR w[i-8]
1231    ldr     r3, [sp, $364]                          @ r3 = w[i-3]
1232    eor     r4, r3                                  @ XOR w[i-3]
1233    ror     r3, r4, $31                             @ rotate left by 1
1234    str     r3, [sp, $376]                          @ store w[78]
1235    add     r3, r7                                  @ add k[3] to w[78]
1236    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1237    str     r5, [fp]                                @ shift regs
1238
1239    str     r8, [fp]                                @ +Par(b,c,d), +e
1240    ldr     r4, [sp, $316]                          @ r4 = w[i-16]
1241    ldr     r3, [sp, $324]                          @ r3 = w[i-14]
1242    eor     r4, r3                                  @ XOR w[i-14]
1243    ldr     r3, [sp, $348]                          @ r3 = w[i-8]
1244    eor     r4, r3                                  @ XOR w[i-8]
1245    ldr     r3, [sp, $368]                          @ r3 = w[i-3]
1246    eor     r4, r3                                  @ XOR w[i-3]
1247    ror     r3, r4, $31                             @ rotate left by 1
1248    str     r3, [sp, $380]                          @ store w[79]
1249    add     r3, r7                                  @ add k[3] to w[79]
1250    str     r3, [fp, $0x800+((ADR+CAA)<<2)]         @ add sum to CAA
1251    str     r5, [fp]                                @ shift regs
1252
1253    add     r9, sp, $28                             @ r9 -> output[0] on stack
1254    add     r8, fp, $0x800+((ADR+CA0)<<2)           @ r8 = indirect_cmd ADR+CA0
1255    add     sl, fp, $0x800+((STR+CA0)<<2)           @ sl = indirect_cmd STR+CA0
1256
1257    ldmia   r9, {r3-r7}                             @ get current outputs
1258    stmia   r8, {r3-r7}                             @ add output[i] to CA[4:0]
1259    ldmia   sl, {r3-r7}                             @ get CA[4:0]
1260
1261    subs    r1, $1                                  @ decrement num_blks
1262    bne     next_blk
1263
1264    add     sp, $384                                @ unreserve stack space
1265    stmia   r2, {r3-r7}                             @ store CA[i] to output[i]
1266    ldmia   sp!, {r4-fp}                            @ restore regs and return
1267    bx      lr
1268
1269#*******************************************************************************
1270#*******************************************************************************
1271#
1272# SHA1: Resets the SHA1 state variables to sha1_initial_h, then calls
1273#       mmcau_sha1_hash_n on one or more input message blocks
1274#   arguments
1275#           *msg_data   pointer to start of input message data
1276#           num_blks    number of 512-bit blocks to process
1277#           *sha1_state pointer to 160-bit block of SHA1 state variables:
1278#                           a,b,c,d,e (overwritten with sha1_initial_h first)
1279#
1280#   calling convention
1281#   void    mmcau_sha1_update (const unsigned char *msg_data,
1282#                              const int            num_blks,
1283#                              unsigned int        *sha1_state)
1284
1285    .global _mmcau_sha1_update
1286    .global mmcau_sha1_update
1287    .type   mmcau_sha1_update, %function
1288    .align  4                                       @ 2^4 = 16-byte alignment in ARM gas
1289
1290_mmcau_sha1_update:
1291mmcau_sha1_update:
1292# on entry r0 = msg_data, r1 = num_blks, r2 = sha1_state; none of them is modified here
1293    stmdb   sp!, {r3-r7, lr}                        @ push r3-r7 and lr (six words)
1294# NOTE(review): r3 is presumably pushed only to keep the frame 8-byte aligned - confirm
1295    movw    r3, #:lower16:sha1_initial_h            @ r3 = low half of &sha1_initial_h
1296    movt    r3, #:upper16:sha1_initial_h            @ r3 = full &sha1_initial_h
1297
1298# overwrite sha1_state[0-4] with the five words of sha1_initial_h
1299    ldmia   r3, {r3-r7}                             @ r3-r7 = sha1_initial_h[0-4] (r3 is base and first dest)
1300    stmia   r2, {r3-r7}                             @ sha1_state[0-4] = r3-r7 (r2 not written back)
1301
1302    bl      mmcau_sha1_hash_n                       @ hash_n with r0-r2 exactly as received
1303
1304    ldmia   sp!, {r3-r7, pc}                        @ pop r3-r7; saved lr goes to pc (return)
1305
1306#*******************************************************************************
1307#*******************************************************************************
1308#
1309# SHA1: Perform the hash and generate SHA1 state variables for one input
1310#       message block. Branches (no link) to mmcau_sha1_hash_n with
1311#       num_blks = 1; *sha1_state is passed through and is not reset here.
1312#   arguments
1313#           *msg_data   pointer to start of input message data
1314#           *sha1_state pointer to 160-bit block of SHA1 state variables:
1315#                           a,b,c,d,e
1316#
1317#   NOTE    Input message and digest output blocks must not overlap
1318#
1319#   calling convention
1320#   void    mmcau_sha1_hash (const unsigned char *msg_data,
1321#                            unsigned int        *sha1_state)
1322
1323    .global _mmcau_sha1_hash
1324    .global mmcau_sha1_hash
1325    .type   mmcau_sha1_hash, %function
1326    .align  4                                       @ 2^4 = 16-byte alignment in ARM gas
1327
1328_mmcau_sha1_hash:
1329mmcau_sha1_hash:
1330# on entry r0 = msg_data, r1 = sha1_state; r0 stays in place for hash_n
1331    mov    r2, r1                                   @ r2 = sha1_state (hash_n stores its result via r2)
1332    mov    r1, $1                                   @ r1 = num_blks = 1
1333    b      mmcau_sha1_hash_n                        @ plain branch: lr untouched, hash_n's bx lr returns to our caller
1334
1335#*******************************************************************************
1336
1337    .data                                           @ NOTE(review): writable section, yet the visible code only reads these tables - .rodata may suffice, confirm
1338    .type   sha1_initial_h, %object
1339    .align  4                                       @ 2^4 = 16-byte alignment in ARM gas
1340# initial SHA-1 hash value (five words); mmcau_sha1_update copies these to sha1_state[0-4]
1341sha1_initial_h:
1342    .word   0x67452301                              @ a
1343    .word   0xefcdab89                              @ b
1344    .word   0x98badcfe                              @ c
1345    .word   0x10325476                              @ d
1346    .word   0xc3d2e1f0                              @ e
1347
1348    .type   sha1_k, %object
1349    .align  4                                       @ 2^4 = 16-byte alignment in ARM gas
1350# SHA-1 round constants, one word per group of 20 rounds (not referenced in this part of the file)
1351sha1_k:
1352    .word   0x5a827999                              @ rounds  0-19
1353    .word   0x6ed9eba1                              @ rounds 20-39
1354    .word   0x8f1bbcdc                              @ rounds 40-59
1355    .word   0xca62c1d6                              @ rounds 60-79
1356