1# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
2#
3# Copyright (c) Freescale Semiconductor, Inc 2013.
4#
5# FILE NAME         : mmcau_sha256_functions.s
6# VERSION           : $Id:  $
7# TYPE              : Source Cortex-M0+ assembly library code
8# DEPARTMENT        : MCG R&D Core and Platforms
9# AUTHOR            : Anthony (Teejay) Ciancio
10# AUTHOR EMAIL      : teejay.ciancio@freescale.com
11#
12#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
13#
14# VERSION   DATE        AUTHOR          DESCRIPTION
15# *******   ****        ******          ***********
16# 1.0       2013-11     Ciancio         initial release, using the ARMv6-M ISA
17#
18# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
19
20
# CAU opcode and register names (LDR, STR, ADR, HASH, CA0, CAA, ...) used
# throughout this file come from the included header.
    .include "cau2_defines.hdr"
    .syntax unified
# Select the Thumb instruction set explicitly; this is Cortex-M0+ code and
# must not depend on the assembler command line to pick the encoding.
    .thumb


# Base address written with packed CAU command words (see sha256_reg_data)
    .equ    MMCAU_PPB_DIRECT,   0xf0005000
# Base address of the per-command data registers, indexed by (opcode+reg)<<2
    .equ    MMCAU_PPB_INDIRECT, 0xf0005800
# Flag bit set in a word that carries one command (command placed at <<22)
    .equ    MMCAU_1_CMD,        0x80000000
# Flag bits set in a word that carries three commands (placed at <<22, <<11, <<0)
    .equ    MMCAU_3_CMDS,       0x80100200
29
30
31# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
32#
33#   MMCAU_SHA256_INITIALIZE_OUTPUT
#   Initializes the hash output with the SHA-256 initial hash values; this
#   implementation performs no CAU hardware revision check and always returns 0
35#
36#   ARGUMENTS
37#   *output         pointer to 256-bit message digest output
38#
39#   CALLING CONVENTION
40#   int mmcau_sha256_initialize_output (const unsigned int *output)
41#
42#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
43#
44#    REGISTER  |  ALLOCATION (at the start of mmcau_sha256_initialize_output)
45#   -----------+------------------------------------------------------------
46#          r0  |  *output       (arg0)
47#              |
48#        > r0  |  irrelevant
49#
50# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
51
52    .global _mmcau_sha256_initialize_output
53    .global mmcau_sha256_initialize_output
54    .type   mmcau_sha256_initialize_output, %function
55    .align  4
56
57_mmcau_sha256_initialize_output:
58mmcau_sha256_initialize_output:
59
60# store regs r4-r7, we need to restore them at the end of the routine
61    push    {r4-r7}                         @ store regs
62
63    ldr     r3, =sha256_initial_h
64    ldmia   r3!, {r4-r7}                    @ load sha256_initial_h[0-3]
65#   stmia   r0!, {r4-r7}                    @ store in output[0-3]
66    str     r4, [r0, #0<<2]                 @ expand stmia into str to be interruptible
67    str     r5, [r0, #1<<2]
68    str     r6, [r0, #2<<2]
69    str     r7, [r0, #3<<2]
70    adds    r0, #1<<4
71
72    ldmia   r3!, {r4-r7}                    @ load sha256_initial_h[4-7]
73#   stmia   r0!, {r4-r7}                    @ store in output[4-7]
74    str     r4, [r0, #0<<2]                 @ expand stmia into str to be interruptible
75    str     r5, [r0, #1<<2]
76    str     r6, [r0, #2<<2]
77    str     r7, [r0, #3<<2]
78    adds    r0, #1<<4
79
80    movs    r0, #0                          @ clear the return value
81    pop     {r4-r7}                         @ restore regs
82    bx      lr                              @ exit routine
83
84
85# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
86#
87#   MMCAU_SHA256_HASH_N
88#   Perform the hash for one or more input message blocks and generate the
89#   message digest output
90#
91#   ARGUMENTS
92#   *input          pointer to start of input message data
93#   num_blks        number of 512-bit blocks to process
94#   *output         pointer to 256-bit message digest
95#
96#   NOTE
97#   Input message and digest output blocks must not overlap
98#
99#   CALLING CONVENTION
100#   void mmcau_sha256_hash_n   (const unsigned char     *input,
101#                               const int               num_blks,
102#                               unsigned int            *output)
103#
104#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
105#
106#    REGISTER  |  ALLOCATION (at the start of mmcau_sha256_hash_n)
107#   -----------+------------------------------------------------------------
108#          r0  |  *input        (arg0)
109#          r1  |  num_blks      (arg1)
110#          r2  |  *output       (arg2)
111#
112#        > r2  |  irrelevant
113#
114#
115#       STACK  |  ALLOCATION (throughout mmcau_sha256_hash_n)
116#   -----------+------------------------------------------------------------
117#        #268  |  *output
118#        #264  |  num_blks
119#        #260  |  *input
120#        #256  |  mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2C)
121#    #64-#252  |  w[i] in loop
122#      #0-#60  |  w[0-15] in next_blk
123#
124# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
125
126    .global _mmcau_sha256_hash_n
127    .global mmcau_sha256_hash_n
128    .type   mmcau_sha256_hash_n, %function
129    .align  4
130
131_mmcau_sha256_hash_n:
132mmcau_sha256_hash_n:
133
134# store *input, num_blks, and *output, we need them later in the routine
135# store regs r4-r10, we need to restore them at the end of the routine
136    push    {r4-r7, lr}     @ store low regs and link reg
137    mov     r3, r8
138    mov     r4, r9
139    mov     r5, sl
140    mov     r6, fp
141    mov     r7, ip
142    push    {r0-r2, r3-r7}  @ store *input, num_blks, *output, high regs
143
144    sub     sp, #260        @ reserve stack
145
146# initialize the CAU data regs with the current contents of output[0-7]
147    ldr     r1, =MMCAU_PPB_INDIRECT+((LDR+CA0)<<2)
148    ldmia   r2!, {r4-r7}                    @ load output[0-3]
149#   stmia   r1!, {r4-r7}                    @ store in CA[0-3]
150    str     r4, [r1, #0<<2]                 @ expand stmia into str to be interruptible
151    str     r5, [r1, #1<<2]
152    str     r6, [r1, #2<<2]
153    str     r7, [r1, #3<<2]
154    adds    r1, #4<<2
155
156    ldmia   r2!, {r4-r7}                    @ load output[4-7]
157#   stmia   r1!, {r4-r7}                    @ store in CA[4-7]
158    str     r4, [r1, #0<<2]                 @ expand stmia into str to be interruptible
159    str     r5, [r1, #1<<2]
160    str     r6, [r1, #2<<2]
161    str     r7, [r1, #3<<2]
162    adds    r1, #4<<2
163
164# prepare for next_blk
165    ldr     r1, =sha256_reg_data+3<<2       @ get *sha256_reg_data[3]
166#   ldmia   r1, {r1-r7}                     @ load sha256_reg_data[3-9]
167    adds    r1, #1<<2                       @ move r1 by 4 bytes
168    ldmia   r1!, {r2-r7}                    @ load sha256_reg_data[4-9] and move r1 by 24 bytes
169    subs    r1, #7<<2                       @ move r1 back by 28 bytes
170    ldr     r1, [r1]                        @ load sha256_reg_data[3]
171
172    mov     r9, r5                          @ store mmcau_indirect_cmd(LDR+CAA)
173    mov     sl, r6                          @ store mmcau_indirect_cmd(ADR+CAA)
174    mov     fp, r7                          @ store mmcau_indirect_cmd(STR+CAA)
175    ldr     r5, =MMCAU_PPB_DIRECT
176
177
178    .align  2
179next_blk:
180
181#   i = 0;
182#       for (j = 0; j < 16; j++, i++)
183#       {
184#           w[i] = byterev(input[i]);                   // copy m[i] to w[i]
185#           *(MMCAU_PPB_INDIRECT + (LDR+CAA)) = w[i];   // +w[i]+h+SIGMA1(e)
186#                                                       // add Ch(e,f,g)
187#           *(MMCAU_PPB_DIRECT) = mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2C);
188#                                                       // +k[i]+t1+SIGMA0(e)
189#           *(MMCAU_PPB_INDIRECT + (ADR+CAA)) = sha256_k[i];
190#                                                       // add Maj(a,b,c)
191#           *(MMCAU_PPB_DIRECT) = mmcau_3_cmds(MVAR+CA8,HASH+HF2S,HASH+HF2M);
192#           *(MMCAU_PPB_DIRECT) = mmcau_1_cmd(SHS2);    // shift regs
193#       }
194#
195#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
196#
197#    REGISTER  |  ALLOCATION (throughout next_blk)
198#   -----------+------------------------------------------------------------
199#          r0  |  *input
200#          r1  |  mmcau_1_cmd(SHS2)
201#          r2  |  mmcau_3_cmds(MVAR+CA8,HASH+HF2S,HASH+HF2M)
202#          r3  |  mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2C)
203#          r4  |  *sha256_k
204#          r5  |  *mmcau_direct_cmd()
205#          r6  |  scratch
206#          r7  |  scratch
207#          r8  |  not used
208#          r9  |  mmcau_indirect_cmd(LDR+CAA)
209#    (sl) r10  |  mmcau_indirect_cmd(ADR+CAA)
210#    (fp) r11  |  mmcau_indirect_cmd(STR+CAA)
211#    (ip) r12  |  mmcau_1_cmd(SHS2)
212#    (sp) r13  |  stack pointer
213#    (lr) r14  |  mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2M)
214
215    ldmia   r0!, {r7}                       @ m[0], *input++
216    rev     r7, r7                          @ w[0]
217    str     r7, [sp, #0<<2]                 @ store w[0]
218    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
219    str     r7, [r6]                        @ add w[i]
220    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
221    ldmia   r4!, {r7}                       @ k[0], *sha256_k++
222    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
223    str     r7, [r6]                        @ add k[0]
224    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
225    str     r1, [r5]                        @ shift registers
226
227    ldmia   r0!, {r7}                       @ m[1], *input++
228    rev     r7, r7                          @ w[1]
229    str     r7, [sp, #1<<2]                 @ store w[1]
230    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
231    str     r7, [r6]                        @ add w[1]
232    ldmia   r4!, {r7}                       @ k[1], *sha256_k++
233    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
234    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
235    str     r7, [r6]                        @ add k[1]
236    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
237    str     r1, [r5]                        @ shift registers
238
239    ldmia   r0!, {r7}                       @ m[2], *input++
240    rev     r7, r7                          @ w[2]
241    str     r7, [sp, #2<<2]                 @ store w[2]
242    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
243    str     r7, [r6]                        @ add w[2]
244    ldmia   r4!, {r7}                       @ k[2], *sha256_k++
245    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
246    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
247    str     r7, [r6]                        @ add k[2]
248    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
249    str     r1, [r5]                        @ shift registers
250
251    ldmia   r0!, {r7}                       @ m[3], *input++
252    rev     r7, r7                          @ w[3]
253    str     r7, [sp, #3<<2]                 @ store w[3]
254    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
255    str     r7, [r6]                        @ add w[3]
256    ldmia   r4!, {r7}                       @ k[3], *sha256_k++
257    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
258    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
259    str     r7, [r6]                        @ add k[3]
260    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
261    str     r1, [r5]                        @ shift registers
262
263    ldmia   r0!, {r7}                       @ m[4], *input++
264    rev     r7, r7                          @ w[4]
265    str     r7, [sp, #4<<2]                 @ store w[4]
266    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
267    str     r7, [r6]                        @ add w[4]
268    ldmia   r4!, {r7}                       @ k[4], *sha256_k++
269    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
270    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
271    str     r7, [r6]                        @ add k[4]
272    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
273    str     r1, [r5]                        @ shift registers
274
275    ldmia   r0!, {r7}                       @ m[5], *input++
276    rev     r7, r7                          @ w[5]
277    str     r7, [sp, #5<<2]                 @ store w[5]
278    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
279    str     r7, [r6]                        @ add w[5]
280    ldmia   r4!, {r7}                       @ k[5], *sha256_k++
281    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
282    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
283    str     r7, [r6]                        @ add k[5]
284    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
285    str     r1, [r5]                        @ shift registers
286
287    ldmia   r0!, {r7}                       @ m[6], *input++
288    rev     r7, r7                          @ w[6]
289    str     r7, [sp, #6<<2]                 @ store w[6]
290    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
291    str     r7, [r6]                        @ add w[6]
292    ldmia   r4!, {r7}                       @ k[6], *sha256_k++
293    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
294    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
295    str     r7, [r6]                        @ add k[6]
296    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
297    str     r1, [r5]                        @ shift registers
298
299    ldmia   r0!, {r7}                       @ m[7], *input++
300    rev     r7, r7                          @ w[7]
301    str     r7, [sp, #7<<2]                 @ store w[7]
302    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
303    str     r7, [r6]                        @ add w[7]
304    ldmia   r4!, {r7}                       @ k[7], *sha256_k++
305    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
306    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
307    str     r7, [r6]                        @ add k[7]
308    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
309    str     r1, [r5]                        @ shift registers
310
311    ldmia   r0!, {r7}                       @ m[8], *input++
312    rev     r7, r7                          @ w[8]
313    str     r7, [sp, #8<<2]                 @ store w[8]
314    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
315    str     r7, [r6]                        @ add w[8]
316    ldmia   r4!, {r7}                       @ k[8], *sha256_k++
317    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
318    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
319    str     r7, [r6]                        @ add k[8]
320    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
321    str     r1, [r5]                        @ shift registers
322
323    ldmia   r0!, {r7}                       @ m[9], *input++
324    rev     r7, r7                          @ w[9]
325    str     r7, [sp, #9<<2]                 @ store w[9]
326    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
327    str     r7, [r6]                        @ add w[9]
328    ldmia   r4!, {r7}                       @ k[9], *sha256_k++
329    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
330    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
331    str     r7, [r6]                        @ add k[9]
332    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
333    str     r1, [r5]                        @ shift registers
334
335    ldmia   r0!, {r7}                       @ m[10], *input++
336    rev     r7, r7                          @ w[10]
337    str     r7, [sp, #10<<2]                @ store w[10]
338    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
339    str     r7, [r6]                        @ add w[10]
340    ldmia   r4!, {r7}                       @ k[10], *sha256_k++
341    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
342    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
343    str     r7, [r6]                        @ add k[10]
344    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
345    str     r1, [r5]                        @ shift registers
346
347    ldmia   r0!, {r7}                       @ m[11], *input++
348    rev     r7, r7                          @ w[11]
349    str     r7, [sp, #11<<2]                @ store w[11]
350    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
351    str     r7, [r6]                        @ add w[11]
352    ldmia   r4!, {r7}                       @ k[11], *sha256_k++
353    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
354    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
355    str     r7, [r6]                        @ add k[11]
356    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
357    str     r1, [r5]                        @ shift registers
358
359    ldmia   r0!, {r7}                       @ m[12], *input++
360    rev     r7, r7                          @ w[12]
361    str     r7, [sp, #12<<2]                @ store w[12]
362    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
363    str     r7, [r6]                        @ add w[12]
364    ldmia   r4!, {r7}                       @ k[12], *sha256_k++
365    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
366    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
367    str     r7, [r6]                        @ add k[12]
368    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
369    str     r1, [r5]                        @ shift registers
370
371    ldmia   r0!, {r7}                       @ m[13], *input++
372    rev     r7, r7                          @ w[13]
373    str     r7, [sp, #13<<2]                @ store w[13]
374    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
375    str     r7, [r6]                        @ add w[13]
376    ldmia   r4!, {r7}                       @ k[13], *sha256_k++
377    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
378    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
379    str     r7, [r6]                        @ add k[13]
380    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
381    str     r1, [r5]                        @ shift registers
382
383    ldmia   r0!, {r7}                       @ m[14], *input++
384    rev     r7, r7                          @ w[14]
385    str     r7, [sp, #14<<2]                @ store w[14]
386    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
387    str     r7, [r6]                        @ add w[14]
388    ldmia   r4!, {r7}                       @ k[14], *sha256_k++
389    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
390    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
391    str     r7, [r6]                        @ add k[14]
392    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
393    str     r1, [r5]                        @ shift registers
394
395    ldmia   r0!, {r7}                       @ m[15], *input++
396    rev     r7, r7                          @ w[15]
397    str     r7, [sp, #15<<2]                @ store w[15]
398    mov     r6, r9                          @ mmcau_indirect_cmd(LDR+CAA)
399    str     r7, [r6]                        @ add w[15]
400    ldmia   r4!, {r7}                       @ k[15], *sha256_k++
401    str     r3, [r5]                        @ +h, +SIGMA1(e), +Ch(e,f,g)
402    mov     r6, sl                          @ mmcau_indirect_cmd(ADR+CAA)
403    str     r7, [r6]                        @ add k[15]
404    str     r2, [r5]                        @ t1, +SIGMA0(e), +Maj(a,b,c)
405    str     r1, [r5]                        @ shift registers
406
407# prepare for loop
408    str     r0, [sp, #260]                  @ store *input
409    mov     ip, r1                          @ store SHS2
410    mov     lr, r2                          @ store HF2M
411    str     r3, [sp, #256]                  @ store HF2C
412    ldr     r0, =sha256_reg_data            @ get *sha256_reg_data
413#   ldmia   r0, {r0-r2}                     @ load sha256_reg_data[0-2]
414    adds    r0, #1<<2                       @ move r0 by 4 bytes
415    ldmia   r0!, {r1-r2}                    @ load sha256_reg_data[1-2] and move r0 by 8 bytes
416    subs    r0, #3<<2                       @ move r0 back by 12 bytes
417    ldr     r0, [r0]                        @ load sha256_reg_data[0]
418    add     r3, sp, #0                      @ get *w[0]
419    movs    r6, #48                         @ set number of loops = 48
420
421
422loop:
423
424#   for (j = 0; j < 48; j++, i++)
425#   {
426#   *(MMCAU_PPB_INDIRECT + (LDR+CAA))  = w[i-16];       // [i-16]
427#   *(MMCAU_PPB_INDIRECT + (LDR+CA8))  = w[i-15];       // [i-15]
428#   *(MMCAU_PPB_DIRECT) = mmcau_1_cmd(HASH+HF2U);       // + Sigma2(w[i-15])
429#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = w[i-7];        // add w[i-7]
430#   *(MMCAU_PPB_INDIRECT + (LDR+CA8))  = w[i-2];        // load w[i-2]
431#   *(MMCAU_PPB_DIRECT) = mmcau_1_cmd(HASH+HF2V);       // + Sigma1(w[i-2])
432#   w[i] = *(MMCAU_PPB_INDIRECT + (STR+CAA));           // store w[i]
433#   *(MMCAU_PPB_DIRECT) = mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2C);
434#   *(MMCAU_PPB_INDIRECT + (ADR+CAA))  = sha256_k[i];   // add k[i]
435#   *(MMCAU_PPB_DIRECT) = mmcau_3_cmds(MVAR+CA8,HASH+HF2S,HASH+HF2M);
436#   *(MMCAU_PPB_DIRECT) = mmcau_1_cmd(SHS2);            // shift registers
437#   }
438#
439#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
440#
441#    REGISTER  |  ALLOCATION
442#   -----------+------------------------------------------------------------
443#          r0  |  mmcau_1_cmd(HASH+HF2U)
444#          r1  |  mmcau_1_cmd(SHS2)
445#          r2  |  mmcau_indirect_cmd(LDR+CA8)
446#          r3  |  *w[0]
447#          r4  |  *sha256_k
448#          r5  |  *mmcau_direct_cmd()
449#          r6  |  scratch
450#          r7  |  scratch
451#          r8  |  loop count
452#          r9  |  mmcau_indirect_cmd(LDR+CAA)
453#    (sl) r10  |  mmcau_indirect_cmd(ADR+CAA)
454#    (fp) r11  |  mmcau_indirect_cmd(STR+CAA)
455#    (ip) r12  |  mmcau_1_cmd(SHS2)
456#    (sp) r13  |  stack pointer
457#    (lr) r14  |  mmcau_3_cmds(ADRA+CA7,HASH+HF2T,HASH+HF2M)
458
459    mov     r8, r6                          @ store loop count
460    ldmia   r3!, {r6}                       @ w[i-16], *w[0]++
461    mov     r7, r9                          @ (LDR+CAA)
462    str     r6, [r7]                        @ CAA += w[i-16]
463    ldr     r6, [r3, #0<<2]                 @ w[i-15]
464    str     r6, [r2]                        @ CA8 += w[i-15]
465    str     r0, [r5]                        @ (HASH+HF2U)
466    ldr     r6, [r3, #8<<2]                 @ w[i-7]
467    mov     r7, sl                          @ (ADR+CAA)
468    str     r6, [r7]                        @ CAA += w[i-7]
469    ldr     r6, [r3, #13<<2]                @ w[i-2]
470    str     r6, [r2]                        @ CA8 += w[i-2]
471    str     r1, [r5]                        @ (HASH+HF2V)
472    mov     r7, fp                          @ (STR+CAA)
473    ldr     r6, [r7]                        @ w[i]
474    str     r6, [r3, #15<<2]                @ store w[i]
475    ldr     r7, [sp, #256]                  @ (ADRA+CA7,HASH+HF2T,HASH+HF2C)
476    str     r7, [r5]                        @ +h, SIGMA1(e) & Ch(e,f,g)
477    ldmia   r4!, {r6}                       @ k[i], *sha256_k++
478    mov     r7, sl                          @ (ADR+CAA)
479    str     r6, [r7]                        @ add k[i]
480    mov     r7, lr                          @ (MVAR+CA8,HASH+HF2S,HASH+HF2M)
481    str     r7, [r5]                        @ t1, + SIGMA0(e) + Maj(a,b,c)
482    mov     r7, ip                          @ (SHS2)
483    str     r7, [r5]                        @ shift reGs
484
485
486# find out if loop should be repeated
487    mov     r6, r8                          @ restore loop count
488    subs    r6, #1                          @ decrement loop count
489    bne     loop                            @ check loop count
490
491# after going through the loop for the last time
492    ldr     r2, =MMCAU_PPB_INDIRECT+((ADR+CA0)<<2)
493    ldr     r3, [sp, #268]                  @ restore *output
494    ldmia   r3!, {r4-r7}                    @ load output[0-3]
495#   stmia   r2!, {r4-r7}                    @ add to CA[0-3]
496    str     r4, [r2, #0<<2]                 @ expand stmia into str to be interruptible
497    str     r5, [r2, #1<<2]
498    str     r6, [r2, #2<<2]
499    str     r7, [r2, #3<<2]
500    adds    r2, #1<<4
501
502    ldmia   r3!, {r4-r7}                    @ load output[4-7]
503#   stmia   r2!, {r4-r7}                    @ add to CA[4-7]
504    str     r4, [r2, #0<<2]                 @ expand stmia into str to be interruptible
505    str     r5, [r2, #1<<2]
506    str     r6, [r2, #2<<2]
507    str     r7, [r2, #3<<2]
508    adds    r2, #1<<4
509    subs    r2, #96                         @ mmcau_indirect_cmd(STR+CA0)
510    subs    r3, #8<<2                       @ reset *output
511    ldmia   r2!, {r4-r7}                    @ load new CA[0-3]
512#   stmia   r3!, {r4-r7}                    @ store in output[0-3]
513    str     r4, [r3, #0<<2]                 @ expand stmia into str to be interruptible
514    str     r5, [r3, #1<<2]
515    str     r6, [r3, #2<<2]
516    str     r7, [r3, #3<<2]
517    adds    r3, #1<<4
518
519    ldmia   r2!, {r4-r7}                    @ load new CA[4-7]
520#   stmia   r3!, {r4-r7}                    @ store in output[4-7]
521    str     r4, [r3, #0<<2]                 @ expand stmia into str to be interruptible
522    str     r5, [r3, #1<<2]
523    str     r6, [r3, #2<<2]
524    str     r7, [r3, #3<<2]
525    adds    r3, #1<<4
526
527# find out if next_blk should be repeated
528    ldr     r1, [sp, #264]                  @ restore num_blks
529    subs    r1, #1                          @ decrement num_blks
530    bne     repeat_next_blk                 @ check num_blks
531
532# if num_blks = 0,
533    add     sp, #272                        @ unreserve stack
534    pop     {r3-r7}                         @ restore high regs
535    mov     r8, r3
536    mov     r9, r4
537    mov     sl, r5
538    mov     fp, r6
539    mov     ip, r7
540    pop     {r4-r7, pc}                     @ restore low regs, exit routine
541
542# else (num_blks > 0),
543repeat_next_blk:
544    str     r1, [sp, #264]                  @ store num_blks
545    ldr     r0, [sp, #260]                  @ restore *input
546    ldr     r1, =sha256_reg_data+3<<2       @ get *sha256_reg_data[3]
547#   ldmia   r1, {r1-r4}                     @ load sha256_reg_data[3-6]
548    adds    r1, #1<<2                       @ move r0 by 4 bytes
549    ldmia   r1!, {r2-r4}                    @ load sha256_reg_data[4-6] and move r0 by 12 bytes
550    subs    r1, #1<<4                       @ move r0 back by 16 bytes
551    ldr     r1, [r1]                        @ load sha256_reg_data[3]
552    ldr     r5, =MMCAU_PPB_DIRECT
553    b       next_blk                        @ repeat next_blk
554
555
556# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
557#
558#   MMCAU_SHA256_UPDATE
#   Resets *output to the SHA-256 initial hash values, then hashes one or
#   more input message blocks into it
560#
561#   ARGUMENTS
562#   *input          pointer to start of input message data
563#   num_blks        number of 512-bit blocks to process
564#   *output         pointer to 256-bit message digest
565#
566#   CALLING CONVENTION
567#   void mmcau_sha256_update   (const unsigned char     *input,
568#                               const int               num_blks,
569#                               unsigned int            *output)
570#
571#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
572#
573#    REGISTER  |  ALLOCATION (at the start of mmcau_sha256_update)
574#   -----------+------------------------------------------------------------
575#          r0  |  *input        (arg0)
576#          r1  |  num_blks      (arg1)
577#          r2  |  *output       (arg2)
578#              |
579#        > r2  |  irrelevant
580#
581# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
582
583    .global _mmcau_sha256_update
584    .global mmcau_sha256_update
585    .type   mmcau_sha256_update, %function
586    .align  4
587
588_mmcau_sha256_update:
589mmcau_sha256_update:
590
591# store regs r4-r7 and r14, we need to restore them at the end of the routine
592    push    {r4-r7, lr}                     @ store regs
593
594    ldr     r3, =sha256_initial_h
595    ldmia   r3!, {r4-r7}                    @ load sha256_initial_h[0-3]
596#   stmia   r2!, {r4-r7}                    @ store in output[0-3]
597    str     r4, [r2, #0<<2]                 @ expand stmia into str to be interruptible
598    str     r5, [r2, #1<<2]
599    str     r6, [r2, #2<<2]
600    str     r7, [r2, #3<<2]
601    adds    r2, #1<<4
602    ldmia   r3!, {r4-r7}                    @ load sha256_initial_h[4-7]
603#   stmia   r2!, {r4-r7}                    @ store in output[4-7]
604    str     r4, [r2, #0<<2]                 @ expand stmia into str to be interruptible
605    str     r5, [r2, #1<<2]
606    str     r6, [r2, #2<<2]
607    str     r7, [r2, #3<<2]
608    adds    r2, #1<<4
609    subs    r2, #32                         @ reset *output
610
611    bl      mmcau_sha256_hash_n             @ do mmcau_sha256_hash_n
612
613    pop     {r4-r7, pc}                     @ restore regs, exit routine
614
615
616# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
617#
618#   MMCAU_SHA256_HASH
#   Perform the hash and generate SHA256 state variables for one input
#   message block.
621#
622#   ARGUMENTS
623#   *input          pointer to start of input message data
624#   *output         pointer to 256-bit message digest
625#
626#   NOTE
627#   Input message and digest output blocks must not overlap
628#
629#   CALLING CONVENTION
630#   void mmcau_sha256_hash     (const unsigned char     *input,
631#                               unsigned int            *output)
632#
633#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
634#
635#    REGISTER  |  ALLOCATION (at the start of mmcau_sha256_hash)
636#   -----------+------------------------------------------------------------
637#          r0  |  *input        (arg0)
638#          r1  |  *output       (arg1)
639#              |
640#        > r1  |  irrelevant
641#
642# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
643
644    .global _mmcau_sha256_hash
645    .global mmcau_sha256_hash
646    .type   mmcau_sha256_hash, %function
647    .align  4
648
649_mmcau_sha256_hash:
650mmcau_sha256_hash:
651
652    mov     r2, r1                          @ move arg1 (*output) to arg2
653    movs    r1, #1                          @ set arg1 (num_blks) = 1
654
655    b       mmcau_sha256_hash_n             @ do mmcau_sha256_hash_n
656
657
658# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
659
660    .data
661
662
663    .type   sha256_reg_data, %object
664    .align  4
665
666sha256_reg_data:
667    .word   MMCAU_1_CMD+((HASH+HF2U)<<22)                               @ r0
668    .word   MMCAU_1_CMD+((HASH+HF2V)<<22)                               @ r1
669    .word   MMCAU_PPB_INDIRECT+((LDR+CA8)<<2)                           @ r2
670    .word   MMCAU_1_CMD+((SHS2)<<22)                                    @ r1
671    .word   MMCAU_3_CMDS+((MVAR+CA8)<<22)+((HASH+HF2S)<<11)+HASH+HF2M   @ r2
672    .word   MMCAU_3_CMDS+((ADRA+CA7)<<22)+((HASH+HF2T)<<11)+HASH+HF2C   @ r3
673    .word   sha256_k                                                    @ r4
674    .word   MMCAU_PPB_INDIRECT+((LDR+CAA)<<2)                           @ r5
675    .word   MMCAU_PPB_INDIRECT+((ADR+CAA)<<2)                           @ r6
676    .word   MMCAU_PPB_INDIRECT+((STR+CAA)<<2)                           @ r7
677
678
# Eight-word initial hash value copied into the digest buffer by
# mmcau_sha256_initialize_output and mmcau_sha256_update.
    .type   sha256_initial_h, %object
    .align  4

sha256_initial_h:
    .word   0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a      @ h[0-3]
    .word   0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19      @ h[4-7]
691
692
# Sixty-four round constants, read one word per round through r4 by
# mmcau_sha256_hash_n (four constants per line, index range in the margin).
    .type   sha256_k, %object
    .align  4

sha256_k:
    .word   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5      @ k[0-3]
    .word   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5      @ k[4-7]
    .word   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3      @ k[8-11]
    .word   0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174      @ k[12-15]
    .word   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc      @ k[16-19]
    .word   0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da      @ k[20-23]
    .word   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7      @ k[24-27]
    .word   0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967      @ k[28-31]
    .word   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13      @ k[32-35]
    .word   0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85      @ k[36-39]
    .word   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3      @ k[40-43]
    .word   0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070      @ k[44-47]
    .word   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5      @ k[48-51]
    .word   0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3      @ k[52-55]
    .word   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208      @ k[56-59]
    .word   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2      @ k[60-63]
761