1#*******************************************************************************
2#*******************************************************************************
3#
4# Copyright (c) Freescale Semiconductor, Inc 2011.
5#
6# FILE NAME      : mmcau_md5_functions.s
7# VERSION        : $Id: mmcau_md5_functions.s.rca 1.6 Thu Nov 21 14:18:27 2013 b40907 Experimental $
8# TYPE           : Source Cortex-Mx assembly library code
9# DEPARTMENT     : MSG R&D Core and Platforms
10# AUTHOR         : David Schimke
11# AUTHOR'S EMAIL : David.Schimke@freescale.com
12# -----------------------------------------------------------------------------
13# Release history
14# VERSION  Date       AUTHOR           DESCRIPTION
15#          08-2010    David Schimke    Initial Release
16#          12-2010    David Schimke    Remove "global" on data objects
17#          01-2011    David Schimke    Header added
18#          11-2013    Teejay Ciancio   Cleanup
19#
20#*******************************************************************************
21#*******************************************************************************
22
# Shared CAU definitions come first, ahead of every other directive here.
    .include "cau2_defines.hdr"

# Fixed CAU addresses.  Nothing in the visible part of this file refers to
# them; presumably they are the direct / indirect CAU command windows on the
# private peripheral bus -- TODO confirm against the device reference manual.
    .equ    MMCAU_PPB_DIRECT,   0xE0081000
    .equ    MMCAU_PPB_INDIRECT, 0xE0081800

# Everything after this line is written in unified ARM/Thumb (UAL) syntax.
    .syntax unified
28
29#*******************************************************************************
30#*******************************************************************************
31#
32# MD5: Initializes the MD5 state variables
33#   arguments
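#           *md5_state pointer to 128-bit block of md5 state variables:
#                           a,b,c,d (output: overwritten with the four
#                           words stored at md5_initial_h, even though the
#                           prototype below marks the pointer const)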
36#
37#   calling convention
38#   void    mmcau_md5_initialize_output (const unsigned int *md5_state)
39
# ------------------------------------------------------------------------------
# mmcau_md5_initialize_output  (also exported as _mmcau_md5_initialize_output)
#
#   Copies the four 32-bit words stored at md5_initial_h into the caller's
#   MD5 state buffer.
#
#   In:     r0 = md5_state, destination for four consecutive 32-bit words
#   Out:    no return value; md5_state[0-3] are overwritten
#   Uses:   r1-r3 as scratch; r4 is pushed on entry and popped before return
#   Stack:  one word
#
#   Both exported spellings are typed as %function.  An ELF symbol without
#   that type is not treated as a function entry point, so in Thumb code it
#   would miss the interworking marking that the typed name receives.  (No
#   .thumb directive is visible in this file -- presumably the mode comes from
#   the include file or the assembler command line.)
# ------------------------------------------------------------------------------

    .global _mmcau_md5_initialize_output
    .global mmcau_md5_initialize_output
    .type   _mmcau_md5_initialize_output, %function
    .type   mmcau_md5_initialize_output, %function
    .align  4                                       @ ARM gas: 2^4 = 16-byte boundary

_mmcau_md5_initialize_output:
mmcau_md5_initialize_output:

    stmdb   sp!, {r4}                               @ keep the caller's r4

    movw    r1, #:lower16:md5_initial_h             @ r1 = address of md5_initial_h,
    movt    r1, #:upper16:md5_initial_h             @      built from two 16-bit halves

# r1 is both the base and the first register loaded; there is no writeback, so
# the base address is simply replaced by the first word.
    ldmia   r1, {r1-r4}                             @ r1-r4 = md5_initial_h[0-3]
    stmia   r0, {r1-r4}                             @ md5_state[0-3] = r1-r4

    ldmia   sp!, {r4}                               @ give r4 back to the caller
    bx      lr

    .size   _mmcau_md5_initialize_output, . - _mmcau_md5_initialize_output
    .size   mmcau_md5_initialize_output, . - mmcau_md5_initialize_output
59
60
61#*******************************************************************************
62#*******************************************************************************
63#
64# MD5: Updates MD5 state variables for one or more input message blocks
65#
66#   arguments
67#           *msg_data   pointer to start of input message data
68#           num_blks    number of 512-bit blocks to process
69#           *md5_state  pointer to 128-bit block of MD5 state variables: a,b,c,d
70#
71#   calling convention
#   void    mmcau_md5_hash_n (const unsigned char *msg_data,
#                             const int            num_blks,
#                             unsigned char       *md5_state)
75
76    .global _mmcau_md5_hash_n
77    .global mmcau_md5_hash_n
78    .type   mmcau_md5_hash_n, %function
79    .align  4
80
81_mmcau_md5_hash_n:
82mmcau_md5_hash_n:
83
84#  register allocation
85# --------------------
86#  r0      = input pointer (arg0)
87#  r1      = a / input num_blks (arg1)
88#  r2      = b / output pointer (arg2)
89#  r3      = c
90#  r4      = d
91#  r5      = scratch
92#  r6      = scratch
93#  r7      = pointer to md5_t
94#  r8      = output pointer
95#  r9      = input num_blks
96# r10 (sl) = not used
97# r11 (fp) = not used
98# r12 (ip) = not used
99# r13 (sp) = stack pointer
100# r14 (lr) = link register
101
102    stmdb   sp!, {r4-r9}                            @ save registers on stack
103
104    mov     r9, r1                                  @ r9 = num_blks
105    mov     r8, r2                                  @ r8 = output pointer
106
107    ldmia   r8, {r1-r4}                             @ get md5_state[0-3]
108
109    movw    r7, #:lower16:md5_t                     @ r7 -> md5_t
110    movt    r7, #:upper16:md5_t
111
112    .align  2
113next_blk:
114
115# 16 rounds with F(x,y,z) = (x & y) | (~x & z)
116
117    bic.w   r5, r4, r2                              @ ~b & d
118    and.w   r6, r3, r2                              @ b & c
119    orrs    r5, r6                                  @ F(b,c,d)
120    add     r1, r5                                  @ a += F(b,c,d)
121    ldr     r6, [r0]                                @ input[0]
122    add     r1, r6                                  @ a += input[0]
123    ldr     r6, [r7]                                @ t[0]
124    add     r1, r6                                  @ a += t[0]
125    add.w   r1, r2, r1, ror #25                     @ a = b + ROTL(a,7)
126
127    bic.w   r5, r3, r1                              @ ~a & c
128    and.w   r6, r2, r1                              @ a & b
129    orrs    r5, r6                                  @ F(a,b,c)
130    add     r4, r5                                  @ d += F(a,b,c)
131    ldr     r6, [r0, $1<<2]                         @ input[1]
132    add     r4, r6                                  @ d += input[1]
133    ldr     r6, [r7, $1<<2]                         @ t[1]
134    add     r4, r6                                  @ d += t[1]
135    add.w   r4, r1, r4, ror #20                     @ d = a + ROTL(d,12)
136
137    bic.w   r5, r2, r4                              @ ~d & b
138    and.w   r6, r1, r4                              @ d & a
139    orrs    r5, r6                                  @ F(d,a,b)
140    add     r3, r5                                  @ c += F(d,a,b)
141    ldr     r6, [r0, $2<<2]                         @ input[2]
142    add     r3, r6                                  @ c += input[2]
143    ldr     r6, [r7, $2<<2]                         @ t[2]
144    add     r3, r6                                  @ c += t[2]
145    add.w   r3, r4, r3, ror #15                     @ c = d + ROTL(c,17)
146
147    bic.w   r5, r1, r3                              @ ~c & a
148    and.w   r6, r4, r3                              @ c & d
149    orrs    r5, r6                                  @ F(c,d,a)
150    add     r2, r5                                  @ b += F(c,d,a)
151    ldr     r6, [r0, $3<<2]                         @ input[3]
152    add     r2, r6                                  @ b += input[3]
153    ldr     r6, [r7, $3<<2]                         @ t[3]
154    add     r2, r6                                  @ b += t[3]
155    add.w   r2, r3, r2, ror #10                     @ b = c + ROTL(b,22)
156
157    bic.w   r5, r4, r2                              @ ~b & d
158    and.w   r6, r3, r2                              @ b & c
159    orrs    r5, r6                                  @ F(b,c,d)
160    add     r1, r5                                  @ a += F(b,c,d)
161    ldr     r6, [r0, $4<<2]                         @ input[4]
162    add     r1, r6                                  @ a += input[4]
163    ldr     r6, [r7, $4<<2]                         @ t[4]
164    add     r1, r6                                  @ a += t[4]
165    add.w   r1, r2, r1, ror #25                     @ a = b + ROTL(a,7)
166
167    bic.w   r5, r3, r1                              @ ~a & c
168    and.w   r6, r2, r1                              @ a & b
169    orrs    r5, r6                                  @ F(a,b,c)
170    add     r4, r5                                  @ d += F(a,b,c)
171    ldr     r6, [r0, $5<<2]                         @ input[5]
172    add     r4, r6                                  @ d += input[5]
173    ldr     r6, [r7, $5<<2]                         @ t[5]
174    add     r4, r6                                  @ d += t[5]
175    add.w   r4, r1, r4, ror #20                     @ d = a + ROTL(d,12)
176
177    bic.w   r5, r2, r4                              @ ~d & b
178    and.w   r6, r1, r4                              @ d & a
179    orrs    r5, r6                                  @ F(d,a,b)
180    add     r3, r5                                  @ c += F(d,a,b)
181    ldr     r6, [r0, $6<<2]                         @ input[6]
182    add     r3, r6                                  @ c += input[6]
183    ldr     r6, [r7, $6<<2]                         @ t[6]
184    add     r3, r6                                  @ c += t[6]
185    add.w   r3, r4, r3, ror #15                     @ c = d + ROTL(c,17)
186
187    bic.w   r5, r1, r3                              @ ~c & a
188    and.w   r6, r4, r3                              @ c & d
189    orrs    r5, r6                                  @ F(c,d,a)
190    add     r2, r5                                  @ b += F(c,d,a)
191    ldr     r6, [r0, $7<<2]                         @ input[7]
192    add     r2, r6                                  @ b += input[7]
193    ldr     r6, [r7, $7<<2]                         @ t[7]
194    add     r2, r6                                  @ b += t[7]
195    add.w   r2, r3, r2, ror #10                     @ b = c + ROTL(b,22)
196
197    bic.w   r5, r4, r2                              @ ~b & d
198    and.w   r6, r3, r2                              @ b & c
199    orrs    r5, r6                                  @ F(b,c,d)
200    add     r1, r5                                  @ a += F(b,c,d)
201    ldr     r6, [r0, $8<<2]                         @ input[8]
202    add     r1, r6                                  @ a += input[8]
203    ldr     r6, [r7, $8<<2]                         @ t[8]
204    add     r1, r6                                  @ a += t[8]
205    add.w   r1, r2, r1, ror #25                     @ a = b + ROTL(a,7)
206
207    bic.w   r5, r3, r1                              @ ~a & c
208    and.w   r6, r2, r1                              @ a & b
209    orrs    r5, r6                                  @ F(a,b,c)
210    add     r4, r5                                  @ d += F(a,b,c)
211    ldr     r6, [r0, $9<<2]                         @ input[9]
212    add     r4, r6                                  @ d += input[9]
213    ldr     r6, [r7, $9<<2]                         @ t[9]
214    add     r4, r6                                  @ d += t[9]
215    add.w   r4, r1, r4, ror #20                     @ d = a + ROTL(d,12)
216
217    bic.w   r5, r2, r4                              @ ~d & b
218    and.w   r6, r1, r4                              @ d & a
219    orrs    r5, r6                                  @ F(d,a,b)
220    add     r3, r5                                  @ c += F(d,a,b)
221    ldr     r6, [r0, $10<<2]                        @ input[10]
222    add     r3, r6                                  @ c += input[10]
223    ldr     r6, [r7, $10<<2]                        @ t[10]
224    add     r3, r6                                  @ c += t[10]
225    add.w   r3, r4, r3, ror #15                     @ c = d + ROTL(c,17)
226
227    bic.w   r5, r1, r3                              @ ~c & a
228    and.w   r6, r4, r3                              @ c & d
229    orrs    r5, r6                                  @ F(c,d,a)
230    add     r2, r5                                  @ b += F(c,d,a)
231    ldr     r6, [r0, $11<<2]                        @ input[11]
232    add     r2, r6                                  @ b += input[11]
233    ldr     r6, [r7, $11<<2]                        @ t[11]
234    add     r2, r6                                  @ b += t[11]
235    add.w   r2, r3, r2, ror #10                     @ b = c + ROTL(b,22)
236
237    bic.w   r5, r4, r2                              @ ~b & d
238    and.w   r6, r3, r2                              @ b & c
239    orrs    r5, r6                                  @ F(b,c,d)
240    add     r1, r5                                  @ a += F(b,c,d)
241    ldr     r6, [r0, $12<<2]                        @ input[12]
242    add     r1, r6                                  @ a += input[12]
243    ldr     r6, [r7, $12<<2]                        @ t[12]
244    add     r1, r6                                  @ a += t[12]
245    add.w   r1, r2, r1, ror #25                     @ a = b + ROTL(a,7)
246
247    bic.w   r5, r3, r1                              @ ~a & c
248    and.w   r6, r2, r1                              @ a & b
249    orrs    r5, r6                                  @ F(a,b,c)
250    add     r4, r5                                  @ d += F(a,b,c)
251    ldr     r6, [r0, $13<<2]                        @ input[13]
252    add     r4, r6                                  @ d += input[13]
253    ldr     r6, [r7, $13<<2]                        @ t[13]
254    add     r4, r6                                  @ d += t[13]
255    add.w   r4, r1, r4, ror #20                     @ d = a + ROTL(d,12)
256
257    bic.w   r5, r2, r4                              @ ~d & b
258    and.w   r6, r1, r4                              @ d & a
259    orrs    r5, r6                                  @ F(d,a,b)
260    add     r3, r5                                  @ c += F(d,a,b)
261    ldr     r6, [r0, $14<<2]                        @ input[14]
262    add     r3, r6                                  @ c += input[14]
263    ldr     r6, [r7, $14<<2]                        @ t[14]
264    add     r3, r6                                  @ c += t[14]
265    add.w   r3, r4, r3, ror #15                     @ c = d + ROTL(c,17)
266
267    bic.w   r5, r1, r3                              @ ~c & a
268    and.w   r6, r4, r3                              @ c & d
269    orrs    r5, r6                                  @ F(c,d,a)
270    add     r2, r5                                  @ b += F(c,d,a)
271    ldr     r6, [r0, $15<<2]                        @ input[15]
272    add     r2, r6                                  @ b += input[15]
273    ldr     r6, [r7, $15<<2]                        @ t[15]
274    add     r2, r6                                  @ b += t[15]
275    add.w   r2, r3, r2, ror #10                     @ b = c + ROTL(b,22)
276
277# 16 rounds with G(x,y,z) = (x & z) | (y & ~z)
278
279    bic.w   r5, r3, r4                              @ ~d & c
280    and.w   r6, r2, r4                              @ d & b
281    orrs    r5, r6                                  @ G(b,c,d)
282    add     r1, r5                                  @ a += G(b,c,d)
283    ldr     r6, [r0, $1<<2]                         @ input[1]
284    add     r1, r6                                  @ a += input[1]
285    ldr     r6, [r7, $16<<2]                        @ t[16]
286    add     r1, r6                                  @ a += t[16]
287    add.w   r1, r2, r1, ror #27                     @ a = b + ROTL(a,5)
288
289    bic.w   r5, r2, r3                              @ ~c & b
290    and.w   r6, r1, r3                              @ c & a
291    orrs    r5, r6                                  @ G(a,b,c)
292    add     r4, r5                                  @ d += G(a,b,c)
293    ldr     r6, [r0, $6<<2]                         @ input[6]
294    add     r4, r6                                  @ d += input[6]
295    ldr     r6, [r7, $17<<2]                        @ t[17]
296    add     r4, r6                                  @ d += t[17]
297    add.w   r4, r1, r4, ror #23                     @ d = a + ROTL(d,9)
298
299    bic.w   r5, r1, r2                              @ ~b & a
300    and.w   r6, r4, r2                              @ b & d
301    orrs    r5, r6                                  @ G(d,a,b)
302    add     r3, r5                                  @ c += G(d,a,b)
303    ldr     r6, [r0, $11<<2]                        @ input[11]
304    add     r3, r6                                  @ c += input[11]
305    ldr     r6, [r7, $18<<2]                        @ t[18]
306    add     r3, r6                                  @ c += t[18]
307    add.w   r3, r4, r3, ror #18                     @ c = d + ROTL(c,14)
308
309    bic.w   r5, r4, r1                              @ ~a & d
310    and.w   r6, r3, r1                              @ a & c
311    orrs    r5, r6                                  @ G(d,a,b)
312    add     r2, r5                                  @ b += G(c,d,a)
313    ldr     r6, [r0]                                @ input[0]
314    add     r2, r6                                  @ b += input[0]
315    ldr     r6, [r7, $19<<2]                        @ t[19]
316    add     r2, r6                                  @ b += t[19]
317    add.w   r2, r3, r2, ror #12                     @ b = c + ROTL(b,20)
318
319    bic.w   r5, r3, r4                              @ ~d & c
320    and.w   r6, r2, r4                              @ d & b
321    orrs    r5, r6                                  @ G(b,c,d)
322    add     r1, r5                                  @ a += G(b,c,d)
323    ldr     r6, [r0, $5<<2]                         @ input[5]
324    add     r1, r6                                  @ a += input[5]
325    ldr     r6, [r7, $20<<2]                        @ t[20]
326    add     r1, r6                                  @ a += t[20]
327    add.w   r1, r2, r1, ror #27                     @ a = b + ROTL(a,5)
328
329    bic.w   r5, r2, r3                              @ ~c & b
330    and.w   r6, r1, r3                              @ c & a
331    orrs    r5, r6                                  @ G(a,b,c)
332    add     r4, r5                                  @ d += G(a,b,c)
333    ldr     r6, [r0, $10<<2]                        @ input[10]
334    add     r4, r6                                  @ d += input[10]
335    ldr     r6, [r7, $21<<2]                        @ t[21]
336    add     r4, r6                                  @ d += t[21]
337    add.w   r4, r1, r4, ror #23                     @ d = a + ROTL(d,9)
338
339    bic.w   r5, r1, r2                              @ ~b & a
340    and.w   r6, r4, r2                              @ b & d
341    orrs    r5, r6                                  @ G(d,a,b)
342    add     r3, r5                                  @ c += G(d,a,b)
343    ldr     r6, [r0, $15<<2]                        @ input[15]
344    add     r3, r6                                  @ c += input[15]
345    ldr     r6, [r7, $22<<2]                        @ t[22]
346    add     r3, r6                                  @ c += t[22]
347    add.w   r3, r4, r3, ror #18                     @ c = d + ROTL(c,14)
348
349    bic.w   r5, r4, r1                              @ ~a & d
350    and.w   r6, r3, r1                              @ a & c
351    orrs    r5, r6                                  @ G(d,a,b)
352    add     r2, r5                                  @ b += G(c,d,a)
353    ldr     r6, [r0, $4<<2]                         @ input[4]
354    add     r2, r6                                  @ b += input[4]
355    ldr     r6, [r7, $23<<2]                        @ t[23]
356    add     r2, r6                                  @ b += t[23]
357    add.w   r2, r3, r2, ror #12                     @ b = c + ROTL(b,20)
358
359    bic.w   r5, r3, r4                              @ ~d & c
360    and.w   r6, r2, r4                              @ d & b
361    orrs    r5, r6                                  @ G(b,c,d)
362    add     r1, r5                                  @ a += G(b,c,d)
363    ldr     r6, [r0, $9<<2]                         @ input[9]
364    add     r1, r6                                  @ a += input[9]
365    ldr     r6, [r7, $24<<2]                        @ t[24]
366    add     r1, r6                                  @ a += t[24]
367    add.w   r1, r2, r1, ror #27                     @ a = b + ROTL(a,5)
368
369    bic.w   r5, r2, r3                              @ ~c & b
370    and.w   r6, r1, r3                              @ c & a
371    orrs    r5, r6                                  @ G(a,b,c)
372    add     r4, r5                                  @ d += G(a,b,c)
373    ldr     r6, [r0, $14<<2]                        @ input[14]
374    add     r4, r6                                  @ d += input[14]
375    ldr     r6, [r7, $25<<2]                        @ t[25]
376    add     r4, r6                                  @ d += t[25]
377    add.w   r4, r1, r4, ror #23                     @ d = a + ROTL(d,9)
378
379    bic.w   r5, r1, r2                              @ ~b & a
380    and.w   r6, r4, r2                              @ b & d
381    orrs    r5, r6                                  @ G(d,a,b)
382    add     r3, r5                                  @ c += G(d,a,b)
383    ldr     r6, [r0, $3<<2]                         @ input[3]
384    add     r3, r6                                  @ c += input[3]
385    ldr     r6, [r7, $26<<2]                        @ t[26]
386    add     r3, r6                                  @ c += t[26]
387    add.w   r3, r4, r3, ror #18                     @ c = d + ROTL(c,14)
388
389    bic.w   r5, r4, r1                              @ ~a & d
390    and.w   r6, r3, r1                              @ a & c
391    orrs    r5, r6                                  @ G(d,a,b)
392    add     r2, r5                                  @ b += G(c,d,a)
393    ldr     r6, [r0, $8<<2]                         @ input[8]
394    add     r2, r6                                  @ b += input[8]
395    ldr     r6, [r7, $27<<2]                        @ t[27]
396    add     r2, r6                                  @ b += t[27]
397    add.w   r2, r3, r2, ror #12                     @ b = c + ROTL(b,20)
398
399    bic.w   r5, r3, r4                              @ ~d & c
400    and.w   r6, r2, r4                              @ d & b
401    orrs    r5, r6                                  @ G(b,c,d)
402    add     r1, r5                                  @ a += G(b,c,d)
403    ldr     r6, [r0, $13<<2]                        @ input[13]
404    add     r1, r6                                  @ a += input[13]
405    ldr     r6, [r7, $28<<2]                        @ t[28]
406    add     r1, r6                                  @ a += t[28]
407    add.w   r1, r2, r1, ror #27                     @ a = b + ROTL(a,5)
408
409    bic.w   r5, r2, r3                              @ ~c & b
410    and.w   r6, r1, r3                              @ c & a
411    orrs    r5, r6                                  @ G(a,b,c)
412    add     r4, r5                                  @ d += G(a,b,c)
413    ldr     r6, [r0, $2<<2]                         @ input[2]
414    add     r4, r6                                  @ d += input[2]
415    ldr     r6, [r7, $29<<2]                        @ t[29]
416    add     r4, r6                                  @ d += t[29]
417    add.w   r4, r1, r4, ror #23                     @ d = a + ROTL(d,9)
418
419    bic.w   r5, r1, r2                              @ ~b & a
420    and.w   r6, r4, r2                              @ b & d
421    orrs    r5, r6                                  @ G(d,a,b)
422    add     r3, r5                                  @ c += G(d,a,b)
423    ldr     r6, [r0, $7<<2]                         @ input[7]
424    add     r3, r6                                  @ c += input[7]
425    ldr     r6, [r7, $30<<2]                        @ t[30]
426    add     r3, r6                                  @ c += t[30]
427    add.w   r3, r4, r3, ror #18                     @ c = d + ROTL(c,14)
428
429    bic.w   r5, r4, r1                              @ ~a & d
430    and.w   r6, r3, r1                              @ a & c
431    orrs    r5, r6                                  @ G(d,a,b)
432    add     r2, r5                                  @ b += G(c,d,a)
433    ldr     r6, [r0, $12<<2]                        @ input[12]
434    add     r2, r6                                  @ b += input[12]
435    ldr     r6, [r7, $31<<2]                        @ t[31]
436    add     r2, r6                                  @ b += t[31]
437    add.w   r2, r3, r2, ror #12                     @ b = c + ROTL(b,20)
438
439# 16 rounds with H(x,y,z) = x ^ y ^ z
440
441    eor.w   r5, r2, r3                              @ b ^ c
442    eors    r5, r4                                  @ H(b,c,d)
443    add     r1, r5                                  @ a += H(b,c,d)
444    ldr     r6, [r0, $5<<2]                         @ input[5]
445    add     r1, r6                                  @ a += input[5]
446    ldr     r6, [r7, $32<<2]                        @ t[32]
447    add     r1, r6                                  @ a += t[32]
448    add.w   r1, r2, r1, ror #28                     @ a = b + ROTL(a,4)
449
450    eor.w   r5, r1, r2                              @ a ^ b
451    eors    r5, r3                                  @ H(a,b,c)
452    add     r4, r5                                  @ d += H(a,b,c)
453    ldr     r6, [r0, $8<<2]                         @ input[8]
454    add     r4, r6                                  @ d += input[8]
455    ldr     r6, [r7, $33<<2]                        @ t[33]
456    add     r4, r6                                  @ d += t[33]
457    add.w   r4, r1, r4, ror #21                     @ d = a + ROTL(d,11)
458
459    eor.w   r5, r4, r1                              @ d ^ a
460    eors    r5, r2                                  @ H(d,a,b)
461    add     r3, r5                                  @ c += H(d,a,b)
462    ldr     r6, [r0, $11<<2]                        @ input[11]
463    add     r3, r6                                  @ c += input[11]
464    ldr     r6, [r7, $34<<2]                        @ t[34]
465    add     r3, r6                                  @ c += t[34]
466    add.w   r3, r4, r3, ror #16                     @ c = d + ROTL(c,16)
467
468    eor.w   r5, r3, r4                              @ c ^ d
469    eors    r5, r1                                  @ H(c,d,a)
470    add     r2, r5                                  @ b += H(c,d,a)
471    ldr     r6, [r0, $14<<2]                        @ input[14]
472    add     r2, r6                                  @ b += input[14]
473    ldr     r6, [r7, $35<<2]                        @ t[35]
474    add     r2, r6                                  @ b += t[35]
475    add.w   r2, r3, r2, ror #9                      @ b = c + ROTL(d,23)
476
477    eor.w   r5, r2, r3                              @ b ^ c
478    eors    r5, r4                                  @ H(b,c,d)
479    add     r1, r5                                  @ a += H(b,c,d)
480    ldr     r6, [r0, $1<<2]                         @ input[1]
481    add     r1, r6                                  @ a += input[1]
482    ldr     r6, [r7, $36<<2]                        @ t[36]
483    add     r1, r6                                  @ a += t[36]
484    add.w   r1, r2, r1, ror #28                     @ a = b + ROTL(a,4)
485
486    eor.w   r5, r1, r2                              @ a ^ b
487    eors    r5, r3                                  @ H(a,b,c)
488    add     r4, r5                                  @ d += H(a,b,c)
489    ldr     r6, [r0, $4<<2]                         @ input[4]
490    add     r4, r6                                  @ d += input[4]
491    ldr     r6, [r7, $37<<2]                        @ t[37]
492    add     r4, r6                                  @ d += t[37]
493    add.w   r4, r1, r4, ror #21                     @ d = a + ROTL(d,11)
494
495    eor.w   r5, r4, r1                              @ d ^ a
496    eors    r5, r2                                  @ H(d,a,b)
497    add     r3, r5                                  @ c += H(d,a,b)
498    ldr     r6, [r0, $7<<2]                         @ input[7]
499    add     r3, r6                                  @ c += input[7]
500    ldr     r6, [r7, $38<<2]                        @ t[38]
501    add     r3, r6                                  @ c += t[38]
502    add.w   r3, r4, r3, ror #16                     @ c = d + ROTL(c,16)
503
504    eor.w   r5, r3, r4                              @ c ^ d
505    eors    r5, r1                                  @ H(c,d,a)
506    add     r2, r5                                  @ b += H(c,d,a)
507    ldr     r6, [r0, $10<<2]                        @ input[10]
508    add     r2, r6                                  @ b += input[10]
509    ldr     r6, [r7, $39<<2]                        @ t[39]
510    add     r2, r6                                  @ b += t[39]
511    add.w   r2, r3, r2, ror #9                      @ b = c + ROTL(d,23)
512
513    eor.w   r5, r2, r3                              @ b ^ c
514    eors    r5, r4                                  @ H(b,c,d)
515    add     r1, r5                                  @ a += H(b,c,d)
516    ldr     r6, [r0, $13<<2]                        @ input[13]
517    add     r1, r6                                  @ a += input[13]
518    ldr     r6, [r7, $40<<2]                        @ t[40]
519    add     r1, r6                                  @ a += t[40]
520    add.w   r1, r2, r1, ror #28                     @ a = b + ROTL(a,4)
521
522    eor.w   r5, r1, r2                              @ a ^ b
523    eors    r5, r3                                  @ H(a,b,c)
524    add     r4, r5                                  @ d += H(a,b,c)
525    ldr     r6, [r0]                                @ input[0]
526    add     r4, r6                                  @ d += input[0]
527    ldr     r6, [r7, $41<<2]                        @ t[41]
528    add     r4, r6                                  @ d += t[41]
529    add.w   r4, r1, r4, ror #21                     @ d = a + ROTL(d,11)
530
531    eor.w   r5, r4, r1                              @ d ^ a
532    eors    r5, r2                                  @ H(d,a,b)
533    add     r3, r5                                  @ c += H(d,a,b)
534    ldr     r6, [r0, $3<<2]                         @ input[3]
535    add     r3, r6                                  @ c += input[3]
536    ldr     r6, [r7, $42<<2]                        @ t[42]
537    add     r3, r6                                  @ c += t[42]
538    add.w   r3, r4, r3, ror #16                     @ c = d + ROTL(c,16)
539
540    eor.w   r5, r3, r4                              @ c ^ d
541    eors    r5, r1                                  @ H(c,d,a)
542    add     r2, r5                                  @ b += H(c,d,a)
543    ldr     r6, [r0, $6<<2]                         @ input[6]
544    add     r2, r6                                  @ b += input[6]
545    ldr     r6, [r7, $43<<2]                        @ t[43]
546    add     r2, r6                                  @ b += t[43]
547    add.w   r2, r3, r2, ror #9                      @ b = c + ROTL(d,23)
548
549    eor.w   r5, r2, r3                              @ b ^ c
550    eors    r5, r4                                  @ H(b,c,d)
551    add     r1, r5                                  @ a += H(b,c,d)
552    ldr     r6, [r0, $9<<2]                         @ input[9]
553    add     r1, r6                                  @ a += input[9]
554    ldr     r6, [r7, $44<<2]                        @ t[44]
555    add     r1, r6                                  @ a += t[44]
556    add.w   r1, r2, r1, ror #28                     @ a = b + ROTL(a,4)
557
558    eor.w   r5, r1, r2                              @ a ^ b
559    eors    r5, r3                                  @ H(a,b,c)
560    add     r4, r5                                  @ d += H(a,b,c)
561    ldr     r6, [r0, $12<<2]                        @ input[12]
562    add     r4, r6                                  @ d += input[12]
563    ldr     r6, [r7, $45<<2]                        @ t[45]
564    add     r4, r6                                  @ d += t[45]
565    add.w   r4, r1, r4, ror #21                     @ d = a + ROTL(d,11)
566
567    eor.w   r5, r4, r1                              @ d ^ a
568    eors    r5, r2                                  @ H(d,a,b)
569    add     r3, r5                                  @ c += H(d,a,b)
570    ldr     r6, [r0, $15<<2]                        @ input[15]
571    add     r3, r6                                  @ c += input[15]
572    ldr     r6, [r7, $46<<2]                        @ t[46]
573    add     r3, r6                                  @ c += t[46]
574    add.w   r3, r4, r3, ror #16                     @ c = d + ROTL(c,16)
575
576    eor.w   r5, r3, r4                              @ c ^ d
577    eors    r5, r1                                  @ H(c,d,a)
578    add     r2, r5                                  @ b += H(c,d,a)
579    ldr     r6, [r0, $2<<2]                         @ input[2]
580    add     r2, r6                                  @ b += input[2]
581    ldr     r6, [r7, $47<<2]                        @ t[47]
582    add     r2, r6                                  @ b += t[47]
583    add.w   r2, r3, r2, ror #9                      @ b = c + ROTL(d,23)
584
585# 16 rounds with I(x,y,z) = y ^ (x | ~z)
586
587    orn     r5, r2, r4                              @ b | ~d
588    eors    r5, r3                                  @ I(b,c,d)
589    add     r1, r5                                  @ a += I(b,c,d)
590    ldr     r6, [r0]                                @ input[0]
591    add     r1, r6                                  @ a += input[0]
592    ldr     r6, [r7, $48<<2]                        @ t[48]
593    add     r1, r6                                  @ a += t[48]
594    add.w   r1, r2, r1, ror #26                     @ a = b + ROTL(a,6)
595
596    orn     r5, r1, r3                              @ a | ~c
597    eors    r5, r2                                  @ I(a,b,c)
598    add     r4, r5                                  @ d += I(a,b,c)
599    ldr     r6, [r0, $7<<2]                         @ input[7]
600    add     r4, r6                                  @ d += input[7]
601    ldr     r6, [r7, $49<<2]                        @ t[49]
602    add     r4, r6                                  @ d += t[49]
603    add.w   r4, r1, r4, ror #22                     @ d = a + ROTL(d,10)
604
605    orn     r5, r4, r2                              @ d | ~b
606    eors    r5, r1                                  @ I(d,a,b)
607    add     r3, r5                                  @ c += I(d,a,b)
608    ldr     r6, [r0, $14<<2]                        @ input[14]
609    add     r3, r6                                  @ c += input[14]
610    ldr     r6, [r7, $50<<2]                        @ t[50]
611    add     r3, r6                                  @ c += t[50]
612    add.w   r3, r4, r3, ror #17                     @ c = d + ROTL(c,15)
613
614    orn     r5, r3, r1                              @ c | ~a
615    eors    r5, r4                                  @ I(c,d,a)
616    add     r2, r5                                  @ b += I(c,d,a)
617    ldr     r6, [r0, $5<<2]                         @ input[5]
618    add     r2, r6                                  @ b += input[5]
619    ldr     r6, [r7, $51<<2]                        @ t[51]
620    add     r2, r6                                  @ b += t[51]
621    add.w   r2, r3, r2, ror #11                     @ b = c + ROTL(b,21)
622
623    orn     r5, r2, r4                              @ b | ~d
624    eors    r5, r3                                  @ I(b,c,d)
625    add     r1, r5                                  @ a += I(b,c,d)
626    ldr     r6, [r0, $12<<2]                        @ input[12]
627    add     r1, r6                                  @ a += input[12]
628    ldr     r6, [r7, $52<<2]                        @ t[52]
629    add     r1, r6                                  @ a += t[52]
630    add.w   r1, r2, r1, ror #26                     @ a = b + ROTL(a,6)
631
632    orn     r5, r1, r3                              @ a | ~c
633    eors    r5, r2                                  @ I(a,b,c)
634    add     r4, r5                                  @ d += I(a,b,c)
635    ldr     r6, [r0, $3<<2]                         @ input[3]
636    add     r4, r6                                  @ d += input[3]
637    ldr     r6, [r7, $53<<2]                        @ t[53]
638    add     r4, r6                                  @ d += t[53]
639    add.w   r4, r1, r4, ror #22                     @ d = a + ROTL(d,10)
640
641    orn     r5, r4, r2                              @ d | ~b
642    eors    r5, r1                                  @ I(d,a,b)
643    add     r3, r5                                  @ c += I(d,a,b)
644    ldr     r6, [r0, $10<<2]                        @ input[10]
645    add     r3, r6                                  @ c += input[10]
646    ldr     r6, [r7, $54<<2]                        @ t[54]
647    add     r3, r6                                  @ c += t[54]
648    add.w   r3, r4, r3, ror #17                     @ c = d + ROTL(c,15)
649
650    orn     r5, r3, r1                              @ c | ~a
651    eors    r5, r4                                  @ I(c,d,a)
652    add     r2, r5                                  @ b += I(c,d,a)
653    ldr     r6, [r0, $1<<2]                         @ input[1]
654    add     r2, r6                                  @ b += input[1]
655    ldr     r6, [r7, $55<<2]                        @ t[55]
656    add     r2, r6                                  @ b += t[55]
657    add.w   r2, r3, r2, ror #11                     @ b = c + ROTL(b,21)
658
659    orn     r5, r2, r4                              @ b | ~d
660    eors    r5, r3                                  @ I(b,c,d)
661    add     r1, r5                                  @ a += I(b,c,d)
662    ldr     r6, [r0, $8<<2]                         @ input[8]
663    add     r1, r6                                  @ a += input[8]
664    ldr     r6, [r7, $56<<2]                        @ t[56]
665    add     r1, r6                                  @ a += t[56]
666    add.w   r1, r2, r1, ror #26                     @ a = b + ROTL(a,6)
667
668    orn     r5, r1, r3                              @ a | ~c
669    eors    r5, r2                                  @ I(a,b,c)
670    add     r4, r5                                  @ d += I(a,b,c)
671    ldr     r6, [r0, $15<<2]                        @ input[15]
672    add     r4, r6                                  @ d += input[15]
673    ldr     r6, [r7, $57<<2]                        @ t[57]
674    add     r4, r6                                  @ d += t[57]
675    add.w   r4, r1, r4, ror #22                     @ d = a + ROTL(d,10)
676
677    orn     r5, r4, r2                              @ d | ~b
678    eors    r5, r1                                  @ I(d,a,b)
679    add     r3, r5                                  @ c += I(d,a,b)
680    ldr     r6, [r0, $6<<2]                         @ input[6]
681    add     r3, r6                                  @ c += input[6]
682    ldr     r6, [r7, $58<<2]                        @ t[58]
683    add     r3, r6                                  @ c += t[58]
684    add.w   r3, r4, r3, ror #17                     @ c = d + ROTL(c,15)
685
686    orn     r5, r3, r1                              @ c | ~a
687    eors    r5, r4                                  @ I(c,d,a)
688    add     r2, r5                                  @ b += I(c,d,a)
689    ldr     r6, [r0, $13<<2]                        @ input[13]
690    add     r2, r6                                  @ b += input[13]
691    ldr     r6, [r7, $59<<2]                        @ t[59]
692    add     r2, r6                                  @ b += t[59]
693    add.w   r2, r3, r2, ror #11                     @ b = c + ROTL(b,21)
694
695    orn     r5, r2, r4                              @ b | ~d
696    eors    r5, r3                                  @ I(b,c,d)
697    add     r1, r5                                  @ a += I(b,c,d)
698    ldr     r6, [r0, $4<<2]                         @ input[4]
699    add     r1, r6                                  @ a += input[4]
700    ldr     r6, [r7, $60<<2]                        @ t[60]
701    add     r1, r6                                  @ a += t[60]
702    add.w   r1, r2, r1, ror #26                     @ a = b + ROTL(a,6)
703
704    orn     r5, r1, r3                              @ a | ~c
705    eors    r5, r2                                  @ I(a,b,c)
706    add     r4, r5                                  @ d += I(a,b,c)
707    ldr     r6, [r0, $11<<2]                        @ input[11]
708    add     r4, r6                                  @ d += input[11]
709    ldr     r6, [r7, $61<<2]                        @ t[61]
710    add     r4, r6                                  @ d += t[61]
711    add.w   r4, r1, r4, ror #22                     @ d = a + ROTL(d,10)
712
713    orn     r5, r4, r2                              @ d | ~b
714    eors    r5, r1                                  @ I(d,a,b)
715    add     r3, r5                                  @ c += I(d,a,b)
716    ldr     r6, [r0, $2<<2]                         @ input[2]
717    add     r3, r6                                  @ c += input[2]
718    ldr     r6, [r7, $62<<2]                        @ t[62]
719    add     r3, r6                                  @ c += t[62]
720    add.w   r3, r4, r3, ror #17                     @ c = d + ROTL(c,15)
721
722    orn     r5, r3, r1                              @ c | ~a
723    eors    r5, r4                                  @ I(c,d,a)
724    add     r2, r5                                  @ b += I(c,d,a)
725    ldr     r6, [r0, $9<<2]                         @ input[9]
726    add     r2, r6                                  @ b += input[9]
727    ldr     r6, [r7, $63<<2]                        @ t[63]
728    add     r2, r6                                  @ b += t[63]
729    add.w   r2, r3, r2, ror #11                     @ b = c + ROTL(b,21)
730
731    ldr     r5, [r8]                                @ get original md5_stats[0]
732    add     r1, r5
733    ldr     r5, [r8, $1<<2]                         @ get original md5_stats[1]
734    add     r2, r5
735    ldr     r5, [r8, $2<<2]                         @ get original md5_stats[2]
736    add     r3, r5
737    ldr     r5, [r8, $3<<2]                         @ get original md5_stats[3]
738    add     r4, r5
739
740    stmia   r8, {r1-r4}                             @ store new md5_state[0-3]
741
742    add     r0, $64                                 @ input ptr -> next block
743    subs    r9, $1                                  @ decrement num_blks
744    bne     next_blk
745
746    ldmia   sp!, {r4-r9}                            @ restore regs and return
747    bx      lr
748
749
750#*******************************************************************************
751#*******************************************************************************
752#
753# MD5: Initializes MD5 state variables, then updates them for one or more blocks
754#   arguments
755#           *msg_data   pointer to start of input message data
756#           num_blks    number of 512-bit blocks to process
757#           *md5_state  pointer to 128-bit block of MD5 state variables:
758#                           a,b,c,d
759#
760#   calling convention
761#   void    mmcau_md5_update  (const unsigned char *msg_data,
762#                              const int            num_blks,
763#                              unsigned char       *md5_state)
764
765
766    .global _mmcau_md5_update                       @ underscore-prefixed alias of the same entry point
767    .global mmcau_md5_update
768    .type   mmcau_md5_update, %function
769    .align  4                                       @ 2^4 = 16-byte alignment (GNU as on ARM takes a power of two)
770
771_mmcau_md5_update:
772mmcau_md5_update:                                   @ in: r0 = msg_data, r1 = num_blks, r2 = md5_state (per prototype above)
773
774    push    {r3-r7,lr}                              @ save r4-r7,lr; r3 is padding so 6 words keep sp 8-byte aligned (AAPCS) at the bl
775
776    movw    r4, #:lower16:md5_initial_h             @ r4 -> initial data (low half of address)
777    movt    r4, #:upper16:md5_initial_h             @ high half of address
778
779# overwrite md5_state with the initial values; whatever the buffer held before is discarded
780    ldmia   r4, {r4-r7}                             @ get md5[0-3] (r4 is base and is also loaded; no writeback)
781    stmia   r2, {r4-r7}                             @ copy to md5_state[0-3]
782
783    bl      mmcau_md5_hash_n                        @ call hash_n routine; r0-r2 still hold the caller's arguments
784
785    pop     {r3-r7,pc}                              @ restore r4-r7 and return; padding word lands in scratch r3
786
787#*******************************************************************************
788#*******************************************************************************
789#
790# MD5: Updates MD5 state variables for one input message block
791#
792#   arguments
793#           *msg_data   pointer to start of input message data
794#           *md5_state  pointer to 128-bit block of MD5 state variables: a,b,c,d
795#
796#   calling convention
797#   void    mmcau_md5_hash (const unsigned char *msg_data,
798#                           unsigned char       *md5_state)
799
800    .global _mmcau_md5_hash                         @ underscore-prefixed alias of the same entry point
801    .global mmcau_md5_hash
802    .type   mmcau_md5_hash, %function
803    .align  4                                       @ 2^4 = 16-byte alignment (GNU as on ARM takes a power of two)
804
805_mmcau_md5_hash:
806mmcau_md5_hash:                                     @ in: r0 = msg_data, r1 = md5_state (per prototype above)
807
808    mov    r2, r1                                   @ arg1 (*md5_state) to arg2; must precede the write to r1 below
809    mov    r1, $1                                   @ num_blks = 1 (single block)
810    b      mmcau_md5_hash_n                         @ tail-branch (not bl): lr untouched, so hash_n returns straight to our caller
811
812
813#*******************************************************************************
814
815    .data                                           @ NOTE(review): values look read-only; confirm RAM placement is intended before moving to a read-only section
816    .type   md5_initial_h, %object                  @ mark the symbol as a data object
817    .align  4                                       @ 2^4 = 16-byte alignment (GNU as on ARM takes a power of two)
818
819md5_initial_h:                                      @ four words that mmcau_md5_update copies, in this order, to md5_state[0-3]
820    .word   0x67452301                              @ initial a (md5_state[0])
821    .word   0xefcdab89                              @ initial b (md5_state[1])
822    .word   0x98badcfe                              @ initial c (md5_state[2])
823    .word   0x10325476                              @ initial d (md5_state[3])
824
825    .type   md5_t, %object                          @ mark the symbol as a data object
826    .align  4                                       @ 2^4 = 16-byte alignment (GNU as on ARM takes a power of two)
827md5_t:                                              @ 64 per-step additive constants; hash code reads entry i as [r7, $i<<2] -- NOTE(review): r7 presumably holds this address, setup not visible here
828    .word   0xd76aa478                              @ t[0]  (values match the T table of RFC 1321, 0-based here)
829    .word   0xe8c7b756                              @ t[1]
830    .word   0x242070db                              @ t[2]
831    .word   0xc1bdceee                              @ t[3]
832    .word   0xf57c0faf                              @ t[4]
833    .word   0x4787c62a                              @ t[5]
834    .word   0xa8304613                              @ t[6]
835    .word   0xfd469501                              @ t[7]
836    .word   0x698098d8                              @ t[8]
837    .word   0x8b44f7af                              @ t[9]
838    .word   0xffff5bb1                              @ t[10]
839    .word   0x895cd7be                              @ t[11]
840    .word   0x6b901122                              @ t[12]
841    .word   0xfd987193                              @ t[13]
842    .word   0xa679438e                              @ t[14]
843    .word   0x49b40821                              @ t[15]
844    .word   0xf61e2562                              @ t[16]
845    .word   0xc040b340                              @ t[17]
846    .word   0x265e5a51                              @ t[18]
847    .word   0xe9b6c7aa                              @ t[19]
848    .word   0xd62f105d                              @ t[20]
849    .word   0x02441453                              @ t[21]
850    .word   0xd8a1e681                              @ t[22]
851    .word   0xe7d3fbc8                              @ t[23]
852    .word   0x21e1cde6                              @ t[24]
853    .word   0xc33707d6                              @ t[25]
854    .word   0xf4d50d87                              @ t[26]
855    .word   0x455a14ed                              @ t[27]
856    .word   0xa9e3e905                              @ t[28]
857    .word   0xfcefa3f8                              @ t[29]
858    .word   0x676f02d9                              @ t[30]
859    .word   0x8d2a4c8a                              @ t[31]
860    .word   0xfffa3942                              @ t[32]
861    .word   0x8771f681                              @ t[33]
862    .word   0x6d9d6122                              @ t[34]
863    .word   0xfde5380c                              @ t[35]
864    .word   0xa4beea44                              @ t[36]
865    .word   0x4bdecfa9                              @ t[37]
866    .word   0xf6bb4b60                              @ t[38]
867    .word   0xbebfbc70                              @ t[39]
868    .word   0x289b7ec6                              @ t[40]
869    .word   0xeaa127fa                              @ t[41]
870    .word   0xd4ef3085                              @ t[42]
871    .word   0x04881d05                              @ t[43] (added in an H(x,y,z) step of the hash routine)
872    .word   0xd9d4d039                              @ t[44]
873    .word   0xe6db99e5                              @ t[45]
874    .word   0x1fa27cf8                              @ t[46]
875    .word   0xc4ac5665                              @ t[47]
876    .word   0xf4292244                              @ t[48] (first constant of the 16 I(x,y,z) steps)
877    .word   0x432aff97                              @ t[49]
878    .word   0xab9423a7                              @ t[50]
879    .word   0xfc93a039                              @ t[51]
880    .word   0x655b59c3                              @ t[52]
881    .word   0x8f0ccc92                              @ t[53]
882    .word   0xffeff47d                              @ t[54]
883    .word   0x85845dd1                              @ t[55]
884    .word   0x6fa87e4f                              @ t[56]
885    .word   0xfe2ce6e0                              @ t[57]
886    .word   0xa3014314                              @ t[58]
887    .word   0x4e0811a1                              @ t[59]
888    .word   0xf7537e82                              @ t[60]
889    .word   0xbd3af235                              @ t[61]
890    .word   0x2ad7d2bb                              @ t[62]
891    .word   0xeb86d391                              @ t[63] (last entry; used by the final I step)
892