1# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
2#
3# Copyright (c) Freescale Semiconductor, Inc 2013.
4#
5# FILE NAME         : mmcau_aes_functions.s
6# VERSION           : $Id:  $
7# TYPE              : Source Cortex-M0+ assembly library code
8# DEPARTMENT        : MCG R&D Core and Platforms
9# AUTHOR            : Anthony (Teejay) Ciancio
10# AUTHOR EMAIL      : teejay.ciancio@freescale.com
11#
12#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
13#
14# VERSION   DATE        AUTHOR          DESCRIPTION
15# *******   ****        ******          ***********
16# 1.0       2013-11     Ciancio         initial release, using the ARMv6-M ISA
17#
18# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
19
20
21    .include "cau2_defines.hdr"
22    .syntax unified
23
24
# MMCAU address and command-word constants.
# MMCAU_PPB_INDIRECT is MMCAU_PPB_DIRECT + 0x800.
# NOTE(review): none of these symbols is referenced in the part of the file
# visible here; the set_key code in view takes its addresses and command
# words from the set_key_reg_data table instead - TODO confirm they are used
# further down (or by cau2_defines.hdr consumers) before touching them.
25    .equ      MMCAU_PPB_DIRECT, 0xf0005000
26    .equ    MMCAU_PPB_INDIRECT, 0xf0005800
27    .equ           MMCAU_1_CMD, 0x80000000
28    .equ          MMCAU_2_CMDS, 0x80100000
29    .equ          MMCAU_3_CMDS, 0x80100200
30
31
32# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
33#
34#   MMCAU_AES_SET_KEY
35#   Performs an AES key expansion
36#
37#   ARGUMENTS
38#   *key            pointer to input key (128, 192, 256 bits in length)
39#   key_size        key_size in bits (128, 192, 256)
40#   *key_sch        pointer to key schedule output (44, 52, 60 longwords)
41#
42#   CALLING CONVENTION
43#   void mmcau_aes_set_key     (const unsigned char     *key,
44#                               const int               key_size,
45#                               unsigned char           *key_sch)
46#
47#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
48#
49#    REGISTER  |  ALLOCATION (at the start of mmcau_aes_set_key)
50#   -----------+------------------------------------------------------------
51#          r0  |  *key          (arg0)
52#          r1  |  key_size      (arg1)
53#          r2  |  *key_sch      (arg2)
54#              |
55#        > r2  |  irrelevant
56#
57# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
58
59    .global _mmcau_aes_set_key
60    .global mmcau_aes_set_key
61    .type   mmcau_aes_set_key, %function
62    .align  4
63
# Entry point, exported both with and without a leading underscore.
# On entry: r0 = *key, r1 = key_size (bits), r2 = *key_sch.
64_mmcau_aes_set_key:
65mmcau_aes_set_key:
66
67# store regs r4-r12 and r14, we need to restore them at the end of the routine
# r8-r12 are first copied into r3-r7 and pushed from there (the 16-bit Thumb
# push only accepts r0-r7 and lr in its register list).  r3 carries no
# argument, so overwriting it here is harmless.
# Stack after both pushes, lowest address first: r8, r9, sl, fp, ip, r4-r7, lr.
68    push    {r4-r7, lr}                     @ store low regs and link reg
69    mov     r3, r8
70    mov     r4, r9
71    mov     r5, sl
72    mov     r6, fp
73    mov     r7, ip
74    push    {r3-r7}                         @ store high regs
75
# r3 = address of set_key_reg_data (defined outside this view).  The key-size
# specific paths read it as a table of words: *rcon, the AESS+CAA command,
# the direct-command address, the LDR+CAA address and (128/256 paths) the
# STR+CAA address.
76    ldr     r3, =set_key_reg_data            @ prepare for set_key reg load
77
78
# Dispatch on key_size.  128 and 192 are matched explicitly; every other
# value, valid or not, falls through to set_key_256 - there is no argument
# validation.
# NOTE(review): the "bne over an unconditional b" shape is presumably used
# because the conditional branch encoding reaches much less far than the
# unconditional one and set_key_128 / set_key_192 are far away - confirm
# before collapsing these into a single beq.
79set_key_check_size:
80    cmp     r1, #128                        @ if key_size != 128,
81    bne     set_key_check_size_again        @ then = 192 or 256, so check again
82    b       set_key_128                     @ else = 128, so do set_key_128
83
84
85set_key_check_size_again:
86    cmp     r1, #192                        @ if key_size != 192,
87    bne     set_key_256                     @ then = 256, so do set_key_256
88    b       set_key_192                     @ else = 192, so do set_key_192
# Literal pool for the "ldr r3, =..." above.  It sits directly behind an
# unconditional branch, and set_key_256 is only reached by the bne, so the
# pool is never executed.
89    .ltorg
90
91
92set_key_256:
93
# AES-256 path: expands the eight key words into key_sch[0-59].
# key_sch[0-7] are the byte-reversed key words.  Every later group of four
# words is built from the group eight words earlier: its first word is XORed
# with a value derived from the last word of the previous group, and each
# following word is XORed with the word just produced.  The derived value is
# obtained by writing the word to the address in fp (into the CAA), writing
# the AESS command word (r9) to the address in sl, and reading the CAA back
# from the address in ip:
#   - groups starting at index 8, 16, 24, ... 56 first rotate the word left
#     by 8 (rors by 24) and XOR rcon[0..6] into the result;
#   - groups starting at index 12, 20, 28, ... 52 use the unrotated word and
#     no rcon.
# r2 always points just past the last word stored.  "subs r2, #8<<2" steps
# back to the group eight words before the one being produced, the ldmia
# advances four words, and "adds r2, #4<<2" returns to the store position.
# r0 (*key) and r1 (key_size) are consumed early and then reused as scratch.
94#    REGISTER  |  ALLOCATION (throughout set_key_256)
95#   -----------+------------------------------------------------------------
96#          r0  |  scratch
97#          r1  |  scratch
98#          r2  |  *key_sch
99#          r3  |  key_sch[0+8i] / scratch
100#          r4  |  key_sch[1+8i] / scratch
101#          r5  |  key_sch[2+8i] / scratch
102#          r6  |  key_sch[3+8i] / scratch
103#          r7  |  scratch
104#          r8  |  *rcon
105#          r9  |  mmcau_1_cmd(AESS+CAA)
106#    (sl) r10  |  *mmcau_direct_cmd()
107#    (fp) r11  |  mmcau_indirect_cmd(LDR+CAA)
108#    (ip) r12  |  mmcau_indirect_cmd(STR+CAA)
109#    (sp) r13  |  stack pointer
110#    (lr) r14  |  link register
111
112# load some of the regs in preparation for the AES-256 set key calculations
# The single ldmia below (base register inside its own load list) is kept for
# reference only; the live sequence loads words 1-4 of the table through a
# writeback ldmia that does not list r3, then fetches word 0 separately via
# the copy of the base address held in r1.
113#   ldmia   r3, {r3-r7}
114    mov     r1, r3                          @ store r3 in scratch r1 to be interruptible
115    adds    r3, #1<<2                       @ move r3 by 4 bytes
116    ldmia   r3!, {r4-r7}                    @ ldmia without r3
117    ldr     r3, [r1]                        @ load to r3 from scratch address
118    mov     r8, r3                          @ r8 = *rcon
119    mov     r9, r4                          @ r9 = mmcau_1_cmd(AESS+CAA)
120    mov     sl, r5                          @ sl = *mmcau_direct_cmd()
121    mov     fp, r6                          @ fp = mmcau_indirect_cmd(LDR+CAA)
122    mov     ip, r7                          @ ip = mmcau_indirect_cmd(STR+CAA)
123
124# calculate key_sch[0-4]
125    ldmia   r0!, {r3-r7}                    @ load key[0-4]; *key++
126    rev     r3, r3                          @ byterev(key[0]) = key_sch[0]
127    rev     r4, r4                          @ byterev(key[1]) = key_sch[1]
128    rev     r5, r5                          @ byterev(key[2]) = key_sch[2]
129    rev     r6, r6                          @ byterev(key[3]) = key_sch[3]
130    rev     r7, r7                          @ byterev(key[4]) = key_sch[4]
131    stmia   r2!, {r3-r7}                    @ store key_sch[0-4], key_sch++
132
133# calculate key_sch[5-7]
# r0 points at key[5] here.  As above, the ldmia that would reload its own
# base register is replaced: key[6-7] are loaded first, then r0 is stepped
# back to key[5] and overwritten with that word.  r3-r6 keep key_sch[0-3].
134#   ldmia   r0, {r0-r1,r7}                  @ load key[5-7]
135    adds    r0, #1<<2                       @ move by 4 byte, make ldmia interruptible in MMCAU
136    ldmia   r0!, {r1,r7}                    @ load key[6-7] and move r0 by 8 bytes
137    subs    r0, #3<<2                       @ move r0 back by 12 bytes
138    ldr     r0, [r0]                        @ load key[5]
139    rev     r0, r0                          @ byterev(key[5]) = key_sch[5]
140    rev     r1, r1                          @ byterev(key[6]) = key_sch[6]
141    rev     r7, r7                          @ byterev(key[7]) = key_sch[7]
142    stmia   r2!, {r0-r1, r7}                @ store key_sch[5-7], key_sch++
143
144# calculate key_sch[8-11]
# r3-r6 still hold key_sch[0-3] and r7 holds key_sch[7], so nothing has to be
# reloaded from memory for this first generated group.
145    mov     r0, r8
146    ldr     r1, [r0]                        @ load rcon[0]
147    movs    r0, #24
148    rors    r7, r0                          @ ROTL(key_sch[7],8)
149    mov     r0, fp
150    str     r7, [r0]                        @ ROTL(key_sch[7]) -> acc
151    mov     r7, r9
152    mov     r0, sl
153    str     r7, [r0]                        @ AES SubBytes
154    mov     r0, ip
155    ldr     r7, [r0]                        @ load CAA
156    eors    r1, r7                          @ XOR rcon[0]
157    eors    r3, r1                          @ XOR key_sch[0]  = key_sch[8]
158    eors    r4, r3                          @ XOR key_sch[1]  = key_sch[9]
159    eors    r5, r4                          @ XOR key_sch[2]  = key_sch[10]
160    eors    r6, r5                          @ XOR key_sch[3]  = key_sch[11]
161    stmia   r2!, {r3-r6}                    @ store key_sch[8-11], *key_sch++
162
163# calculate key_sch[12-15]
164    mov     r5, fp
165    str     r6, [r5]                        @ key_sch[11] -> acc (not rotated)
166    mov     r3, r9
167    mov     r4, sl
168    str     r3, [r4]                        @ AES SubBytes
169    mov     r7, ip
170    ldr     r1, [r7]                        @ load CAA
171    subs    r2, #8<<2                       @ set *key_sch[4]
172    ldmia   r2!, {r3-r6}                    @ load key_sch[4-7], *key_sch++
173    eors    r3, r1                          @ XOR key_sch[4]  = key_sch[12]
174    eors    r4, r3                          @ XOR key_sch[5]  = key_sch[13]
175    eors    r5, r4                          @ XOR key_sch[6]  = key_sch[14]
176    eors    r6, r5                          @ XOR key_sch[7]  = key_sch[15]
177    adds    r2, #4<<2                       @ set *key_sch[12]
178    stmia   r2!, {r3-r6}                    @ store key_sch[12-15], *key_sch++
179
180# calculate key_sch[16-19]
181    mov     r0, r8
182    ldr     r7, [r0, #1<<2]                 @ load rcon[1]
183    mov     r5, fp
184    movs    r0, #24
185    mov     r3, r9
186    rors    r6, r0                          @ ROTL(key_sch[15],8)
187    mov     r4, sl
188    str     r6, [r5]                        @ ROTL(key_sch[15]) -> acc
189    mov     r0, ip
190    str     r3, [r4]                        @ AES SubBytes
191    ldr     r1, [r0]                        @ load CAA
192    eors    r1, r7                          @ XOR rcon[1]
193    subs    r2, #8<<2                       @ set *key_sch[8]
194    ldmia   r2!, {r3-r6}                    @ load key_sch[8-11], *key_sch++
195    eors    r3, r1                          @ XOR key_sch[8]  = key_sch[16]
196    eors    r4, r3                          @ XOR key_sch[9]  = key_sch[17]
197    eors    r5, r4                          @ XOR key_sch[10] = key_sch[18]
198    eors    r6, r5                          @ XOR key_sch[11] = key_sch[19]
199    adds    r2, #4<<2                       @ set *key_sch[16]
200    stmia   r2!, {r3-r6}                    @ store key_sch[16-19], *key_sch++
201
202# calculate key_sch[20-23]
203    mov     r5, fp
204    str     r6, [r5]                        @ key_sch[19] -> acc (not rotated)
205    mov     r3, r9
206    mov     r4, sl
207    str     r3, [r4]                        @ AES SubBytes
208    mov     r7, ip
209    ldr     r1, [r7]                        @ load CAA
210    subs    r2, #8<<2                       @ set *key_sch[12]
211    ldmia   r2!, {r3-r6}                    @ load key_sch[12-15], *key_sch++
212    eors    r3, r1                          @ XOR key_sch[12] = key_sch[20]
213    eors    r4, r3                          @ XOR key_sch[13] = key_sch[21]
214    eors    r5, r4                          @ XOR key_sch[14] = key_sch[22]
215    eors    r6, r5                          @ XOR key_sch[15] = key_sch[23]
216    adds    r2, #4<<2                       @ set *key_sch[20]
217    stmia   r2!, {r3-r6}                    @ store key_sch[20-23], *key_sch++
218
219# calculate key_sch[24-27]
220    mov     r0, r8
221    ldr     r7, [r0, #2<<2]                 @ load rcon[2]
222    mov     r5, fp
223    movs    r0, #24
224    mov     r3, r9
225    rors    r6, r0                          @ ROTL(key_sch[23],8)
226    mov     r4, sl
227    str     r6, [r5]                        @ ROTL(key_sch[23]) -> acc
228    mov     r0, ip
229    str     r3, [r4]                        @ AES SubBytes
230    ldr     r1, [r0]                        @ load CAA
231    eors    r1, r7                          @ XOR rcon[2]
232    subs    r2, #8<<2                       @ set *key_sch[16]
233    ldmia   r2!, {r3-r6}                    @ load key_sch[16-19], *key_sch++
234    eors    r3, r1                          @ XOR key_sch[16] = key_sch[24]
235    eors    r4, r3                          @ XOR key_sch[17] = key_sch[25]
236    eors    r5, r4                          @ XOR key_sch[18] = key_sch[26]
237    eors    r6, r5                          @ XOR key_sch[19] = key_sch[27]
238    adds    r2, #4<<2                       @ set *key_sch[24]
239    stmia   r2!, {r3-r6}                    @ store key_sch[24-27], *key_sch++
240
241# calculate key_sch[28-31]
242    mov     r5, fp
243    str     r6, [r5]                        @ key_sch[27] -> acc (not rotated)
244    mov     r3, r9
245    mov     r4, sl
246    str     r3, [r4]                        @ AES SubBytes
247    mov     r7, ip
248    ldr     r1, [r7]                        @ load CAA
249    subs    r2, #8<<2                       @ set *key_sch[20]
250    ldmia   r2!, {r3-r6}                    @ load key_sch[20-23], *key_sch++
251    eors    r3, r1                          @ XOR key_sch[20] = key_sch[28]
252    eors    r4, r3                          @ XOR key_sch[21] = key_sch[29]
253    eors    r5, r4                          @ XOR key_sch[22] = key_sch[30]
254    eors    r6, r5                          @ XOR key_sch[23] = key_sch[31]
255    adds    r2, #4<<2                       @ set *key_sch[28]
256    stmia   r2!, {r3-r6}                    @ store key_sch[28-31], *key_sch++
257
258# calculate key_sch[32-35]
259    mov     r0, r8
260    ldr     r7, [r0, #3<<2]                 @ load rcon[3]
261    mov     r5, fp
262    movs    r0, #24
263    mov     r3, r9
264    rors    r6, r0                          @ ROTL(key_sch[31],8)
265    mov     r4, sl
266    str     r6, [r5]                        @ ROTL(key_sch[31]) -> acc
267    mov     r0, ip
268    str     r3, [r4]                        @ AES SubBytes
269    ldr     r1, [r0]                        @ load CAA
270    eors    r1, r7                          @ XOR rcon[3]
271    subs    r2, #8<<2                       @ set *key_sch[24]
272    ldmia   r2!, {r3-r6}                    @ load key_sch[24-27], *key_sch++
273    eors    r3, r1                          @ XOR key_sch[24] = key_sch[32]
274    eors    r4, r3                          @ XOR key_sch[25] = key_sch[33]
275    eors    r5, r4                          @ XOR key_sch[26] = key_sch[34]
276    eors    r6, r5                          @ XOR key_sch[27] = key_sch[35]
277    adds    r2, #4<<2                       @ set *key_sch[32]
278    stmia   r2!, {r3-r6}                    @ store key_sch[32-35], *key_sch++
279
280# calculate key_sch[36-39]
281    mov     r5, fp
282    str     r6, [r5]                        @ key_sch[35] -> acc (not rotated)
283    mov     r3, r9
284    mov     r4, sl
285    str     r3, [r4]                        @ AES SubBytes
286    mov     r7, ip
287    ldr     r1, [r7]                        @ load CAA
288    subs    r2, #8<<2                       @ set *key_sch[28]
289    ldmia   r2!, {r3-r6}                    @ load key_sch[28-31], *key_sch++
290    eors    r3, r1                          @ XOR key_sch[28] = key_sch[36]
291    eors    r4, r3                          @ XOR key_sch[29] = key_sch[37]
292    eors    r5, r4                          @ XOR key_sch[30] = key_sch[38]
293    eors    r6, r5                          @ XOR key_sch[31] = key_sch[39]
294    adds    r2, #4<<2                       @ set *key_sch[36]
295    stmia   r2!, {r3-r6}                    @ store key_sch[36-39], *key_sch++
296
297# calculate key_sch[40-43]
298    mov     r0, r8
299    ldr     r7, [r0, #4<<2]                 @ load rcon[4]
300    mov     r5, fp
301    movs    r0, #24
302    mov     r3, r9
303    rors    r6, r0                          @ ROTL(key_sch[39],8)
304    mov     r4, sl
305    str     r6, [r5]                        @ ROTL(key_sch[39]) -> acc
306    mov     r0, ip
307    str     r3, [r4]                        @ AES SubBytes
308    ldr     r1, [r0]                        @ load CAA
309    eors    r1, r7                          @ XOR rcon[4]
310    subs    r2, #8<<2                       @ set *key_sch[32]
311    ldmia   r2!, {r3-r6}                    @ load key_sch[32-35], *key_sch++
312    eors    r3, r1                          @ XOR key_sch[32] = key_sch[40]
313    eors    r4, r3                          @ XOR key_sch[33] = key_sch[41]
314    eors    r5, r4                          @ XOR key_sch[34] = key_sch[42]
315    eors    r6, r5                          @ XOR key_sch[35] = key_sch[43]
316    adds    r2, #4<<2                       @ set *key_sch[40]
317    stmia   r2!, {r3-r6}                    @ store key_sch[40-43], *key_sch++
318
319# calculate key_sch[44-47]
320    mov     r5, fp
321    str     r6, [r5]                        @ key_sch[43] -> acc (not rotated)
322    mov     r3, r9
323    mov     r4, sl
324    str     r3, [r4]                        @ AES SubBytes
325    mov     r7, ip
326    ldr     r1, [r7]                        @ load CAA
327    subs    r2, #8<<2                       @ set *key_sch[36]
328    ldmia   r2!, {r3-r6}                    @ load key_sch[36-39], *key_sch++
329    eors    r3, r1                          @ XOR key_sch[36] = key_sch[44]
330    eors    r4, r3                          @ XOR key_sch[37] = key_sch[45]
331    eors    r5, r4                          @ XOR key_sch[38] = key_sch[46]
332    eors    r6, r5                          @ XOR key_sch[39] = key_sch[47]
333    adds    r2, #4<<2                       @ set *key_sch[44]
334    stmia   r2!, {r3-r6}                    @ store key_sch[44-47], *key_sch++
335
336# calculate key_sch[48-51]
337    mov     r0, r8
338    ldr     r7, [r0, #5<<2]                 @ load rcon[5]
339    mov     r5, fp
340    movs    r0, #24
341    mov     r3, r9
342    rors    r6, r0                          @ ROTL(key_sch[47],8)
343    mov     r4, sl
344    str     r6, [r5]                        @ ROTL(key_sch[47]) -> acc
345    mov     r0, ip
346    str     r3, [r4]                        @ AES SubBytes
347    ldr     r1, [r0]                        @ load CAA
348    eors    r1, r7                          @ XOR rcon[5]
349    subs    r2, #8<<2                       @ set *key_sch[40]
350    ldmia   r2!, {r3-r6}                    @ load key_sch[40-43], *key_sch++
351    eors    r3, r1                          @ XOR key_sch[40] = key_sch[48]
352    eors    r4, r3                          @ XOR key_sch[41] = key_sch[49]
353    eors    r5, r4                          @ XOR key_sch[42] = key_sch[50]
354    eors    r6, r5                          @ XOR key_sch[43] = key_sch[51]
355    adds    r2, #4<<2                       @ set *key_sch[48]
356    stmia   r2!, {r3-r6}                    @ store key_sch[48-51], *key_sch++
357
358# calculate key_sch[52-55]
359    mov     r5, fp
360    str     r6, [r5]                        @ key_sch[51] -> acc (not rotated)
361    mov     r3, r9
362    mov     r4, sl
363    str     r3, [r4]                        @ AES SubBytes
364    mov     r7, ip
365    ldr     r1, [r7]                        @ load CAA
366    subs    r2, #8<<2                       @ set *key_sch[44]
367    ldmia   r2!, {r3-r6}                    @ load key_sch[44-47], *key_sch++
368    eors    r3, r1                          @ XOR key_sch[44] = key_sch[52]
369    eors    r4, r3                          @ XOR key_sch[45] = key_sch[53]
370    eors    r5, r4                          @ XOR key_sch[46] = key_sch[54]
371    eors    r6, r5                          @ XOR key_sch[47] = key_sch[55]
372    adds    r2, #4<<2                       @ set *key_sch[52]
373    stmia   r2!, {r3-r6}                    @ store key_sch[52-55], *key_sch++
374
375# calculate key_sch[56-59]
376    mov     r0, r8
377    ldr     r7, [r0, #6<<2]                 @ load rcon[6]
378    mov     r5, fp
379    movs    r0, #24
380    mov     r3, r9
381    rors    r6, r0                          @ ROTL(key_sch[55],8)
382    mov     r4, sl
383    str     r6, [r5]                        @ ROTL(key_sch[55]) -> acc
384    mov     r0, ip
385    str     r3, [r4]                        @ AES SubBytes
386    ldr     r1, [r0]                        @ load CAA
387    eors    r1, r7                          @ XOR rcon[6]
388    subs    r2, #8<<2                       @ set *key_sch[48]
389    ldmia   r2!, {r3-r6}                    @ load key_sch[48-51], *key_sch++
390    eors    r3, r1                          @ XOR key_sch[48] = key_sch[56]
391    eors    r4, r3                          @ XOR key_sch[49] = key_sch[57]
392    eors    r5, r4                          @ XOR key_sch[50] = key_sch[58]
393    eors    r6, r5                          @ XOR key_sch[51] = key_sch[59]
394    adds    r2, #4<<2                       @ set *key_sch[56]
395    stmia   r2!, {r3-r6}                    @ store key_sch[56-59], *key_sch++
396
397    b       set_key_end                     @ end routine
398
399
400set_key_192:
401
# AES-192 path: expands the six key words into key_sch[0-51].
# key_sch[0-5] are the byte-reversed key words and stay live in r0, r1 and
# r3-r6.  Each pass below turns those six registers into the next six
# schedule words in place, so the schedule is never read back from memory:
# the last word (r6) is rotated left by 8 (rors by 24), written to the
# address in fp (into the CAA), the AESS command word (r9) is written to the
# address in sl, the CAA is read back from fp+64 and XORed with rcon[i]; that
# value is XORed into r0 and the result is chained through r1, r3, r4, r5, r6.
# r5 and r6 are parked in ip and lr while they double as rcon / scratch.
# lr is therefore overwritten on this path; its entry value was pushed at the
# top of mmcau_aes_set_key.
# The final pass (rcon[7]) only produces four words, key_sch[48-51].
402#    REGISTER  |  ALLOCATION (throughout set_key_192)
403#   -----------+------------------------------------------------------------
404#          r0  |  key_sch[0+6i]
405#          r1  |  key_sch[1+6i]
406#          r2  |  *key_sch
407#          r3  |  key_sch[2+6i]
408#          r4  |  key_sch[3+6i]
409#          r5  |  key_sch[4+6i] / rcon[i]
410#          r6  |  key_sch[5+6i] / scratch
411#          r7  |  scratch
412#          r8  |  *rcon
413#          r9  |  mmcau_1_cmd(AESS+CAA)
414#    (sl) r10  |  *mmcau_direct_cmd()
415#    (fp) r11  |  mmcau_indirect_cmd(LDR+CAA)
416#        NOTE  |  mmcau_indirect_cmd(STR+CAA) = mmcau_indirect_cmd(LDR+CAA)+64
417#    (ip) r12  |  temporary storage for key_sch[4+6i]
418#    (sp) r13  |  stack pointer
419#    (lr) r14  |  temporary storage for key_sch[5+6i]
420
421# load some of the regs in preparation for the AES-192 set key calculations
# The single ldmia below (base register inside its own load list) is kept for
# reference only; the live sequence loads words 1-3 of the table through a
# writeback ldmia that does not list r3, then fetches word 0 separately.
422#   ldmia   r3, {r3-r6}
423    mov     r7, r3                          @ make ldmia interruptible in MMCAU by storing r3 addr into scratch r7
424    adds    r3, #1<<2                       @ move r3 addr by 1 word
425    ldmia   r3!, {r4-r6}                    @ load from r3 (r7 + 1 word) to r4-r6 with writeback
426    ldr     r3, [r7]                        @ load to r3 from address in scratch (r7)
427
428    mov     r8, r3                          @ r8 = *rcon
429    mov     r9, r4                          @ r9 = mmcau_1_cmd(AESS+CAA)
430    mov     sl, r5                          @ sl = *mmcau_direct_cmd()
431    mov     fp, r6                          @ fp = mmcau_indirect_cmd(LDR+CAA)
432
433# calculate key_sch[0-5]
434#   ldmia   r0, {r0-r1, r3-r6}              @ load key[0-5]
435                                            @ make ldmia interruptible in MMCAU
436    adds    r0, #1<<2                       @ move by 4 byte
437    ldmia   r0!, {r1, r3-r6}                @ load key[1-5] + move by 20 byte
438    subs    r0, #6<<2                       @ move back by 24byte
439    ldr     r0, [r0]                        @ load key[0]
440
441
442    rev     r0, r0                          @ byterev(key[0]) = key_sch[0]
443    rev     r1, r1                          @ byterev(key[1]) = key_sch[1]
444    rev     r3, r3                          @ byterev(key[2]) = key_sch[2]
445    rev     r4, r4                          @ byterev(key[3]) = key_sch[3]
446    rev     r5, r5                          @ byterev(key[4]) = key_sch[4]
447    rev     r6, r6                          @ byterev(key[5]) = key_sch[5]
448    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[0-5]
449
450# calculate key_sch[6-11]
451    mov     ip, r5                          @ temporarily store key_sch[4]
452    mov     lr, r6                          @ temporarily store key_sch[5]
453    mov     r7, r8
454    ldr     r5, [r7, #0<<2]                 @ load rcon[0]
455    movs    r7, #24
456    rors    r6, r7                          @ ROTL(key_sch[5],8)
457    mov     r7, fp
458    str     r6, [r7]                        @ ROTL(key_sch[5],8) -> acc
459    mov     r6, r9
460    mov     r7, sl
461    str     r6, [r7]                        @ AES SubBytes
462    mov     r6, fp
463    adds    r6, #64
464    ldr     r7, [r6]                        @ load CAA
465    eors    r7, r5                          @ XOR rcon[0]
466    mov     r5, ip                          @ restore key_sch[4]
467    mov     r6, lr                          @ restore key_sch[5]
468    eors    r0, r7                          @ XOR key_sch[0]  = key_sch[6]
469    eors    r1, r0                          @ XOR key_sch[1]  = key_sch[7]
470    eors    r3, r1                          @ XOR key_sch[2]  = key_sch[8]
471    eors    r4, r3                          @ XOR key_sch[3]  = key_sch[9]
472    eors    r5, r4                          @ XOR key_sch[4]  = key_sch[10]
473    eors    r6, r5                          @ XOR key_sch[5]  = key_sch[11]
474    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[6-11], *key_sch++
475
476# calculate key_sch[12-17]
477    mov     ip, r5                          @ temporarily store key_sch[10]
478    mov     lr, r6                          @ temporarily store key_sch[11]
479    mov     r7, r8
480    ldr     r5, [r7, #1<<2]                 @ load rcon[1]
481    movs    r7, #24
482    rors    r6, r7                          @ ROTL(key_sch[11],8)
483    mov     r7, fp
484    str     r6, [r7]                        @ ROTL(key_sch[11],8) -> acc
485    mov     r6, r9
486    mov     r7, sl
487    str     r6, [r7]                        @ AES SubBytes
488    mov     r6, fp
489    adds    r6, #64
490    ldr     r7, [r6]                        @ load CAA
491    eors    r7, r5                          @ XOR rcon[1]
492    mov     r5, ip                          @ restore key_sch[10]
493    mov     r6, lr                          @ restore key_sch[11]
494    eors    r0, r7                          @ XOR key_sch[6]  = key_sch[12]
495    eors    r1, r0                          @ XOR key_sch[7]  = key_sch[13]
496    eors    r3, r1                          @ XOR key_sch[8]  = key_sch[14]
497    eors    r4, r3                          @ XOR key_sch[9]  = key_sch[15]
498    eors    r5, r4                          @ XOR key_sch[10] = key_sch[16]
499    eors    r6, r5                          @ XOR key_sch[11] = key_sch[17]
500    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[12-17], *key_sch++
501
502# calculate key_sch[18-23]
503    mov     ip, r5                          @ temporarily store key_sch[16]
504    mov     lr, r6                          @ temporarily store key_sch[17]
505    mov     r7, r8
506    ldr     r5, [r7, #2<<2]                 @ load rcon[2]
507    movs    r7, #24
508    rors    r6, r7                          @ ROTL(key_sch[17],8)
509    mov     r7, fp
510    str     r6, [r7]                        @ ROTL(key_sch[17],8) -> acc
511    mov     r6, r9
512    mov     r7, sl
513    str     r6, [r7]                        @ AES SubBytes
514    mov     r6, fp
515    adds    r6, #64
516    ldr     r7, [r6]                        @ load CAA
517    eors    r7, r5                          @ XOR rcon[2]
518    mov     r5, ip                          @ restore key_sch[16]
519    mov     r6, lr                          @ restore key_sch[17]
520    eors    r0, r7                          @ XOR key_sch[12] = key_sch[18]
521    eors    r1, r0                          @ XOR key_sch[13] = key_sch[19]
522    eors    r3, r1                          @ XOR key_sch[14] = key_sch[20]
523    eors    r4, r3                          @ XOR key_sch[15] = key_sch[21]
524    eors    r5, r4                          @ XOR key_sch[16] = key_sch[22]
525    eors    r6, r5                          @ XOR key_sch[17] = key_sch[23]
526    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[18-23], *key_sch++
527
528# calculate key_sch[24-29]
529    mov     ip, r5                          @ temporarily store key_sch[22]
530    mov     lr, r6                          @ temporarily store key_sch[23]
531    mov     r7, r8
532    ldr     r5, [r7, #3<<2]                 @ load rcon[3]
533    movs    r7, #24
534    rors    r6, r7                          @ ROTL(key_sch[23],8)
535    mov     r7, fp
536    str     r6, [r7]                        @ ROTL(key_sch[23],8) -> acc
537    mov     r6, r9
538    mov     r7, sl
539    str     r6, [r7]                        @ AES SubBytes
540    mov     r6, fp
541    adds    r6, #64
542    ldr     r7, [r6]                        @ load CAA
543    eors    r7, r5                          @ XOR rcon[3]
544    mov     r5, ip                          @ restore key_sch[22]
545    mov     r6, lr                          @ restore key_sch[23]
546    eors    r0, r7                          @ XOR key_sch[18] = key_sch[24]
547    eors    r1, r0                          @ XOR key_sch[19] = key_sch[25]
548    eors    r3, r1                          @ XOR key_sch[20] = key_sch[26]
549    eors    r4, r3                          @ XOR key_sch[21] = key_sch[27]
550    eors    r5, r4                          @ XOR key_sch[22] = key_sch[28]
551    eors    r6, r5                          @ XOR key_sch[23] = key_sch[29]
552    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[24-29], *key_sch++
553
554# calculate key_sch[30-35]
555    mov     ip, r5                          @ temporarily store key_sch[28]
556    mov     lr, r6                          @ temporarily store key_sch[29]
557    mov     r7, r8
558    ldr     r5, [r7, #4<<2]                 @ load rcon[4]
559    movs    r7, #24
560    rors    r6, r7                          @ ROTL(key_sch[29],8)
561    mov     r7, fp
562    str     r6, [r7]                        @ ROTL(key_sch[29],8) -> acc
563    mov     r6, r9
564    mov     r7, sl
565    str     r6, [r7]                        @ AES SubBytes
566    mov     r6, fp
567    adds    r6, #64
568    ldr     r7, [r6]                        @ load CAA
569    eors    r7, r5                          @ XOR rcon[4]
570    mov     r5, ip                          @ restore key_sch[28]
571    mov     r6, lr                          @ restore key_sch[29]
572    eors    r0, r7                          @ XOR key_sch[24] = key_sch[30]
573    eors    r1, r0                          @ XOR key_sch[25] = key_sch[31]
574    eors    r3, r1                          @ XOR key_sch[26] = key_sch[32]
575    eors    r4, r3                          @ XOR key_sch[27] = key_sch[33]
576    eors    r5, r4                          @ XOR key_sch[28] = key_sch[34]
577    eors    r6, r5                          @ XOR key_sch[29] = key_sch[35]
578    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[30-35], *key_sch++
579
580# calculate key_sch[36-41]
581    mov     ip, r5                          @ temporarily store key_sch[34]
582    mov     lr, r6                          @ temporarily store key_sch[35]
583    mov     r7, r8
584    ldr     r5, [r7, #5<<2]                 @ load rcon[5]
585    movs    r7, #24
586    rors    r6, r7                          @ ROTL(key_sch[35],8)
587    mov     r7, fp
588    str     r6, [r7]                        @ ROTL(key_sch[35],8) -> acc
589    mov     r6, r9
590    mov     r7, sl
591    str     r6, [r7]                        @ AES SubBytes
592    mov     r6, fp
593    adds    r6, #64
594    ldr     r7, [r6]                        @ load CAA
595    eors    r7, r5                          @ XOR rcon[5]
596    mov     r5, ip                          @ restore key_sch[34]
597    mov     r6, lr                          @ restore key_sch[35]
598    eors    r0, r7                          @ XOR key_sch[30] = key_sch[36]
599    eors    r1, r0                          @ XOR key_sch[31] = key_sch[37]
600    eors    r3, r1                          @ XOR key_sch[32] = key_sch[38]
601    eors    r4, r3                          @ XOR key_sch[33] = key_sch[39]
602    eors    r5, r4                          @ XOR key_sch[34] = key_sch[40]
603    eors    r6, r5                          @ XOR key_sch[35] = key_sch[41]
604    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[36-41], *key_sch++
605
606# calculate key_sch[42-47]
607    mov     ip, r5                          @ temporarily store key_sch[40]
608    mov     lr, r6                          @ temporarily store key_sch[41]
609    mov     r7, r8
610    ldr     r5, [r7, #6<<2]                 @ load rcon[6]
611    movs    r7, #24
612    rors    r6, r7                          @ ROTL(key_sch[41],8)
613    mov     r7, fp
614    str     r6, [r7]                        @ ROTL(key_sch[41],8) -> acc
615    mov     r6, r9
616    mov     r7, sl
617    str     r6, [r7]                        @ AES SubBytes
618    mov     r6, fp
619    adds    r6, #64
620    ldr     r7, [r6]                        @ load CAA
621    eors    r7, r5                          @ XOR rcon[6]
622    mov     r5, ip                          @ restore key_sch[40]
623    mov     r6, lr                          @ restore key_sch[41]
624    eors    r0, r7                          @ XOR key_sch[36] = key_sch[42]
625    eors    r1, r0                          @ XOR key_sch[37] = key_sch[43]
626    eors    r3, r1                          @ XOR key_sch[38] = key_sch[44]
627    eors    r4, r3                          @ XOR key_sch[39] = key_sch[45]
628    eors    r5, r4                          @ XOR key_sch[40] = key_sch[46]
629    eors    r6, r5                          @ XOR key_sch[41] = key_sch[47]
630    stmia   r2!, {r0-r1, r3-r6}             @ store key_sch[42-47], *key_sch++
631
632# calculate key_sch[48-51]
# Last pass: only four more words are stored, so r5/r6 are used as scratch
# without being parked in ip/lr first.
633    mov     r7, r8
634    ldr     r5, [r7, #7<<2]                 @ load rcon[7]
635    movs    r7, #24
636    rors    r6, r7                          @ ROTL(key_sch[47],8)
637    mov     r7, fp
638    str     r6, [r7]                        @ ROTL(key_sch[47],8) -> acc
639    mov     r6, r9
640    mov     r7, sl
641    str     r6, [r7]                        @ AES SubBytes
642    mov     r6, fp
643    adds    r6, #64
644    ldr     r7, [r6]                        @ load CAA
645    eors    r7, r5                          @ XOR rcon[7]
646    eors    r0, r7                          @ XOR key_sch[42] = key_sch[48]
647    eors    r1, r0                          @ XOR key_sch[43] = key_sch[49]
648    eors    r3, r1                          @ XOR key_sch[44] = key_sch[50]
649    eors    r4, r3                          @ XOR key_sch[45] = key_sch[51]
650    stmia   r2!, {r0-r1, r3-r4}             @ store key_sch[48-51], *key_sch++
651
652    b       set_key_end                     @ end routine
653
654
655set_key_128:
656# AES-128 key expansion path: stores key_sch[0-43] (11 groups of 4 words), then falls through to set_key_end
657#    REGISTER  |  ALLOCATION (throughout set_key_128)
658#   -----------+------------------------------------------------------------
659#          r0  |  rcon[i]        (*key only until key[0-3] has been loaded)
660#          r1  |  scratch
661#          r2  |  *key_sch       (post-incremented by every stmia)
662#          r3  |  key_sch[0+4i]
663#          r4  |  key_sch[1+4i]
664#          r5  |  key_sch[2+4i]
665#          r6  |  key_sch[3+4i]
666#          r7  |  scratch
667#          r8  |  *rcon
668#          r9  |  mmcau_1_cmd(AESS+CAA)
669#    (sl) r10  |  *mmcau_direct_cmd()
670#    (fp) r11  |  mmcau_indirect_cmd(LDR+CAA)   (written to load the accumulator)
671#    (ip) r12  |  mmcau_indirect_cmd(STR+CAA)   (read to fetch the accumulator)
672#    (sp) r13  |  stack pointer
673#    (lr) r14  |  link register
674
675# load some of the regs in preparation of the AES-128 set key calculations
676#   ldmia   r3, {r3-r7}     (original form, r3 is expected to point at a 5-word table set up before this label)
677    mov     r1, r3                          @ keep the table base in r1 so r3 can stay out of the ldmia list (interruptible ldmia)
678    adds    r3, #1<<2                       @ advance r3 by 4 bytes to the second table word
679    ldmia   r3!, {r4-r7}                    @ load table words 1-4, r3 itself is not in the list
680    ldr     r3, [r1]                        @ load table word 0 through the saved base in r1
681
682    mov     r8, r3                          @ r8 = *rcon
683    mov     r9, r4                          @ r9 = mmcau_1_cmd(AESS+CAA)
684    mov     sl, r5                          @ sl = *mmcau_direct_cmd()
685    mov     fp, r6                          @ fp = mmcau_indirect_cmd(LDR+CAA)
686    mov     ip, r7                          @ ip = mmcau_indirect_cmd(STR+CAA)
687
688# calculate key_sch[0-3]: the first four words are the byte-reversed input key
689    ldmia   r0!, {r3-r6}                    @ load key[0-3]
690    rev     r3, r3                          @ byterev(key[0]) = key_sch[0]
691    rev     r4, r4                          @ byterev(key[1]) = key_sch[1]
692    rev     r5, r5                          @ byterev(key[2]) = key_sch[2]
693    rev     r6, r6                          @ byterev(key[3]) = key_sch[3]
694    stmia   r2!, {r3-r6}                    @ store key_sch[0-3], *key_sch++
695
696# calculate key_sch[4-7] (every following group repeats this sequence with the next rcon word)
697    mov     r7, r8                          @ copy *rcon into a low reg to use it as the ldr base
698    ldr     r0, [r7, #0<<2]                 @ load rcon[0]
699    movs    r7, #24                         @ rotate right by 24 = rotate left by 8
700    mov     r1, r6                          @ copy key_sch[3]
701    rors    r1, r7                          @ ROTL(key_sch[3],8)
702    mov     r7, fp                          @ r7 = LDR+CAA address
703    str     r1, [r7]                        @ ROTL(key_sch[3],8) -> acc
704    mov     r1, r9                          @ r1 = AESS+CAA command word
705    mov     r7, sl                          @ r7 = direct command address
706    str     r1, [r7]                        @ AES SubBytes
707    mov     r1, ip                          @ r1 = STR+CAA address
708    ldr     r7, [r1]                        @ load CAA
709    eors    r7, r0                          @ XOR rcon[0]
710    eors    r3, r7                          @ XOR key_sch[0]  = key_sch[4]
711    eors    r4, r3                          @ XOR key_sch[1]  = key_sch[5]
712    eors    r5, r4                          @ XOR key_sch[2]  = key_sch[6]
713    eors    r6, r5                          @ XOR key_sch[3]  = key_sch[7]
714    stmia   r2!, {r3-r6}                    @ store key_sch[4-7], *key_sch++
715
716# calculate key_sch[8-11]
717    mov     r7, r8
718    ldr     r0, [r7, #1<<2]                 @ load rcon[1]
719    movs    r7, #24
720    mov     r1, r6                          @ copy key_sch[7]
721    rors    r1, r7                          @ ROTL(key_sch[7],8)
722    mov     r7, fp
723    str     r1, [r7]                        @ ROTL(key_sch[7],8) -> acc
724    mov     r1, r9
725    mov     r7, sl
726    str     r1, [r7]                        @ AES SubBytes
727    mov     r1, ip
728    ldr     r7, [r1]                        @ load CAA
729    eors    r7, r0                          @ XOR rcon[1]
730    eors    r3, r7                          @ XOR key_sch[4]  = key_sch[8]
731    eors    r4, r3                          @ XOR key_sch[5]  = key_sch[9]
732    eors    r5, r4                          @ XOR key_sch[6]  = key_sch[10]
733    eors    r6, r5                          @ XOR key_sch[7]  = key_sch[11]
734    stmia   r2!, {r3-r6}                    @ store key_sch[8-11], *key_sch++
735
736# calculate key_sch[12-15]
737    mov     r7, r8
738    ldr     r0, [r7, #2<<2]                 @ load rcon[2]
739    movs    r7, #24
740    mov     r1, r6                          @ copy key_sch[11]
741    rors    r1, r7                          @ ROTL(key_sch[11],8)
742    mov     r7, fp
743    str     r1, [r7]                        @ ROTL(key_sch[11],8) -> acc
744    mov     r1, r9
745    mov     r7, sl
746    str     r1, [r7]                        @ AES SubBytes
747    mov     r1, ip
748    ldr     r7, [r1]                        @ load CAA
749    eors    r7, r0                          @ XOR rcon[2]
750    eors    r3, r7                          @ XOR key_sch[8]  = key_sch[12]
751    eors    r4, r3                          @ XOR key_sch[9]  = key_sch[13]
752    eors    r5, r4                          @ XOR key_sch[10] = key_sch[14]
753    eors    r6, r5                          @ XOR key_sch[11] = key_sch[15]
754    stmia   r2!, {r3-r6}                    @ store key_sch[12-15], *key_sch++
755
756# calculate key_sch[16-19]
757    mov     r7, r8
758    ldr     r0, [r7, #3<<2]                 @ load rcon[3]
759    movs    r7, #24
760    mov     r1, r6                          @ copy key_sch[15]
761    rors    r1, r7                          @ ROTL(key_sch[15],8)
762    mov     r7, fp
763    str     r1, [r7]                        @ ROTL(key_sch[15],8) -> acc
764    mov     r1, r9
765    mov     r7, sl
766    str     r1, [r7]                        @ AES SubBytes
767    mov     r1, ip
768    ldr     r7, [r1]                        @ load CAA
769    eors    r7, r0                          @ XOR rcon[3]
770    eors    r3, r7                          @ XOR key_sch[12] = key_sch[16]
771    eors    r4, r3                          @ XOR key_sch[13] = key_sch[17]
772    eors    r5, r4                          @ XOR key_sch[14] = key_sch[18]
773    eors    r6, r5                          @ XOR key_sch[15] = key_sch[19]
774    stmia   r2!, {r3-r6}                    @ store key_sch[16-19], *key_sch++
775
776# calculate key_sch[20-23]
777    mov     r7, r8
778    ldr     r0, [r7, #4<<2]                 @ load rcon[4]
779    movs    r7, #24
780    mov     r1, r6                          @ copy key_sch[19]
781    rors    r1, r7                          @ ROTL(key_sch[19],8)
782    mov     r7, fp
783    str     r1, [r7]                        @ ROTL(key_sch[19],8) -> acc
784    mov     r1, r9
785    mov     r7, sl
786    str     r1, [r7]                        @ AES SubBytes
787    mov     r1, ip
788    ldr     r7, [r1]                        @ load CAA
789    eors    r7, r0                          @ XOR rcon[4]
790    eors    r3, r7                          @ XOR key_sch[16] = key_sch[20]
791    eors    r4, r3                          @ XOR key_sch[17] = key_sch[21]
792    eors    r5, r4                          @ XOR key_sch[18] = key_sch[22]
793    eors    r6, r5                          @ XOR key_sch[19] = key_sch[23]
794    stmia   r2!, {r3-r6}                    @ store key_sch[20-23], *key_sch++
795
796# calculate key_sch[24-27]
797    mov     r7, r8
798    ldr     r0, [r7, #5<<2]                 @ load rcon[5]
799    movs    r7, #24
800    mov     r1, r6                          @ copy key_sch[23]
801    rors    r1, r7                          @ ROTL(key_sch[23],8)
802    mov     r7, fp
803    str     r1, [r7]                        @ ROTL(key_sch[23],8) -> acc
804    mov     r1, r9
805    mov     r7, sl
806    str     r1, [r7]                        @ AES SubBytes
807    mov     r1, ip
808    ldr     r7, [r1]                        @ load CAA
809    eors    r7, r0                          @ XOR rcon[5]
810    eors    r3, r7                          @ XOR key_sch[20] = key_sch[24]
811    eors    r4, r3                          @ XOR key_sch[21] = key_sch[25]
812    eors    r5, r4                          @ XOR key_sch[22] = key_sch[26]
813    eors    r6, r5                          @ XOR key_sch[23] = key_sch[27]
814    stmia   r2!, {r3-r6}                    @ store key_sch[24-27], *key_sch++
815
816# calculate key_sch[28-31]
817    mov     r7, r8
818    ldr     r0, [r7, #6<<2]                 @ load rcon[6]
819    movs    r7, #24
820    mov     r1, r6                          @ copy key_sch[27]
821    rors    r1, r7                          @ ROTL(key_sch[27],8)
822    mov     r7, fp
823    str     r1, [r7]                        @ ROTL(key_sch[27],8) -> acc
824    mov     r1, r9
825    mov     r7, sl
826    str     r1, [r7]                        @ AES SubBytes
827    mov     r1, ip
828    ldr     r7, [r1]                        @ load CAA
829    eors    r7, r0                          @ XOR rcon[6]
830    eors    r3, r7                          @ XOR key_sch[24] = key_sch[28]
831    eors    r4, r3                          @ XOR key_sch[25] = key_sch[29]
832    eors    r5, r4                          @ XOR key_sch[26] = key_sch[30]
833    eors    r6, r5                          @ XOR key_sch[27] = key_sch[31]
834    stmia   r2!, {r3-r6}                    @ store key_sch[28-31], *key_sch++
835
836# calculate key_sch[32-35]
837    mov     r7, r8
838    ldr     r0, [r7, #7<<2]                 @ load rcon[7]
839    movs    r7, #24
840    mov     r1, r6                          @ copy key_sch[31]
841    rors    r1, r7                          @ ROTL(key_sch[31],8)
842    mov     r7, fp
843    str     r1, [r7]                        @ ROTL(key_sch[31],8) -> acc
844    mov     r1, r9
845    mov     r7, sl
846    str     r1, [r7]                        @ AES SubBytes
847    mov     r1, ip
848    ldr     r7, [r1]                        @ load CAA
849    eors    r7, r0                          @ XOR rcon[7]
850    eors    r3, r7                          @ XOR key_sch[28] = key_sch[32]
851    eors    r4, r3                          @ XOR key_sch[29] = key_sch[33]
852    eors    r5, r4                          @ XOR key_sch[30] = key_sch[34]
853    eors    r6, r5                          @ XOR key_sch[31] = key_sch[35]
854    stmia   r2!, {r3-r6}                    @ store key_sch[32-35], *key_sch++
855
856# calculate key_sch[36-39]
857    mov     r7, r8
858    ldr     r0, [r7, #8<<2]                 @ load rcon[8]
859    movs    r7, #24
860    mov     r1, r6                          @ copy key_sch[35]
861    rors    r1, r7                          @ ROTL(key_sch[35],8)
862    mov     r7, fp
863    str     r1, [r7]                        @ ROTL(key_sch[35],8) -> acc
864    mov     r1, r9
865    mov     r7, sl
866    str     r1, [r7]                        @ AES SubBytes
867    mov     r1, ip
868    ldr     r7, [r1]                        @ load CAA
869    eors    r7, r0                          @ XOR rcon[8]
870    eors    r3, r7                          @ XOR key_sch[32] = key_sch[36]
871    eors    r4, r3                          @ XOR key_sch[33] = key_sch[37]
872    eors    r5, r4                          @ XOR key_sch[34] = key_sch[38]
873    eors    r6, r5                          @ XOR key_sch[35] = key_sch[39]
874    stmia   r2!, {r3-r6}                    @ store key_sch[36-39], *key_sch++
875
876# calculate key_sch[40-43] (last group of the AES-128 schedule)
877    mov     r7, r8
878    ldr     r0, [r7, #9<<2]                 @ load rcon[9]
879    movs    r7, #24
880    mov     r1, r6                          @ copy key_sch[39]
881    rors    r1, r7                          @ ROTL(key_sch[39],8)
882    mov     r7, fp
883    str     r1, [r7]                        @ ROTL(key_sch[39],8) -> acc
884    mov     r1, r9
885    mov     r7, sl
886    str     r1, [r7]                        @ AES SubBytes
887    mov     r1, ip
888    ldr     r7, [r1]                        @ load CAA
889    eors    r7, r0                          @ XOR rcon[9]
890    eors    r3, r7                          @ XOR key_sch[36] = key_sch[40]
891    eors    r4, r3                          @ XOR key_sch[37] = key_sch[41]
892    eors    r5, r4                          @ XOR key_sch[38] = key_sch[42]
893    eors    r6, r5                          @ XOR key_sch[39] = key_sch[43]
894    stmia   r2!, {r3-r6}                    @ store key_sch[40-43], *key_sch++
895
896
897set_key_end:
898# common exit of the set key routine: reached by branch (b set_key_end) or by fall-through from set_key_128
899    pop     {r3-r7}                         @ pop the five saved high-reg values (matching push is presumably in the prologue, not visible here)
900    mov     r8, r3                          @ restore r8
901    mov     r9, r4                          @ restore r9
902    mov     sl, r5                          @ restore sl (r10)
903    mov     fp, r6                          @ restore fp (r11)
904    mov     ip, r7                          @ restore ip (r12)
905    pop     {r4-r7, pc}                     @ restore low regs, exit routine (return address is popped straight into pc)
906
907
908# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
909#
910#   MMCAU_AES_ENCRYPT
911#   Encrypts a single 16-byte block
912#
913#   ARGUMENTS
914#   *in             pointer to 16-byte block of input plaintext
915#   *key_sch        pointer to key schedule (44, 52, 60 longwords)
916#   nr              number of AES rounds (10, 12, 14 = f(key_schedule))
917#   *out            pointer to 16-byte block of output ciphertext
918#
919#
920#   CALLING CONVENTION
921#   void mmcau_aes_encrypt     (const unsigned char     *in,
922#                               const unsigned char     *key_sch,
923#                               const int               nr,
924#                               unsigned char           *out)
925#
926#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
927#
928#    REGISTER  |  ALLOCATION (at the start of mmcau_aes_encrypt)
929#   -----------+------------------------------------------------------------
930#          r0  |  *in           (arg0)
931#          r1  |  *key_sch      (arg1)
932#          r2  |  nr            (arg2)
933#          r3  |  *out          (arg3)
934#              |
935#        > r3  |  irrelevant
936#
937#
938#    REGISTER  |  ALLOCATION (throughout mmcau_aes_encrypt)
939#   -----------+------------------------------------------------------------
940#          r0  |  mmcau_3_cmds(AESS+CA0,AESS+CA1,AESS+CA2)
941#          r1  |  *key_sch
942#          r2  |  *mmcau_direct_cmd()
943#          r3  |  scratch
944#          r4  |  key_sch[0+4i]
945#          r5  |  key_sch[1+4i]
946#          r6  |  key_sch[2+4i]
947#          r7  |  key_sch[3+4i]
948#          r8  |  mmcau_indirect_cmd(AESC+CA0)
949#          r9  |  not used
950#    (sl) r10  |  not used
951#    (fp) r11  |  not used
952#    (ip) r12  |  not used
953#    (sp) r13  |  stack pointer
954#    (lr) r14  |  mmcau_2_cmds(AESS+CA3,AESR)
955#
956# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
957
958    .global _mmcau_aes_encrypt
959    .global mmcau_aes_encrypt
960    .type   mmcau_aes_encrypt, %function
961    .align  4
962
963_mmcau_aes_encrypt:
964mmcau_aes_encrypt:
965# encrypts one 16-byte block: r0 = *in, r1 = *key_sch, r2 = nr (10, 12, 14), r3 = *out
966# store nr and *out, we need them later in the routine
967# store regs r4-r8, we need to restore them at the end of the routine
968    push    {r2-r7, lr}                     @ store nr, *out, low regs, and lr
969    mov     r4, r8
970    push    {r4}                            @ store high reg, now [sp,#0] = r8, [sp,#4] = nr, [sp,#8] = *out
971
972# XOR the first 4 keys into the 4 words of plaintext
973    ldmia   r1!, {r4-r7}                    @ load first 4 keys, *key_sch++
974    mov     lr, r1                          @ temporarily store *key_sch[4]
975#   ldmia   r0, {r0-r3}                     @ load plaintext
976    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interruptible
977    ldmia   r0!, {r1-r3}                    @ load plaintext[1-3] and move r0 by 12 byte
978    subs    r0, #1<<4                       @ move r0 back by 16 bytes
979    ldr     r0, [r0]                        @ load plaintext[0]
980
981    rev     r0, r0                          @ byte-reverse each plaintext word
982    rev     r1, r1
983    rev     r2, r2
984    rev     r3, r3
985    eors    r4, r0                          @ key_sch[0-3] XOR byterev(plaintext[0-3])
986    eors    r5, r1
987    eors    r6, r2
988    eors    r7, r3
989    ldr     r1, =MMCAU_PPB_INDIRECT+(LDR+CA0)<<2
990    stmia   r1!, {r4-r7}                    @ store XOR results in CA[0-3]
991
992# load some of the regs in preparation of the encryption
993    ldr     r0, =encrypt_reg_data
994#   ldmia   r0, {r0-r3}
995    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interruptible
996    ldmia   r0!, {r1-r3}                    @ load encrypt_reg_data[1-3] and move r0 by 12 byte
997    subs    r0, #1<<4                       @ move r0 back by 16 bytes
998    ldr     r0, [r0]                        @ load encrypt_reg_data[0]
999
1000    mov     r8, r1                          @ r8 = mmcau_indirect_cmd(AESC+CA0)
1001    mov     r1, lr                          @ restore r1 = *key_sch[4]
1002    mov     lr, r3                          @ lr = mmcau_2_cmds(AESS+CA3,AESR)
1003
1004# send a series of cau commands to perform the encryption
1005    str     r0, [r2]                        @ SubBytes
1006    str     r3, [r2]                        @ SubBytes, ShiftRows (r3 still holds the lr value here)
1007    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1008    mov     r3, r8
1009#   stmia   r3!, {r4-r7}                    @ MixColumns (original form, the r3 write-back is not needed because r3 is reloaded before its next use)
1010    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1011    str     r5, [r3, #1<<2]
1012    str     r6, [r3, #2<<2]
1013    str     r7, [r3, #3<<2]
1015
1016    str     r0, [r2]                        @ SubBytes
1017    mov     r3, lr
1018    str     r3, [r2]                        @ SubBytes, ShiftRows
1019    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1020    mov     r3, r8
1021#   stmia   r3!, {r4-r7}                    @ MixColumns
1022    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1023    str     r5, [r3, #1<<2]
1024    str     r6, [r3, #2<<2]
1025    str     r7, [r3, #3<<2]
1027
1028    str     r0, [r2]                        @ SubBytes
1029    mov     r3, lr
1030    str     r3, [r2]                        @ SubBytes, ShiftRows
1031    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1032    mov     r3, r8
1033#   stmia   r3!, {r4-r7}                    @ MixColumns
1034    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1035    str     r5, [r3, #1<<2]
1036    str     r6, [r3, #2<<2]
1037    str     r7, [r3, #3<<2]
1039
1040    str     r0, [r2]                        @ SubBytes
1041    mov     r3, lr
1042    str     r3, [r2]                        @ SubBytes, ShiftRows
1043    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1044    mov     r3, r8
1045#   stmia   r3!, {r4-r7}                    @ MixColumns
1046    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1047    str     r5, [r3, #1<<2]
1048    str     r6, [r3, #2<<2]
1049    str     r7, [r3, #3<<2]
1051
1052    str     r0, [r2]                        @ SubBytes
1053    mov     r3, lr
1054    str     r3, [r2]                        @ SubBytes, ShiftRows
1055    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1056    mov     r3, r8
1057#   stmia   r3!, {r4-r7}                    @ MixColumns
1058    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1059    str     r5, [r3, #1<<2]
1060    str     r6, [r3, #2<<2]
1061    str     r7, [r3, #3<<2]
1063
1064    str     r0, [r2]                        @ SubBytes
1065    mov     r3, lr
1066    str     r3, [r2]                        @ SubBytes, ShiftRows
1067    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1068    mov     r3, r8
1069#   stmia   r3!, {r4-r7}                    @ MixColumns
1070    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1071    str     r5, [r3, #1<<2]
1072    str     r6, [r3, #2<<2]
1073    str     r7, [r3, #3<<2]
1075
1076    str     r0, [r2]                        @ SubBytes
1077    mov     r3, lr
1078    str     r3, [r2]                        @ SubBytes, ShiftRows
1079    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1080    mov     r3, r8
1081#   stmia   r3!, {r4-r7}                    @ MixColumns
1082    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1083    str     r5, [r3, #1<<2]
1084    str     r6, [r3, #2<<2]
1085    str     r7, [r3, #3<<2]
1087
1088    str     r0, [r2]                        @ SubBytes
1089    mov     r3, lr
1090    str     r3, [r2]                        @ SubBytes, ShiftRows
1091    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1092    mov     r3, r8
1093#   stmia   r3!, {r4-r7}                    @ MixColumns
1094    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1095    str     r5, [r3, #1<<2]
1096    str     r6, [r3, #2<<2]
1097    str     r7, [r3, #3<<2]
1099
1100    str     r0, [r2]                        @ SubBytes
1101    mov     r3, lr
1102    str     r3, [r2]                        @ SubBytes, ShiftRows
1103    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1104    mov     r3, r8
1105#   stmia   r3!, {r4-r7}                    @ MixColumns
1106    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1107    str     r5, [r3, #1<<2]
1108    str     r6, [r3, #2<<2]
1109    str     r7, [r3, #3<<2]
1111
1112
1113    ldr     r3, [sp, #1<<2]                 @ load nr
1114    cmp     r3, #10                         @ check nr
1115    beq     encrypt_end                     @ if aes128, end routine
1116                                            @ else, continue on
1117
1118    str     r0, [r2]                        @ SubBytes
1119    mov     r3, lr
1120    str     r3, [r2]                        @ SubBytes, ShiftRows
1121    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1122    mov     r3, r8
1123#   stmia   r3!, {r4-r7}                    @ MixColumns
1124    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1125    str     r5, [r3, #1<<2]
1126    str     r6, [r3, #2<<2]
1127    str     r7, [r3, #3<<2]
1129
1130    str     r0, [r2]                        @ SubBytes
1131    mov     r3, lr
1132    str     r3, [r2]                        @ SubBytes, ShiftRows
1133    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1134    mov     r3, r8
1135#   stmia   r3!, {r4-r7}                    @ MixColumns
1136    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1137    str     r5, [r3, #1<<2]
1138    str     r6, [r3, #2<<2]
1139    str     r7, [r3, #3<<2]
1141
1142
1143    ldr     r3, [sp, #1<<2]                 @ load nr
1144    cmp     r3, #12                         @ check nr
1145    beq     encrypt_end                     @ if aes192, end routine
1146                                            @ else, continue on
1147
1148    str     r0, [r2]                        @ SubBytes
1149    mov     r3, lr
1150    str     r3, [r2]                        @ SubBytes, ShiftRows
1151    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1152    mov     r3, r8
1153#   stmia   r3!, {r4-r7}                    @ MixColumns
1154    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1155    str     r5, [r3, #1<<2]
1156    str     r6, [r3, #2<<2]
1157    str     r7, [r3, #3<<2]
1159
1160    str     r0, [r2]                        @ SubBytes
1161    mov     r3, lr
1162    str     r3, [r2]                        @ SubBytes, ShiftRows
1163    ldmia   r1!, {r4-r7}                    @ load next 4 keys, *key_sch++
1164    mov     r3, r8
1165#   stmia   r3!, {r4-r7}                    @ MixColumns
1166    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1167    str     r5, [r3, #1<<2]
1168    str     r6, [r3, #2<<2]
1169    str     r7, [r3, #3<<2]
1171
1172
1173encrypt_end:
1174
1175    str     r0, [r2]                        @ SubBytes
1176    mov     r3, lr
1177    str     r3, [r2]                        @ SubBytes, ShiftRows (final round, no MixColumns store follows)
1178
1179# XOR the last 4 keys with the 4 words of ciphertext
1180    ldr     r0, =MMCAU_PPB_INDIRECT+(STR+CA0)<<2
1181    ldmia   r1!, {r4-r7}                    @ load last 4 keys
1182#   ldmia   r0, {r0-r3}                     @ load ciphertext
1183    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interruptible
1184    ldmia   r0!, {r1-r3}                    @ load ciphertext[1-3] and move r0 by 12 byte
1185    subs    r0, #1<<4                       @ move r0 back by 16 bytes
1186    ldr     r0, [r0]                        @ load ciphertext[0]
1187    eors    r4, r0                          @ last 4 keys XOR CA[0-3]
1188    eors    r5, r1
1189    eors    r6, r2
1190    eors    r7, r3
1191    rev     r4, r4                          @ byte-reverse each result word before storing
1192    rev     r5, r5
1193    rev     r6, r6
1194    rev     r7, r7
1195    ldr     r1, [sp, #2<<2]                 @ get *out
1196    stmia   r1!, {r4-r7}                    @ store XOR results in out[0-3]
1197
1198    pop     {r4}                            @ restore high reg
1199    mov     r8, r4
1200    add     sp, #2<<2                       @ skip the saved nr and *out, set sp = *{r4-r7}
1201    pop     {r4-r7, pc}                     @ restore low regs, exit routine
1202
1203
1204# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
1205#
1206#   MMCAU_AES_DECRYPT
1207#   Decrypts a single 16-byte block
1208#
1209#   ARGUMENTS
1210#   *in             pointer to 16-byte block of input ciphertext
1211#   *key_sch        pointer to key schedule (44, 52, 60 longwords)
1212#   nr              number of AES rounds (10, 12, 14 = f(key_schedule))
1213#   *out            pointer to 16-byte block of output plaintext
1214#
1215#
1216#   CALLING CONVENTION
1217#   void mmcau_aes_decrypt     (const unsigned char     *in,
1218#                               const unsigned char     *key_sch,
1219#                               const int               nr,
1220#                               unsigned char           *out)
1221#
1222#  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #  #
1223#
1224#    REGISTER  |  ALLOCATION (at the start of mmcau_aes_decrypt)
1225#   -----------+------------------------------------------------------------
1226#          r0  |  *in           (arg0)
1227#          r1  |  *key_sch      (arg1)
1228#          r2  |  nr            (arg2)
1229#          r3  |  *out          (arg3)
1230#              |
1231#        > r3  |  irrelevant
1232#
1233#
1234#    REGISTER  |  ALLOCATION (throughout mmcau_aes_decrypt)
1235#   -----------+------------------------------------------------------------
1236#          r0  |  mmcau_3_cmds(AESIR,AESIS+CA3,AESIS+CA2)
1237#          r1  |  *key_sch
1238#          r2  |  *mmcau_direct_cmd()
1239#          r3  |  scratch
1240#          r4  |  *key_sch[0-4i]
1241#          r5  |  *key_sch[1-4i]
1242#          r6  |  *key_sch[2-4i]
1243#          r7  |  *key_sch[3-4i]
1244#          r8  |  mmcau_indirect_cmd(AESIC+CA0)
1245#          r9  |  not used
1246#    (sl) r10  |  not used
1247#    (fp) r11  |  not used
1248#    (ip) r12  |  not used
1249#    (sp) r13  |  stack pointer
1250#    (lr) r14  |  mmcau_2_cmds(AESIS+CA1,AESIS+CA0)
1251#
1252# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
1253
1254    .global _mmcau_aes_decrypt
1255    .global mmcau_aes_decrypt
1256    .type   mmcau_aes_decrypt, %function
1257    .align  4
1258
1259_mmcau_aes_decrypt:
1260mmcau_aes_decrypt:
1261
1262# store nr and *out, we need them later in the routine
1263# store regs r4-r8, we need to restore them at the end of the routine
1264    push    {r2-r7, lr}                     @ store nr, *out, low regs, and lr
1265    mov     r4, r8
1266    push    {r4}                            @ store high reg
1267
1268# *key_sch is adjusted to define the end of the elements, such that
1269# the adjustment factor = f(nr) is defined by the expression:
1270#   end of key_sch = 4 * (nr + 1), where nr = {10, 12, 14}
1271    movs    r3, #28
1272    rors    r2, r3
1273    add     r1, r2                          @ calculate end of key_sch
1274    mov     lr, r1                          @ temporarily store end of key_sch
1275
1276# XOR the last 4 keys into the 4 words of ciphertext
1277    ldmia   r1!, {r4-r7}                    @ load last 4 keys
1278#   ldmia   r0, {r0-r3}                     @ load ciphertext
1279    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interuptible
1280    ldmia   r0!, {r1-r3}                    @ load ciphertext and move r0 by 12 byte
1281    subs    r0, #1<<4                       @ move r0 back by 16 bytes
1282    ldr     r0, [r0]                        @ load the rest of ciphertext
1283    rev     r0, r0
1284    rev     r1, r1
1285    rev     r2, r2
1286    rev     r3, r3
1287    eors    r4, r0
1288    eors    r5, r1
1289    eors    r6, r2
1290    eors    r7, r3
1291    ldr     r1, =MMCAU_PPB_INDIRECT+(LDR+CA0)<<2
1292    stmia   r1!, {r4-r7}                    @ store XOR results in CA[0-3]
1293
1294# load some of the regs in preparation of the decryption
1295    ldr     r0, =decrypt_reg_data
1296#   ldmia   r0, {r0-r3}
1297    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interuptible
1298    ldmia   r0!, {r1-r3}                    @ load and move r0 by 12 byte
1299    subs    r0, #1<<4                       @ move r0 back by 16 bytes
1300    ldr     r0, [r0]                        @ load the rest
1301
1302    mov     r8, r1                          @ r8 = mmcau_indirect_cmd(AESIC+CA0)
1303    mov     r1, lr                          @ restore end of key_sch
1304    subs    r1, #4<<2                       @ *key_sch--
1305    mov     lr, r3                          @ lr = mmcau_2_cmds(AESIS+CA1,AESIS+CA0)
1306
1307# send a series of cau commands to perform the decryption
1308    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1309    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1310    subs    r1, #8<<2                       @ *key_sch--
1311    str     r3, [r2]                        @ InvSubBytes
1312    mov     r3, r8
1313#   stmia   r3!, {r4-r7}                    @ MixColumns
1314    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1315    str     r5, [r3, #1<<2]
1316    str     r6, [r3, #2<<2]
1317    str     r7, [r3, #3<<2]
1318    adds    r3, #4<<2
1319
1320    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1321    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1322    mov     r3, lr
1323    subs    r1, #8<<2                       @ *key_sch--
1324    str     r3, [r2]                        @ InvSubBytes
1325    mov     r3, r8
1326#   stmia   r3!, {r4-r7}                    @ MixColumns
1327    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1328    str     r5, [r3, #1<<2]
1329    str     r6, [r3, #2<<2]
1330    str     r7, [r3, #3<<2]
1331    adds    r3, #4<<2
1332
1333    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1334    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1335    mov     r3, lr
1336    subs    r1, #8<<2                       @ *key_sch--
1337    str     r3, [r2]                        @ InvSubBytes
1338    mov     r3, r8
1339#   stmia   r3!, {r4-r7}                    @ MixColumns
1340    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1341    str     r5, [r3, #1<<2]
1342    str     r6, [r3, #2<<2]
1343    str     r7, [r3, #3<<2]
1344    adds    r3, #4<<2
1345
1346    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1347    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1348    mov     r3, lr
1349    subs    r1, #8<<2                       @ *key_sch--
1350    str     r3, [r2]                        @ InvSubBytes
1351    mov     r3, r8
1352#   stmia   r3!, {r4-r7}                    @ MixColumns
1353    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1354    str     r5, [r3, #1<<2]
1355    str     r6, [r3, #2<<2]
1356    str     r7, [r3, #3<<2]
1357    adds    r3, #4<<2
1358
1359    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1360    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1361    mov     r3, lr
1362    subs    r1, #8<<2                       @ *key_sch--
1363    str     r3, [r2]                        @ InvSubBytes
1364    mov     r3, r8
1365#   stmia   r3!, {r4-r7}                    @ MixColumns
1366    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1367    str     r5, [r3, #1<<2]
1368    str     r6, [r3, #2<<2]
1369    str     r7, [r3, #3<<2]
1370    adds    r3, #4<<2
1371
1372    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1373    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1374    mov     r3, lr
1375    subs    r1, #8<<2                       @ *key_sch--
1376    str     r3, [r2]                        @ InvSubBytes
1377    mov     r3, r8
1378#   stmia   r3!, {r4-r7}                    @ MixColumns
1379    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1380    str     r5, [r3, #1<<2]
1381    str     r6, [r3, #2<<2]
1382    str     r7, [r3, #3<<2]
1383    adds    r3, #4<<2
1384
1385    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1386    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1387    mov     r3, lr
1388    subs    r1, #8<<2                       @ *key_sch--
1389    str     r3, [r2]                        @ InvSubBytes
1390    mov     r3, r8
1391#   stmia   r3!, {r4-r7}                    @ MixColumns
1392    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1393    str     r5, [r3, #1<<2]
1394    str     r6, [r3, #2<<2]
1395    str     r7, [r3, #3<<2]
1396    adds    r3, #4<<2
1397
1398    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1399    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1400    mov     r3, lr
1401    subs    r1, #8<<2                       @ *key_sch--
1402    str     r3, [r2]                        @ InvSubBytes
1403    mov     r3, r8
1404#   stmia   r3!, {r4-r7}                    @ MixColumns
1405    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1406    str     r5, [r3, #1<<2]
1407    str     r6, [r3, #2<<2]
1408    str     r7, [r3, #3<<2]
1409    adds    r3, #4<<2
1410
1411    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1412    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1413    mov     r3, lr
1414    subs    r1, #8<<2                       @ *key_sch--
1415    str     r3, [r2]                        @ InvSubBytes
1416    mov     r3, r8
1417#   stmia   r3!, {r4-r7}                    @ MixColumns
1418    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1419    str     r5, [r3, #1<<2]
1420    str     r6, [r3, #2<<2]
1421    str     r7, [r3, #3<<2]
1422    adds    r3, #4<<2
1423
1424
1425    ldr     r3, [sp, #1<<2]                 @ restore nr
1426    cmp     r3, #10                         @ check nr
1427    beq     decrypt_end                     @ if aes128, end routine
1428                                            @ else, continue on
1429
1430    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1431    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1432    mov     r3, lr
1433    subs    r1, #8<<2                       @ *key_sch--
1434    str     r3, [r2]                        @ InvSubBytes
1435    mov     r3, r8
1436#   stmia   r3!, {r4-r7}                    @ MixColumns
1437    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1438    str     r5, [r3, #1<<2]
1439    str     r6, [r3, #2<<2]
1440    str     r7, [r3, #3<<2]
1441    adds    r3, #4<<2
1442
1443    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1444    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1445    mov     r3, lr
1446    subs    r1, #8<<2                       @ *key_sch--
1447    str     r3, [r2]                        @ InvSubBytes
1448    mov     r3, r8
1449#   stmia   r3!, {r4-r7}                    @ MixColumns
1450    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1451    str     r5, [r3, #1<<2]
1452    str     r6, [r3, #2<<2]
1453    str     r7, [r3, #3<<2]
1454    adds    r3, #4<<2
1455
1456
1457    ldr     r3, [sp, #1<<2]                 @ restore nr
1458    cmp     r3, #12                         @ check nr
1459    beq     decrypt_end                     @ if aes192, end routine
1460                                            @ else, continue on
1461
1462    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1463    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1464    mov     r3, lr
1465    subs    r1, #8<<2                       @ *key_sch--
1466    str     r3, [r2]                        @ InvSubBytes
1467    mov     r3, r8
1468#   stmia   r3!, {r4-r7}                    @ MixColumns
1469    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1470    str     r5, [r3, #1<<2]
1471    str     r6, [r3, #2<<2]
1472    str     r7, [r3, #3<<2]
1473    adds    r3, #4<<2
1474
1475    ldmia   r1!, {r4-r7}                    @ load previous 4 keys
1476    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1477    mov     r3, lr
1478    subs    r1, #8<<2                       @ *key_sch--
1479    str     r3, [r2]                        @ InvSubBytes
1480    mov     r3, r8
1481#   stmia   r3!, {r4-r7}                    @ MixColumns
1482    str     r4, [r3, #0<<2]                 @ MixColumns without stmia
1483    str     r5, [r3, #1<<2]
1484    str     r6, [r3, #2<<2]
1485    str     r7, [r3, #3<<2]
1486    adds    r3, #4<<2
1487
1488
1489decrypt_end:
1490
1491    str     r0, [r2]                        @ InvShiftRows, InvSubBytes
1492    mov     r3, lr
1493    str     r3, [r2]                        @ InvSubBytes
1494
1495# XOR the first 4 keys with the 4 words of plaintext
1496    ldr     r0, =MMCAU_PPB_INDIRECT+(STR+CA0)<<2
1497    ldmia   r1!, {r4-r7}                    @ load first 4 keys
1498#   ldmia   r0, {r0-r3}                     @ load plaintext
1499    adds    r0, #1<<2                       @ move by 4 byte to make ldmia interuptible
1500    ldmia   r0!, {r1-r3}                    @ load plaintext and move r0 by 12 byte
1501    subs    r0, #1<<4                       @ move r0 back by 16 bytes
1502    ldr     r0, [r0]                        @ load the rest of plaintext
1503    eors    r4, r0
1504    eors    r5, r1
1505    eors    r6, r2
1506    eors    r7, r3
1507    rev     r4, r4
1508    rev     r5, r5
1509    rev     r6, r6
1510    rev     r7, r7
1511    ldr     r1, [sp, #2<<2]                 @ get *out
1512    stmia   r1!, {r4-r7}                    @ store XOR results in out[0-3]
1513
1514    pop     {r4}                            @ restore high reg
1515    mov     r8, r4
1516    add     sp, #2<<2                       @ set sp = *{r4-r7}
1517    pop     {r4-r7, pc}                     @ restore low regs, exit routine
1518
1519
1520# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
1521
1522    .data
1523
1524
1525    .type   set_key_reg_data, %object
1526    .align  4
1527
1528set_key_reg_data:
1529    .word   rcon                            @ r8
1530    .word   MMCAU_1_CMD+(AESS+CAA)<<22      @ r9
1531    .word   MMCAU_PPB_DIRECT                @ sl
1532    .word   MMCAU_PPB_INDIRECT+(LDR+CAA)<<2 @ fp
1533    .word   MMCAU_PPB_INDIRECT+(STR+CAA)<<2 @ ip
1534
1535
1536    .type   encrypt_reg_data, %object
1537    .align  4
1538
1539encrypt_reg_data:
1540    .word   MMCAU_3_CMDS+(AESS+CA0)<<22+(AESS+CA1)<<11+AESS+CA2 @ r0
1541    .word   MMCAU_PPB_INDIRECT+(AESC+CA0)<<2                    @ r8
1542    .word   MMCAU_PPB_DIRECT                                    @ r2
1543    .word   MMCAU_2_CMDS+(AESS+CA3)<<22+(AESR)<<11              @ lr
1544
1545
1546    .type   decrypt_reg_data, %object
1547    .align  4
1548
1549decrypt_reg_data:
1550    .word   MMCAU_3_CMDS+(AESIR)<<22+(AESIS+CA3)<<11+AESIS+CA2  @ r0
1551    .word   MMCAU_PPB_INDIRECT+(AESIC+CA0)<<2                   @ r8
1552    .word   MMCAU_PPB_DIRECT                                    @ r2
1553    .word   MMCAU_2_CMDS+(AESIS+CA1)<<22+(AESIS+CA0)<<11        @ lr
1554
1555
1556    .type   rcon, %object
1557    .align  4
1558
1559rcon:
1560    .word 0x01000000
1561    .word 0x02000000
1562    .word 0x04000000
1563    .word 0x08000000
1564    .word 0x10000000
1565    .word 0x20000000
1566    .word 0x40000000
1567    .word 0x80000000
1568    .word 0x1b000000
1569    .word 0x36000000
1570