# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) Freescale Semiconductor, Inc 2013.
#
# FILE NAME     : mmcau_aes_functions.s
# VERSION       : $Id: $
# TYPE          : Source Cortex-M0+ assembly library code
# DEPARTMENT    : MCG R&D Core and Platforms
# AUTHOR        : Anthony (Teejay) Ciancio
# AUTHOR EMAIL  : teejay.ciancio@freescale.com
#
# # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# VERSION   DATE        AUTHOR      DESCRIPTION
# *******   ****        ******      ***********
# 1.0       2013-11     Ciancio     initial release, using the ARMv6-M ISA
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


    .include "cau2_defines.hdr"
    .syntax unified


    .equ MMCAU_PPB_DIRECT, 0xf0005000
    .equ MMCAU_PPB_INDIRECT, 0xf0005800
    .equ MMCAU_1_CMD, 0x80000000
    .equ MMCAU_2_CMDS, 0x80100000
    .equ MMCAU_3_CMDS, 0x80100200


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# MMCAU_AES_SET_KEY
# Performs an AES key expansion
#
# ARGUMENTS
# *key          pointer to input key (128, 192, 256 bits in length)
# key_size      key_size in bits (128, 192, 256)
# *key_sch      pointer to key schedule output (44, 52, 60 longwords)
#
# CALLING CONVENTION
# void mmcau_aes_set_key (const unsigned char *key,
#                         const int key_size,
#                         unsigned char *key_sch)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# REGISTER  |  ALLOCATION (at the start of mmcau_aes_set_key)
# ----------+------------------------------------------------------------
#       r0  |  *key (arg0)
#       r1  |  key_size (arg1)
#       r2  |  *key_sch (arg2)
#           |
#     > r2  |  irrelevant
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    .global _mmcau_aes_set_key
    .global mmcau_aes_set_key
    .type   mmcau_aes_set_key, %function
    .align  4

_mmcau_aes_set_key:
mmcau_aes_set_key:

# store regs r4-r12 and r14, we need to restore them at the end of the routine
    push   {r4-r7, lr}          @ store low regs and link reg
    mov    r3, r8
    mov    r4, r9
    mov    r5, sl
    mov    r6, fp
    mov    r7, ip
    push   {r3-r7}              @ store high regs

    ldr    r3, =set_key_reg_data @ prepare for set_key reg load


set_key_check_size:
    cmp    r1, #128             @ if key_size != 128,
    bne    set_key_check_size_again @ then = 192 or 256, so check again
    b      set_key_128          @ else = 128, so do set_key_128


set_key_check_size_again:
    cmp    r1, #192             @ if key_size != 192,
    bne    set_key_256          @ then = 256, so do set_key_256
    b      set_key_192          @ else = 192, so do set_key_192
    .ltorg


set_key_256:

# REGISTER  |  ALLOCATION (throughout set_key_256)
# ----------+------------------------------------------------------------
#       r0  |  scratch
#       r1  |  scratch
#       r2  |  *key_sch
#       r3  |  key_sch[0+8i] / scratch
#       r4  |  key_sch[1+8i] / scratch
#       r5  |  key_sch[2+8i] / scratch
#       r6  |  key_sch[3+8i] / scratch
#       r7  |  scratch
#       r8  |  *rcon
#       r9  |  mmcau_1_cmd(AESS+CAA)
#  (sl) r10 |  *mmcau_direct_cmd()
#  (fp) r11 |  mmcau_indirect_cmd(LDR+CAA)
#  (ip) r12 |  mmcau_indirect_cmd(STR+CAA)
#  (sp) r13 |  stack pointer
#  (lr) r14 |  link register

# load some of the regs in preparation of the AES-256 set key calculations
#   ldmia  r3, {r3-r7}
    mov    r1, r3               @ store r3 in scratch r1 to be interruptible
    adds   r3, #1<<2            @ move r3 by 4 bytes
    ldmia  r3!, {r4-r7}         @ ldmia without r3
    ldr    r3, [r1]             @ load to r3 from scratch address
    mov    r8, r3               @ r8 = *rcon
    mov    r9, r4               @ r9 = mmcau_1_cmd(AESS+CAA)
    mov    sl, r5               @ sl = *mmcau_direct_cmd()
    mov    fp, r6               @ fp = mmcau_indirect_cmd(LDR+CAA)
    mov    ip, r7               @ ip = mmcau_indirect_cmd(STR+CAA)
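# NOTE: the split load above replaces the single commented-out
# "ldmia r3, {r3-r7}". The likely reason (an assumption based on the
# ARMv6-M architecture, not stated in the original comments) is that an
# interrupted ldmia is abandoned and restarted from scratch, so a base
# register that is also in the destination list may already have been
# overwritten when the instruction restarts. Keeping the base out of the
# list makes the sequence safe to restart:
#
#   mov    r1, r3               @ keep a copy of the base address
#   adds   r3, #1<<2            @ step past the word loaded separately
#   ldmia  r3!, {r4-r7}         @ base register not in the destination list
#   ldr    r3, [r1]             @ fetch the remaining word on its own
#
# The same pattern is used throughout this file wherever the original
# ldmia had its base register in the register list.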

# calculate key_sch[0-4]
    ldmia  r0!, {r3-r7}         @ load key[0-4]; *key++
    rev    r3, r3               @ byterev(key[0]) = key_sch[0]
    rev    r4, r4               @ byterev(key[1]) = key_sch[1]
    rev    r5, r5               @ byterev(key[2]) = key_sch[2]
    rev    r6, r6               @ byterev(key[3]) = key_sch[3]
    rev    r7, r7               @ byterev(key[4]) = key_sch[4]
    stmia  r2!, {r3-r7}         @ store key_sch[0-4], *key_sch++

# calculate key_sch[5-7]
#   ldmia  r0, {r0-r1,r7}       @ load key[5-7]
    adds   r0, #1<<2            @ move by 4 bytes, make ldmia interruptible in MMCAU
    ldmia  r0!, {r1,r7}         @ load key[6-7] and move r0 by 8 bytes
    subs   r0, #3<<2            @ move r0 back by 12 bytes
    ldr    r0, [r0]             @ load key[5]
    rev    r0, r0               @ byterev(key[5]) = key_sch[5]
    rev    r1, r1               @ byterev(key[6]) = key_sch[6]
    rev    r7, r7               @ byterev(key[7]) = key_sch[7]
    stmia  r2!, {r0-r1, r7}     @ store key_sch[5-7], *key_sch++

# calculate key_sch[8-11]
    mov    r0, r8
    ldr    r1, [r0]             @ load rcon[0]
    movs   r0, #24
    rors   r7, r0               @ ROTL(key_sch[7],8)
    mov    r0, fp
    str    r7, [r0]             @ ROTL(key_sch[7]) -> acc
    mov    r7, r9
    mov    r0, sl
    str    r7, [r0]             @ AES SubBytes
    mov    r0, ip
    ldr    r7, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[0]
    eors   r3, r1               @ XOR key_sch[0] = key_sch[8]
    eors   r4, r3               @ XOR key_sch[1] = key_sch[9]
    eors   r5, r4               @ XOR key_sch[2] = key_sch[10]
    eors   r6, r5               @ XOR key_sch[3] = key_sch[11]
    stmia  r2!, {r3-r6}         @ store key_sch[8-11], *key_sch++

# calculate key_sch[12-15]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[11] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[4]
    ldmia  r2!, {r3-r6}         @ load key_sch[4-7], *key_sch++
    eors   r3, r1               @ XOR key_sch[4] = key_sch[12]
    eors   r4, r3               @ XOR key_sch[5] = key_sch[13]
    eors   r5, r4               @ XOR key_sch[6] = key_sch[14]
    eors   r6, r5               @ XOR key_sch[7] = key_sch[15]
    adds   r2, #4<<2            @ set *key_sch[12]
    stmia  r2!, {r3-r6}         @ store key_sch[12-15], *key_sch++

# calculate key_sch[16-19]
    mov    r0, r8
    ldr    r7, [r0, #1<<2]      @ load rcon[1]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[15],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[15]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[1]
    subs   r2, #8<<2            @ set *key_sch[8]
    ldmia  r2!, {r3-r6}         @ load key_sch[8-11], *key_sch++
    eors   r3, r1               @ XOR key_sch[8] = key_sch[16]
    eors   r4, r3               @ XOR key_sch[9] = key_sch[17]
    eors   r5, r4               @ XOR key_sch[10] = key_sch[18]
    eors   r6, r5               @ XOR key_sch[11] = key_sch[19]
    adds   r2, #4<<2            @ set *key_sch[16]
    stmia  r2!, {r3-r6}         @ store key_sch[16-19], *key_sch++

# calculate key_sch[20-23]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[19] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[12]
    ldmia  r2!, {r3-r6}         @ load key_sch[12-15], *key_sch++
    eors   r3, r1               @ XOR key_sch[12] = key_sch[20]
    eors   r4, r3               @ XOR key_sch[13] = key_sch[21]
    eors   r5, r4               @ XOR key_sch[14] = key_sch[22]
    eors   r6, r5               @ XOR key_sch[15] = key_sch[23]
    adds   r2, #4<<2            @ set *key_sch[20]
    stmia  r2!, {r3-r6}         @ store key_sch[20-23], *key_sch++

# calculate key_sch[24-27]
    mov    r0, r8
    ldr    r7, [r0, #2<<2]      @ load rcon[2]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[23],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[23]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[2]
    subs   r2, #8<<2            @ set *key_sch[16]
    ldmia  r2!, {r3-r6}         @ load key_sch[16-19], *key_sch++
    eors   r3, r1               @ XOR key_sch[16] = key_sch[24]
    eors   r4, r3               @ XOR key_sch[17] = key_sch[25]
    eors   r5, r4               @ XOR key_sch[18] = key_sch[26]
    eors   r6, r5               @ XOR key_sch[19] = key_sch[27]
    adds   r2, #4<<2            @ set *key_sch[24]
    stmia  r2!, {r3-r6}         @ store key_sch[24-27], *key_sch++

# calculate key_sch[28-31]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[27] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[20]
    ldmia  r2!, {r3-r6}         @ load key_sch[20-23], *key_sch++
    eors   r3, r1               @ XOR key_sch[20] = key_sch[28]
    eors   r4, r3               @ XOR key_sch[21] = key_sch[29]
    eors   r5, r4               @ XOR key_sch[22] = key_sch[30]
    eors   r6, r5               @ XOR key_sch[23] = key_sch[31]
    adds   r2, #4<<2            @ set *key_sch[28]
    stmia  r2!, {r3-r6}         @ store key_sch[28-31], *key_sch++

# calculate key_sch[32-35]
    mov    r0, r8
    ldr    r7, [r0, #3<<2]      @ load rcon[3]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[31],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[31]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[3]
    subs   r2, #8<<2            @ set *key_sch[24]
    ldmia  r2!, {r3-r6}         @ load key_sch[24-27], *key_sch++
    eors   r3, r1               @ XOR key_sch[24] = key_sch[32]
    eors   r4, r3               @ XOR key_sch[25] = key_sch[33]
    eors   r5, r4               @ XOR key_sch[26] = key_sch[34]
    eors   r6, r5               @ XOR key_sch[27] = key_sch[35]
    adds   r2, #4<<2            @ set *key_sch[32]
    stmia  r2!, {r3-r6}         @ store key_sch[32-35], *key_sch++

# calculate key_sch[36-39]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[35] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[28]
    ldmia  r2!, {r3-r6}         @ load key_sch[28-31], *key_sch++
    eors   r3, r1               @ XOR key_sch[28] = key_sch[36]
    eors   r4, r3               @ XOR key_sch[29] = key_sch[37]
    eors   r5, r4               @ XOR key_sch[30] = key_sch[38]
    eors   r6, r5               @ XOR key_sch[31] = key_sch[39]
    adds   r2, #4<<2            @ set *key_sch[36]
    stmia  r2!, {r3-r6}         @ store key_sch[36-39], *key_sch++

# calculate key_sch[40-43]
    mov    r0, r8
    ldr    r7, [r0, #4<<2]      @ load rcon[4]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[39],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[39]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[4]
    subs   r2, #8<<2            @ set *key_sch[32]
    ldmia  r2!, {r3-r6}         @ load key_sch[32-35], *key_sch++
    eors   r3, r1               @ XOR key_sch[32] = key_sch[40]
    eors   r4, r3               @ XOR key_sch[33] = key_sch[41]
    eors   r5, r4               @ XOR key_sch[34] = key_sch[42]
    eors   r6, r5               @ XOR key_sch[35] = key_sch[43]
    adds   r2, #4<<2            @ set *key_sch[40]
    stmia  r2!, {r3-r6}         @ store key_sch[40-43], *key_sch++

# calculate key_sch[44-47]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[43] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[36]
    ldmia  r2!, {r3-r6}         @ load key_sch[36-39], *key_sch++
    eors   r3, r1               @ XOR key_sch[36] = key_sch[44]
    eors   r4, r3               @ XOR key_sch[37] = key_sch[45]
    eors   r5, r4               @ XOR key_sch[38] = key_sch[46]
    eors   r6, r5               @ XOR key_sch[39] = key_sch[47]
    adds   r2, #4<<2            @ set *key_sch[44]
    stmia  r2!, {r3-r6}         @ store key_sch[44-47], *key_sch++

# calculate key_sch[48-51]
    mov    r0, r8
    ldr    r7, [r0, #5<<2]      @ load rcon[5]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[47],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[47]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[5]
    subs   r2, #8<<2            @ set *key_sch[40]
    ldmia  r2!, {r3-r6}         @ load key_sch[40-43], *key_sch++
    eors   r3, r1               @ XOR key_sch[40] = key_sch[48]
    eors   r4, r3               @ XOR key_sch[41] = key_sch[49]
    eors   r5, r4               @ XOR key_sch[42] = key_sch[50]
    eors   r6, r5               @ XOR key_sch[43] = key_sch[51]
    adds   r2, #4<<2            @ set *key_sch[48]
    stmia  r2!, {r3-r6}         @ store key_sch[48-51], *key_sch++

# calculate key_sch[52-55]
    mov    r5, fp
    str    r6, [r5]             @ key_sch[51] -> acc (no rotate)
    mov    r3, r9
    mov    r4, sl
    str    r3, [r4]             @ AES SubBytes
    mov    r7, ip
    ldr    r1, [r7]             @ load CAA
    subs   r2, #8<<2            @ set *key_sch[44]
    ldmia  r2!, {r3-r6}         @ load key_sch[44-47], *key_sch++
    eors   r3, r1               @ XOR key_sch[44] = key_sch[52]
    eors   r4, r3               @ XOR key_sch[45] = key_sch[53]
    eors   r5, r4               @ XOR key_sch[46] = key_sch[54]
    eors   r6, r5               @ XOR key_sch[47] = key_sch[55]
    adds   r2, #4<<2            @ set *key_sch[52]
    stmia  r2!, {r3-r6}         @ store key_sch[52-55], *key_sch++

# calculate key_sch[56-59]
    mov    r0, r8
    ldr    r7, [r0, #6<<2]      @ load rcon[6]
    mov    r5, fp
    movs   r0, #24
    mov    r3, r9
    rors   r6, r0               @ ROTL(key_sch[55],8)
    mov    r4, sl
    str    r6, [r5]             @ ROTL(key_sch[55]) -> acc
    mov    r0, ip
    str    r3, [r4]             @ AES SubBytes
    ldr    r1, [r0]             @ load CAA
    eors   r1, r7               @ XOR rcon[6]
    subs   r2, #8<<2            @ set *key_sch[48]
    ldmia  r2!, {r3-r6}         @ load key_sch[48-51], *key_sch++
    eors   r3, r1               @ XOR key_sch[48] = key_sch[56]
    eors   r4, r3               @ XOR key_sch[49] = key_sch[57]
    eors   r5, r4               @ XOR key_sch[50] = key_sch[58]
    eors   r6, r5               @ XOR key_sch[51] = key_sch[59]
    adds   r2, #4<<2            @ set *key_sch[56]
    stmia  r2!, {r3-r6}         @ store key_sch[56-59], *key_sch++

    b      set_key_end          @ end routine

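# NOTE: the unrolled blocks above (and the 192/128-bit variants below)
# follow the standard FIPS-197 key expansion recurrence, restated here
# for reference (summary comment, not from the original source):
#
#   w[i] = w[i-Nk] ^ SubWord(RotWord(w[i-1])) ^ rcon[i/Nk - 1]  if i mod Nk == 0
#   w[i] = w[i-Nk] ^ SubWord(w[i-1])                            if Nk == 8 and i mod Nk == 4
#   w[i] = w[i-Nk] ^ w[i-1]                                     otherwise
#
# with Nk = 8, 6, 4 for 256-, 192- and 128-bit keys. The SubWord step is
# delegated to the CAU by loading the word into CAA and issuing AESS.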

set_key_192:

# REGISTER  |  ALLOCATION (throughout set_key_192)
# ----------+------------------------------------------------------------
#       r0  |  key_sch[0+6i]
#       r1  |  key_sch[1+6i]
#       r2  |  *key_sch
#       r3  |  key_sch[2+6i]
#       r4  |  key_sch[3+6i]
#       r5  |  key_sch[4+6i] / rcon[i]
#       r6  |  key_sch[5+6i] / scratch
#       r7  |  scratch
#       r8  |  *rcon
#       r9  |  mmcau_1_cmd(AESS+CAA)
#  (sl) r10 |  *mmcau_direct_cmd()
#  (fp) r11 |  mmcau_indirect_cmd(LDR+CAA)
#      NOTE |  mmcau_indirect_cmd(STR+CAA) = mmcau_indirect_cmd(LDR+CAA)+64
#  (ip) r12 |  temporary storage for key_sch[4+6i]
#  (sp) r13 |  stack pointer
#  (lr) r14 |  temporary storage for key_sch[5+6i]

# load some of the regs in preparation of the AES-192 set key calculations
#   ldmia  r3, {r3-r6}
    mov    r7, r3               @ make ldmia interruptible in MMCAU by storing r3 addr into scratch r7
    adds   r3, #1<<2            @ move r3 addr by 1 word
    ldmia  r3!, {r4-r6}         @ load from r3 (r7 + 1 word) to r4-r6 with writeback
    ldr    r3, [r7]             @ load to r3 from address in scratch (r7)

    mov    r8, r3               @ r8 = *rcon
    mov    r9, r4               @ r9 = mmcau_1_cmd(AESS+CAA)
    mov    sl, r5               @ sl = *mmcau_direct_cmd()
    mov    fp, r6               @ fp = mmcau_indirect_cmd(LDR+CAA)

# calculate key_sch[0-5]
#   ldmia  r0, {r0-r1, r3-r6}   @ load key[0-5]
                                @ make ldmia interruptible in MMCAU
    adds   r0, #1<<2            @ move by 4 bytes
    ldmia  r0!, {r1, r3-r6}     @ load key[1-5] + move by 20 bytes
    subs   r0, #6<<2            @ move back by 24 bytes
    ldr    r0, [r0]             @ load key[0]

    rev    r0, r0               @ byterev(key[0]) = key_sch[0]
    rev    r1, r1               @ byterev(key[1]) = key_sch[1]
    rev    r3, r3               @ byterev(key[2]) = key_sch[2]
    rev    r4, r4               @ byterev(key[3]) = key_sch[3]
    rev    r5, r5               @ byterev(key[4]) = key_sch[4]
    rev    r6, r6               @ byterev(key[5]) = key_sch[5]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[0-5]

# calculate key_sch[6-11]
    mov    ip, r5               @ temporarily store key_sch[4]
    mov    lr, r6               @ temporarily store key_sch[5]
    mov    r7, r8
    ldr    r5, [r7, #0<<2]      @ load rcon[0]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[5],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[5],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[0]
    mov    r5, ip               @ restore key_sch[4]
    mov    r6, lr               @ restore key_sch[5]
    eors   r0, r7               @ XOR key_sch[0] = key_sch[6]
    eors   r1, r0               @ XOR key_sch[1] = key_sch[7]
    eors   r3, r1               @ XOR key_sch[2] = key_sch[8]
    eors   r4, r3               @ XOR key_sch[3] = key_sch[9]
    eors   r5, r4               @ XOR key_sch[4] = key_sch[10]
    eors   r6, r5               @ XOR key_sch[5] = key_sch[11]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[6-11], *key_sch++

# calculate key_sch[12-17]
    mov    ip, r5               @ temporarily store key_sch[10]
    mov    lr, r6               @ temporarily store key_sch[11]
    mov    r7, r8
    ldr    r5, [r7, #1<<2]      @ load rcon[1]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[11],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[11],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[1]
    mov    r5, ip               @ restore key_sch[10]
    mov    r6, lr               @ restore key_sch[11]
    eors   r0, r7               @ XOR key_sch[6] = key_sch[12]
    eors   r1, r0               @ XOR key_sch[7] = key_sch[13]
    eors   r3, r1               @ XOR key_sch[8] = key_sch[14]
    eors   r4, r3               @ XOR key_sch[9] = key_sch[15]
    eors   r5, r4               @ XOR key_sch[10] = key_sch[16]
    eors   r6, r5               @ XOR key_sch[11] = key_sch[17]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[12-17], *key_sch++

# calculate key_sch[18-23]
    mov    ip, r5               @ temporarily store key_sch[16]
    mov    lr, r6               @ temporarily store key_sch[17]
    mov    r7, r8
    ldr    r5, [r7, #2<<2]      @ load rcon[2]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[17],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[17],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[2]
    mov    r5, ip               @ restore key_sch[16]
    mov    r6, lr               @ restore key_sch[17]
    eors   r0, r7               @ XOR key_sch[12] = key_sch[18]
    eors   r1, r0               @ XOR key_sch[13] = key_sch[19]
    eors   r3, r1               @ XOR key_sch[14] = key_sch[20]
    eors   r4, r3               @ XOR key_sch[15] = key_sch[21]
    eors   r5, r4               @ XOR key_sch[16] = key_sch[22]
    eors   r6, r5               @ XOR key_sch[17] = key_sch[23]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[18-23], *key_sch++

# calculate key_sch[24-29]
    mov    ip, r5               @ temporarily store key_sch[22]
    mov    lr, r6               @ temporarily store key_sch[23]
    mov    r7, r8
    ldr    r5, [r7, #3<<2]      @ load rcon[3]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[23],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[23],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[3]
    mov    r5, ip               @ restore key_sch[22]
    mov    r6, lr               @ restore key_sch[23]
    eors   r0, r7               @ XOR key_sch[18] = key_sch[24]
    eors   r1, r0               @ XOR key_sch[19] = key_sch[25]
    eors   r3, r1               @ XOR key_sch[20] = key_sch[26]
    eors   r4, r3               @ XOR key_sch[21] = key_sch[27]
    eors   r5, r4               @ XOR key_sch[22] = key_sch[28]
    eors   r6, r5               @ XOR key_sch[23] = key_sch[29]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[24-29], *key_sch++

# calculate key_sch[30-35]
    mov    ip, r5               @ temporarily store key_sch[28]
    mov    lr, r6               @ temporarily store key_sch[29]
    mov    r7, r8
    ldr    r5, [r7, #4<<2]      @ load rcon[4]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[29],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[29],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[4]
    mov    r5, ip               @ restore key_sch[28]
    mov    r6, lr               @ restore key_sch[29]
    eors   r0, r7               @ XOR key_sch[24] = key_sch[30]
    eors   r1, r0               @ XOR key_sch[25] = key_sch[31]
    eors   r3, r1               @ XOR key_sch[26] = key_sch[32]
    eors   r4, r3               @ XOR key_sch[27] = key_sch[33]
    eors   r5, r4               @ XOR key_sch[28] = key_sch[34]
    eors   r6, r5               @ XOR key_sch[29] = key_sch[35]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[30-35], *key_sch++

# calculate key_sch[36-41]
    mov    ip, r5               @ temporarily store key_sch[34]
    mov    lr, r6               @ temporarily store key_sch[35]
    mov    r7, r8
    ldr    r5, [r7, #5<<2]      @ load rcon[5]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[35],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[35],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[5]
    mov    r5, ip               @ restore key_sch[34]
    mov    r6, lr               @ restore key_sch[35]
    eors   r0, r7               @ XOR key_sch[30] = key_sch[36]
    eors   r1, r0               @ XOR key_sch[31] = key_sch[37]
    eors   r3, r1               @ XOR key_sch[32] = key_sch[38]
    eors   r4, r3               @ XOR key_sch[33] = key_sch[39]
    eors   r5, r4               @ XOR key_sch[34] = key_sch[40]
    eors   r6, r5               @ XOR key_sch[35] = key_sch[41]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[36-41], *key_sch++

# calculate key_sch[42-47]
    mov    ip, r5               @ temporarily store key_sch[40]
    mov    lr, r6               @ temporarily store key_sch[41]
    mov    r7, r8
    ldr    r5, [r7, #6<<2]      @ load rcon[6]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[41],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[41],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[6]
    mov    r5, ip               @ restore key_sch[40]
    mov    r6, lr               @ restore key_sch[41]
    eors   r0, r7               @ XOR key_sch[36] = key_sch[42]
    eors   r1, r0               @ XOR key_sch[37] = key_sch[43]
    eors   r3, r1               @ XOR key_sch[38] = key_sch[44]
    eors   r4, r3               @ XOR key_sch[39] = key_sch[45]
    eors   r5, r4               @ XOR key_sch[40] = key_sch[46]
    eors   r6, r5               @ XOR key_sch[41] = key_sch[47]
    stmia  r2!, {r0-r1, r3-r6}  @ store key_sch[42-47], *key_sch++

# calculate key_sch[48-51]
    mov    r7, r8
    ldr    r5, [r7, #7<<2]      @ load rcon[7]
    movs   r7, #24
    rors   r6, r7               @ ROTL(key_sch[47],8)
    mov    r7, fp
    str    r6, [r7]             @ ROTL(key_sch[47],8) -> acc
    mov    r6, r9
    mov    r7, sl
    str    r6, [r7]             @ AES SubBytes
    mov    r6, fp
    adds   r6, #64
    ldr    r7, [r6]             @ load CAA
    eors   r7, r5               @ XOR rcon[7]
    eors   r0, r7               @ XOR key_sch[42] = key_sch[48]
    eors   r1, r0               @ XOR key_sch[43] = key_sch[49]
    eors   r3, r1               @ XOR key_sch[44] = key_sch[50]
    eors   r4, r3               @ XOR key_sch[45] = key_sch[51]
    stmia  r2!, {r0-r1, r3-r4}  @ store key_sch[48-51], *key_sch++

    b      set_key_end          @ end routine


set_key_128:

# REGISTER  |  ALLOCATION (throughout set_key_128)
# ----------+------------------------------------------------------------
#       r0  |  rcon[i]
#       r1  |  scratch
#       r2  |  *key_sch
#       r3  |  key_sch[0+4i]
#       r4  |  key_sch[1+4i]
#       r5  |  key_sch[2+4i]
#       r6  |  key_sch[3+4i]
#       r7  |  scratch
#       r8  |  *rcon
#       r9  |  mmcau_1_cmd(AESS+CAA)
#  (sl) r10 |  *mmcau_direct_cmd()
#  (fp) r11 |  mmcau_indirect_cmd(LDR+CAA)
#  (ip) r12 |  mmcau_indirect_cmd(STR+CAA)
#  (sp) r13 |  stack pointer
#  (lr) r14 |  link register

# load some of the regs in preparation of the AES-128 set key calculations
#   ldmia  r3, {r3-r7}
    mov    r1, r3               @ store r3 in r1 scratch to make ldmia interruptible
    adds   r3, #1<<2            @ move r3 by 4 bytes
    ldmia  r3!, {r4-r7}         @ ldmia without r3
    ldr    r3, [r1]             @ load to r3 from r1 scratch address

    mov    r8, r3               @ r8 = *rcon
    mov    r9, r4               @ r9 = mmcau_1_cmd(AESS+CAA)
    mov    sl, r5               @ sl = *mmcau_direct_cmd()
    mov    fp, r6               @ fp = mmcau_indirect_cmd(LDR+CAA)
    mov    ip, r7               @ ip = mmcau_indirect_cmd(STR+CAA)

# calculate key_sch[0-3]
    ldmia  r0!, {r3-r6}         @ load key[0-3]
    rev    r3, r3               @ byterev(key[0]) = key_sch[0]
    rev    r4, r4               @ byterev(key[1]) = key_sch[1]
    rev    r5, r5               @ byterev(key[2]) = key_sch[2]
    rev    r6, r6               @ byterev(key[3]) = key_sch[3]
    stmia  r2!, {r3-r6}         @ store key_sch[0-3], *key_sch++

# calculate key_sch[4-7]
    mov    r7, r8
    ldr    r0, [r7, #0<<2]      @ load rcon[0]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[3]
    rors   r1, r7               @ ROTL(key_sch[3],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[3],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[0]
    eors   r3, r7               @ XOR key_sch[0] = key_sch[4]
    eors   r4, r3               @ XOR key_sch[1] = key_sch[5]
    eors   r5, r4               @ XOR key_sch[2] = key_sch[6]
    eors   r6, r5               @ XOR key_sch[3] = key_sch[7]
    stmia  r2!, {r3-r6}         @ store key_sch[4-7], *key_sch++

# calculate key_sch[8-11]
    mov    r7, r8
    ldr    r0, [r7, #1<<2]      @ load rcon[1]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[7]
    rors   r1, r7               @ ROTL(key_sch[7],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[7],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[1]
    eors   r3, r7               @ XOR key_sch[4] = key_sch[8]
    eors   r4, r3               @ XOR key_sch[5] = key_sch[9]
    eors   r5, r4               @ XOR key_sch[6] = key_sch[10]
    eors   r6, r5               @ XOR key_sch[7] = key_sch[11]
    stmia  r2!, {r3-r6}         @ store key_sch[8-11], *key_sch++

# calculate key_sch[12-15]
    mov    r7, r8
    ldr    r0, [r7, #2<<2]      @ load rcon[2]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[11]
    rors   r1, r7               @ ROTL(key_sch[11],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[11],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[2]
    eors   r3, r7               @ XOR key_sch[8] = key_sch[12]
    eors   r4, r3               @ XOR key_sch[9] = key_sch[13]
    eors   r5, r4               @ XOR key_sch[10] = key_sch[14]
    eors   r6, r5               @ XOR key_sch[11] = key_sch[15]
    stmia  r2!, {r3-r6}         @ store key_sch[12-15], *key_sch++

# calculate key_sch[16-19]
    mov    r7, r8
    ldr    r0, [r7, #3<<2]      @ load rcon[3]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[15]
    rors   r1, r7               @ ROTL(key_sch[15],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[15],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[3]
    eors   r3, r7               @ XOR key_sch[12] = key_sch[16]
    eors   r4, r3               @ XOR key_sch[13] = key_sch[17]
    eors   r5, r4               @ XOR key_sch[14] = key_sch[18]
    eors   r6, r5               @ XOR key_sch[15] = key_sch[19]
    stmia  r2!, {r3-r6}         @ store key_sch[16-19], *key_sch++

# calculate key_sch[20-23]
    mov    r7, r8
    ldr    r0, [r7, #4<<2]      @ load rcon[4]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[19]
    rors   r1, r7               @ ROTL(key_sch[19],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[19],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[4]
    eors   r3, r7               @ XOR key_sch[16] = key_sch[20]
    eors   r4, r3               @ XOR key_sch[17] = key_sch[21]
    eors   r5, r4               @ XOR key_sch[18] = key_sch[22]
    eors   r6, r5               @ XOR key_sch[19] = key_sch[23]
    stmia  r2!, {r3-r6}         @ store key_sch[20-23], *key_sch++

# calculate key_sch[24-27]
    mov    r7, r8
    ldr    r0, [r7, #5<<2]      @ load rcon[5]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[23]
    rors   r1, r7               @ ROTL(key_sch[23],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[23],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[5]
    eors   r3, r7               @ XOR key_sch[20] = key_sch[24]
    eors   r4, r3               @ XOR key_sch[21] = key_sch[25]
    eors   r5, r4               @ XOR key_sch[22] = key_sch[26]
    eors   r6, r5               @ XOR key_sch[23] = key_sch[27]
    stmia  r2!, {r3-r6}         @ store key_sch[24-27], *key_sch++

# calculate key_sch[28-31]
    mov    r7, r8
    ldr    r0, [r7, #6<<2]      @ load rcon[6]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[27]
    rors   r1, r7               @ ROTL(key_sch[27],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[27],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[6]
    eors   r3, r7               @ XOR key_sch[24] = key_sch[28]
    eors   r4, r3               @ XOR key_sch[25] = key_sch[29]
    eors   r5, r4               @ XOR key_sch[26] = key_sch[30]
    eors   r6, r5               @ XOR key_sch[27] = key_sch[31]
    stmia  r2!, {r3-r6}         @ store key_sch[28-31], *key_sch++

# calculate key_sch[32-35]
    mov    r7, r8
    ldr    r0, [r7, #7<<2]      @ load rcon[7]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[31]
    rors   r1, r7               @ ROTL(key_sch[31],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[31],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[7]
    eors   r3, r7               @ XOR key_sch[28] = key_sch[32]
    eors   r4, r3               @ XOR key_sch[29] = key_sch[33]
    eors   r5, r4               @ XOR key_sch[30] = key_sch[34]
    eors   r6, r5               @ XOR key_sch[31] = key_sch[35]
    stmia  r2!, {r3-r6}         @ store key_sch[32-35], *key_sch++

# calculate key_sch[36-39]
    mov    r7, r8
    ldr    r0, [r7, #8<<2]      @ load rcon[8]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[35]
    rors   r1, r7               @ ROTL(key_sch[35],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[35],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[8]
    eors   r3, r7               @ XOR key_sch[32] = key_sch[36]
    eors   r4, r3               @ XOR key_sch[33] = key_sch[37]
    eors   r5, r4               @ XOR key_sch[34] = key_sch[38]
    eors   r6, r5               @ XOR key_sch[35] = key_sch[39]
    stmia  r2!, {r3-r6}         @ store key_sch[36-39], *key_sch++

# calculate key_sch[40-43]
    mov    r7, r8
    ldr    r0, [r7, #9<<2]      @ load rcon[9]
    movs   r7, #24
    mov    r1, r6               @ copy key_sch[39]
    rors   r1, r7               @ ROTL(key_sch[39],8)
    mov    r7, fp
    str    r1, [r7]             @ ROTL(key_sch[39],8) -> acc
    mov    r1, r9
    mov    r7, sl
    str    r1, [r7]             @ AES SubBytes
    mov    r1, ip
    ldr    r7, [r1]             @ load CAA
    eors   r7, r0               @ XOR rcon[9]
    eors   r3, r7               @ XOR key_sch[36] = key_sch[40]
    eors   r4, r3               @ XOR key_sch[37] = key_sch[41]
    eors   r5, r4               @ XOR key_sch[38] = key_sch[42]
    eors   r6, r5               @ XOR key_sch[39] = key_sch[43]
    stmia  r2!, {r3-r6}         @ store key_sch[40-43], *key_sch++


set_key_end:

    pop    {r3-r7}              @ restore high regs
    mov    r8, r3
    mov    r9, r4
    mov    sl, r5
    mov    fp, r6
    mov    ip, r7
    pop    {r4-r7, pc}          @ restore low regs, exit routine


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# MMCAU_AES_ENCRYPT
# Encrypts a single 16-byte block
#
# ARGUMENTS
# *in           pointer to 16-byte block of input plaintext
# *key_sch      pointer to key schedule (44, 52, 60 longwords)
# nr            number of AES rounds (10, 12, 14 = f(key_schedule))
# *out          pointer to 16-byte block of output ciphertext
#
#
# CALLING CONVENTION
# void mmcau_aes_encrypt (const unsigned char *in,
#                         const unsigned char *key_sch,
#                         const int nr,
#                         unsigned char *out)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# REGISTER  |  ALLOCATION (at the start of mmcau_aes_encrypt)
# ----------+------------------------------------------------------------
#       r0  |  *in (arg0)
#       r1  |  *key_sch (arg1)
#       r2  |  nr (arg2)
#       r3  |  *out (arg3)
#           |
#     > r3  |  irrelevant
#
#
# REGISTER  |  ALLOCATION (throughout mmcau_aes_encrypt)
# ----------+------------------------------------------------------------
#       r0  |  mmcau_3_cmds(AESS+CA0,AESS+CA1,AESS+CA2)
#       r1  |  *key_sch
#       r2  |  *mmcau_direct_cmd()
#       r3  |  scratch
#       r4  |  key_sch[0+4i]
#       r5  |  key_sch[1+4i]
#       r6  |  key_sch[2+4i]
#       r7  |  key_sch[3+4i]
#       r8  |  mmcau_indirect_cmd(AESC+CA0)
#       r9  |  not used
#  (sl) r10 |  not used
#  (fp) r11 |  not used
#  (ip) r12 |  not used
#  (sp) r13 |  stack pointer
#  (lr) r14 |  mmcau_2_cmds(AESS+CA3,AESR)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    .global _mmcau_aes_encrypt
    .global mmcau_aes_encrypt
    .type   mmcau_aes_encrypt, %function
    .align  4

_mmcau_aes_encrypt:
mmcau_aes_encrypt:

# store nr and *out, we need them later in the routine
# store regs r4-r8, we need to restore them at the end of the routine
    push   {r2-r7, lr}          @ store nr, *out, low regs, and lr
    mov    r4, r8
    push   {r4}                 @ store high reg

# XOR the first 4 keys into the 4 words of plaintext
    ldmia  r1!, {r4-r7}         @ load first 4 keys, *key_sch++
    mov    lr, r1               @ temporarily store *key_sch[4]
#   ldmia  r0, {r0-r3}          @ load plaintext
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load plaintext and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of plaintext

    rev    r0, r0
    rev    r1, r1
    rev    r2, r2
    rev    r3, r3
    eors   r4, r0
    eors   r5, r1
    eors   r6, r2
    eors   r7, r3
    ldr    r1, =MMCAU_PPB_INDIRECT+(LDR+CA0)<<2
    stmia  r1!, {r4-r7}         @ store XOR results in CA[0-3]

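# NOTE: the XOR/store above is the initial AddRoundKey; the block now sits
# in CAU registers CA0-CA3. Each unrolled block below is one AES round,
# roughly (summary sketch added for clarity, not from the original source):
#
#   for (i = 1; i < nr; i++) {                   // unrolled below
#       SubBytes(CA0..CA3); ShiftRows();         // two direct commands
#       AESC(CAj, key_sch[4*i + j]), j = 0..3;   // column op with the round key
#   }
#   SubBytes(); ShiftRows();                     // final round, at encrypt_end
#   out = byterev(CA0..CA3 ^ key_sch[4*nr..4*nr+3]);
#
# The final AddRoundKey is done in software with eors after reading the
# result back, and the nr checks below skip the extra rounds for AES-128/192.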
# load some of the regs in preparation of the encryption
    ldr    r0, =encrypt_reg_data
#   ldmia  r0, {r0-r3}
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load reg data and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of the reg data

    mov    r8, r1               @ r8 = mmcau_indirect_cmd(AESC+CA0)
    mov    r1, lr               @ restore r1 = *key_sch[4]
    mov    lr, r3               @ lr = mmcau_2_cmds(AESS+CA3,AESR)

# send a series of cau commands to perform the encryption
    str    r0, [r2]             @ SubBytes
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


    ldr    r3, [sp, #1<<2]      @ load nr
    cmp    r3, #10              @ check nr
    beq    encrypt_end          @ if aes128, end routine
                                @ else, continue on

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


    ldr    r3, [sp, #1<<2]      @ load nr
    cmp    r3, #12              @ check nr
    beq    encrypt_end          @ if aes192, end routine
                                @ else, continue on

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows
    ldmia  r1!, {r4-r7}         @ load next 4 keys, *key_sch++
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ MixColumns
    str    r4, [r3, #0<<2]      @ MixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


encrypt_end:

    str    r0, [r2]             @ SubBytes
    mov    r3, lr
    str    r3, [r2]             @ SubBytes, ShiftRows

# XOR the last 4 keys with the 4 words of ciphertext
    ldr    r0, =MMCAU_PPB_INDIRECT+(STR+CA0)<<2
    ldmia  r1!, {r4-r7}         @ load last 4 keys
#   ldmia  r0, {r0-r3}          @ load ciphertext
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load ciphertext and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of ciphertext
    eors   r4, r0
    eors   r5, r1
    eors   r6, r2
    eors   r7, r3
    rev    r4, r4
    rev    r5, r5
    rev    r6, r6
    rev    r7, r7
    ldr    r1, [sp, #2<<2]      @ get *out
    stmia  r1!, {r4-r7}         @ store XOR results in out[0-3]

    pop    {r4}                 @ restore high reg
    mov    r8, r4
    add    sp, #2<<2            @ set sp = *{r4-r7}
    pop    {r4-r7, pc}          @ restore low regs, exit routine


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# MMCAU_AES_DECRYPT
# Decrypts a single 16-byte block
#
# ARGUMENTS
# *in           pointer to 16-byte block of input ciphertext
# *key_sch      pointer to key schedule (44, 52, 60 longwords)
# nr            number of AES rounds (10, 12, 14 = f(key_schedule))
# *out          pointer to 16-byte block of output plaintext
#
#
# CALLING CONVENTION
# void mmcau_aes_decrypt (const unsigned char *in,
#                         const unsigned char *key_sch,
#                         const int nr,
#                         unsigned char *out)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# REGISTER  |  ALLOCATION (at the start of mmcau_aes_decrypt)
# ----------+------------------------------------------------------------
#       r0  |  *in (arg0)
#       r1  |  *key_sch (arg1)
#       r2  |  nr (arg2)
#       r3  |  *out (arg3)
#           |
#     > r3  |  irrelevant
#
#
# REGISTER  |  ALLOCATION (throughout mmcau_aes_decrypt)
# ----------+------------------------------------------------------------
#       r0  |  mmcau_3_cmds(AESIR,AESIS+CA3,AESIS+CA2)
#       r1  |  *key_sch
#       r2  |  *mmcau_direct_cmd()
#       r3  |  scratch
#       r4  |  key_sch[0+4i]
#       r5  |  key_sch[1+4i]
#       r6  |  key_sch[2+4i]
#       r7  |  key_sch[3+4i]
#       r8  |  mmcau_indirect_cmd(AESIC+CA0)
#       r9  |  not used
#  (sl) r10 |  not used
#  (fp) r11 |  not used
#  (ip) r12 |  not used
#  (sp) r13 |  stack pointer
#  (lr) r14 |  mmcau_2_cmds(AESIS+CA1,AESIS+CA0)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
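# NOTE: decryption mirrors mmcau_aes_encrypt but walks the key schedule
# backward and uses the inverse commands from decrypt_reg_data. Roughly
# (summary sketch added for clarity, not from the original source):
#
#   state = byterev(in) ^ key_sch[4*nr..4*nr+3];  // initial AddRoundKey
#   for (i = nr-1; i > 0; i--) {                  // unrolled below
#       InvShiftRows(); InvSubBytes(CA0..CA3);    // two direct commands
#       AESIC(CAj, key_sch[4*i + j]), j = 0..3;   // inverse column op
#   }
#   InvShiftRows(); InvSubBytes();
#   out = byterev(state ^ key_sch[0..3]);         // final AddRoundKey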

    .global _mmcau_aes_decrypt
    .global mmcau_aes_decrypt
    .type   mmcau_aes_decrypt, %function
    .align  4

_mmcau_aes_decrypt:
mmcau_aes_decrypt:

# store nr and *out, we need them later in the routine
# store regs r4-r8, we need to restore them at the end of the routine
    push   {r2-r7, lr}          @ store nr, *out, low regs, and lr
    mov    r4, r8
    push   {r4}                 @ store high reg

# *key_sch is adjusted to define the end of the elements, such that
# the adjustment factor = f(nr) is defined by the expression:
# end of key_sch = 4 * (nr + 1), where nr = {10, 12, 14}
    movs   r3, #28
    rors   r2, r3               @ nr ror 28 = nr << 4 = 16*nr, byte offset of key_sch[4*nr]
    add    r1, r2               @ calculate end of key_sch (its last 4 longwords)
    mov    lr, r1               @ temporarily store end of key_sch

# XOR the last 4 keys into the 4 words of ciphertext
    ldmia  r1!, {r4-r7}         @ load last 4 keys
#   ldmia  r0, {r0-r3}          @ load ciphertext
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load ciphertext and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of ciphertext
    rev    r0, r0
    rev    r1, r1
    rev    r2, r2
    rev    r3, r3
    eors   r4, r0
    eors   r5, r1
    eors   r6, r2
    eors   r7, r3
    ldr    r1, =MMCAU_PPB_INDIRECT+(LDR+CA0)<<2
    stmia  r1!, {r4-r7}         @ store XOR results in CA[0-3]

# load some of the regs in preparation of the decryption
    ldr    r0, =decrypt_reg_data
#   ldmia  r0, {r0-r3}
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load reg data and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of the reg data

    mov    r8, r1               @ r8 = mmcau_indirect_cmd(AESIC+CA0)
    mov    r1, lr               @ restore end of key_sch
    subs   r1, #4<<2            @ *key_sch--
    mov    lr, r3               @ lr = mmcau_2_cmds(AESIS+CA1,AESIS+CA0)

# send a series of cau commands to perform the decryption
    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


    ldr    r3, [sp, #1<<2]      @ restore nr
    cmp    r3, #10              @ check nr
    beq    decrypt_end          @ if aes128, end routine
                                @ else, continue on

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


    ldr    r3, [sp, #1<<2]      @ restore nr
    cmp    r3, #12              @ check nr
    beq    decrypt_end          @ if aes192, end routine
                                @ else, continue on

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2

    ldmia  r1!, {r4-r7}         @ load previous 4 keys
    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    subs   r1, #8<<2            @ *key_sch--
    str    r3, [r2]             @ InvSubBytes
    mov    r3, r8
#   stmia  r3!, {r4-r7}         @ InvMixColumns
    str    r4, [r3, #0<<2]      @ InvMixColumns without stmia
    str    r5, [r3, #1<<2]
    str    r6, [r3, #2<<2]
    str    r7, [r3, #3<<2]
    adds   r3, #4<<2


decrypt_end:

    str    r0, [r2]             @ InvShiftRows, InvSubBytes
    mov    r3, lr
    str    r3, [r2]             @ InvSubBytes

# XOR the first 4 keys with the 4 words of plaintext
    ldr    r0, =MMCAU_PPB_INDIRECT+(STR+CA0)<<2
    ldmia  r1!, {r4-r7}         @ load first 4 keys
#   ldmia  r0, {r0-r3}          @ load plaintext
    adds   r0, #1<<2            @ move by 4 bytes to make ldmia interruptible
    ldmia  r0!, {r1-r3}         @ load plaintext and move r0 by 12 bytes
    subs   r0, #1<<4            @ move r0 back by 16 bytes
    ldr    r0, [r0]             @ load the rest of plaintext
    eors   r4, r0
    eors   r5, r1
    eors   r6, r2
    eors   r7, r3
    rev    r4, r4
    rev    r5, r5
    rev    r6, r6
    rev    r7, r7
    ldr    r1, [sp, #2<<2]      @ get *out
    stmia  r1!, {r4-r7}         @ store XOR results in out[0-3]

    pop    {r4}                 @ restore high reg
    mov    r8, r4
    add    sp, #2<<2            @ set sp = *{r4-r7}
    pop    {r4-r7, pc}          @ restore low regs, exit routine


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

    .data


    .type   set_key_reg_data, %object
    .align  4

set_key_reg_data:
    .word   rcon                                                    @ r8
    .word   MMCAU_1_CMD+(AESS+CAA)<<22                              @ r9
    .word   MMCAU_PPB_DIRECT                                        @ sl
    .word   MMCAU_PPB_INDIRECT+(LDR+CAA)<<2                         @ fp
    .word   MMCAU_PPB_INDIRECT+(STR+CAA)<<2                         @ ip


    .type   encrypt_reg_data, %object
    .align  4

encrypt_reg_data:
    .word   MMCAU_3_CMDS+(AESS+CA0)<<22+(AESS+CA1)<<11+AESS+CA2     @ r0
    .word   MMCAU_PPB_INDIRECT+(AESC+CA0)<<2                        @ r8
    .word   MMCAU_PPB_DIRECT                                        @ r2
    .word   MMCAU_2_CMDS+(AESS+CA3)<<22+(AESR)<<11                  @ lr


    .type   decrypt_reg_data, %object
    .align  4

decrypt_reg_data:
    .word   MMCAU_3_CMDS+(AESIR)<<22+(AESIS+CA3)<<11+AESIS+CA2      @ r0
    .word   MMCAU_PPB_INDIRECT+(AESIC+CA0)<<2                       @ r8
    .word   MMCAU_PPB_DIRECT                                        @ r2
    .word   MMCAU_2_CMDS+(AESIS+CA1)<<22+(AESIS+CA0)<<11            @ lr


    .type   rcon, %object
    .align  4

rcon:
    .word   0x01000000
    .word   0x02000000
    .word   0x04000000
    .word   0x08000000
    .word   0x10000000
    .word   0x20000000
    .word   0x40000000
    .word   0x80000000
    .word   0x1b000000
    .word   0x36000000
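
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# USAGE SKETCH (informational comment; buffer names are illustrative, and the
# C prototypes are the ones given in the CALLING CONVENTION blocks above)
#
#   unsigned char key[16];                        /* 128-bit key bytes      */
#   unsigned char in[16], out[16], check[16];     /* one 16-byte block      */
#   unsigned char key_sch[44 * 4];                /* 44 longwords (AES-128) */
#
#   mmcau_aes_set_key(key, 128, key_sch);
#   mmcau_aes_encrypt(in, key_sch, 10, out);      /* nr = 10 for AES-128    */
#   mmcau_aes_decrypt(out, key_sch, 10, check);   /* check == in            */
#
# For 192/256-bit keys, key_sch needs 52/60 longwords and nr is 12/14.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #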