Lines Matching +full:ip +full:- +full:block

1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
12 .arch armv8-a
13 .fpu crypto-neon-fp-armv8
102 vld1.32 {q10-q11}, [ip]!
104 vld1.32 {q12-q13}, [ip]!
106 vld1.32 {q10-q11}, [ip]!
108 vld1.32 {q12-q13}, [ip]!
110 blo 0f @ AES-128: 10 rounds
111 vld1.32 {q10-q11}, [ip]!
113 beq 1f @ AES-192: 12 rounds
114 vld1.32 {q12-q13}, [ip]
124 * Internal, non-AAPCS compliant functions that implement the core AES
125 * transforms. These should preserve all registers except q0 - q3 and ip
127 * q0 : first in/output block
128 * q1 : second in/output block (_4x version only)
129 * q2 : third in/output block (_4x version only)
130 * q3 : fourth in/output block (_4x version only)
139 add ip, r2, #32 @ 3rd round key
146 add ip, r2, #32 @ 3rd round key
152 add ip, r2, #32 @ 3rd round key
158 add ip, r2, #32 @ 3rd round key
163 add ip, \rk, \rounds, lsl #4
164 vld1.32 {q8-q9}, [\rk] @ load first 2 round keys
165 vld1.32 {q14}, [ip] @ load last round key
181 vld1.8 {q0-q1}, [r1]!
182 vld1.8 {q2-q3}, [r1]!
184 vst1.8 {q0-q1}, [r0]!
185 vst1.8 {q2-q3}, [r0]!
207 vld1.8 {q0-q1}, [r1]!
208 vld1.8 {q2-q3}, [r1]!
210 vst1.8 {q0-q1}, [r0]!
211 vst1.8 {q2-q3}, [r0]!
233 push {r4-r6, lr}
238 vld1.8 {q1}, [r1]! @ get next pt block
245 pop {r4-r6, pc}
249 push {r4-r6, lr}
256 vld1.8 {q0-q1}, [r1]!
257 vld1.8 {q2-q3}, [r1]!
268 vst1.8 {q0-q1}, [r0]!
269 vst1.8 {q2-q3}, [r0]!
276 vld1.8 {q0}, [r1]! @ get next ct block
285 pop {r4-r6, pc}
297 push {r4-r6, lr}
300 movw ip, :lower16:.Lcts_permute_table
301 movt ip, :upper16:.Lcts_permute_table
303 add lr, ip, #32
304 add ip, ip, r4
306 vld1.8 {q5}, [ip]
309 add ip, r1, r4
311 vld1.8 {q3}, [ip]
319 vtbl.8 d4, {d0-d1}, d10
320 vtbl.8 d5, {d0-d1}, d11
321 vtbl.8 d2, {d6-d7}, d12
322 vtbl.8 d3, {d6-d7}, d13
331 pop {r4-r6, pc}
335 push {r4-r6, lr}
338 movw ip, :lower16:.Lcts_permute_table
339 movt ip, :upper16:.Lcts_permute_table
341 add lr, ip, #32
342 add ip, ip, r4
344 vld1.8 {q5}, [ip]
347 add ip, r1, r4
349 vld1.8 {q1}, [ip]
356 vtbl.8 d4, {d0-d1}, d10
357 vtbl.8 d5, {d0-d1}, d11
358 vtbx.8 d0, {d2-d3}, d12
359 vtbx.8 d1, {d2-d3}, d13
369 pop {r4-r6, pc}
378 push {r4-r6, lr}
392 rev ip, r6
395 vmov s7, ip
396 rev ip, r6
399 vmov s11, ip
400 rev ip, r6
402 vmov s15, ip
403 vld1.8 {q4-q5}, [r1]!
411 rev ip, r6
412 vst1.8 {q0-q1}, [r0]!
413 vst1.8 {q2-q3}, [r0]!
414 vmov s31, ip
424 rev ip, r6
425 vmov s31, ip
430 bmi .Lctrtailblock @ blocks < 0 means tail block
438 pop {r4-r6, pc}
446 vmov ip, \sreg @ load next word of ctr
447 rev ip, ip @ ... to handle the carry
448 adds ip, ip, #1
449 rev ip, ip
450 vmov \sreg, ip
479 teq r6, #1 @ start of a block?
483 @ be done at the start of a block.
486 add ip, r6, #32 @ 3rd round key of key 2
491 push {r4-r6, lr}
497 teq r6, #0 @ start of a block?
505 vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
506 vld1.8 {q2-q3}, [r1]!
519 vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
520 vst1.8 {q2-q3}, [r0]!
547 pop {r4-r6, pc}
553 movw ip, :lower16:.Lcts_permute_table
554 movt ip, :upper16:.Lcts_permute_table
557 add r4, r4, #16 @ # bytes in final block
558 add lr, ip, #32
559 add ip, ip, r4
561 add r4, r0, r4 @ output address of final block
563 vld1.8 {q1}, [r1] @ load final partial block
564 vld1.8 {q2}, [ip]
567 vtbl.8 d4, {d0-d1}, d4
568 vtbl.8 d5, {d0-d1}, d5
569 vtbx.8 d0, {d2-d3}, d6
570 vtbx.8 d1, {d2-d3}, d7
579 push {r4-r6, lr}
589 teq r6, #0 @ start of a block?
597 vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
598 vld1.8 {q2-q3}, [r1]!
611 vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
612 vst1.8 {q2-q3}, [r0]!
636 pop {r4-r6, pc}
639 movw ip, :lower16:.Lcts_permute_table
640 movt ip, :upper16:.Lcts_permute_table
643 add r4, r4, #16 @ # bytes in final block
644 add lr, ip, #32
645 add ip, ip, r4
647 add r4, r0, r4 @ output address of final block
651 vld1.8 {q1}, [r1] @ load final partial block
652 vld1.8 {q2}, [ip]
659 vtbl.8 d4, {d0-d1}, d4
660 vtbl.8 d5, {d0-d1}, d5
661 vtbx.8 d0, {d2-d3}, d6
662 vtbx.8 d1, {d2-d3}, d7
670 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
683 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns