/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */

/*
 * GNU assembler source in AT&T syntax.  It is run through the C
 * preprocessor first: it uses #include and function-like #define macros.
 *
 * The four symbols below are lookup tables defined outside this file.  The
 * round code in this file indexes each of them at byte offsets 0, 1024,
 * 2048 and 3072 with 4-byte entries.
 */
.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab

.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Register aliases.
 *
 * Rn names the 64-bit register.  The suffixed forms name narrower views of
 * the same register so that the macros in this file can build an operand by
 * token pasting (for example r1 ## E):
 *
 *   E  32-bit view        X  16-bit view
 *   H  second byte        L  low byte
 *
 * Only R1-R4 (%rax, %rbx, %rcx, %rdx) have X, H and L views defined.
 * R5-R7 have a 32-bit view only; R8, R10 and R11 are defined as 64-bit
 * names only.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%r9	/* don't use %rbp; it breaks stack traces */
#define R7E	%r9d
#define R8	%r8
#define R10	%r10
#define R11	%r11

/*
 * prologue - open function FUNC, load the 16-byte input block and XOR it
 * with the first four round-key words.
 *
 * The r* parameter names are local to this macro; callers pass Rn aliases.
 *
 * FUNC        symbol to define (handed to ENTRY)
 * KEY         byte offset of the round-key array from the pointer in r8
 * B128, B192  branch targets taken when the 32-bit value at 480(r8) is
 *             below 24 / equal to 24; any larger value falls through
 * r1, r2      64-bit r1 is copied into r2 before r1's low half is reused
 *             as a state word
 * r10, r11    64-bit r10 is copied into r11 before r10's low half is
 *             overwritten with the value from 480(r8)
 * r7          pointer to the input block; overwritten by the last load
 * r5,r1,r6,r7 receive input words 0..3 (32-bit loads from 0, 4, 8 and
 *             12(r7)), each XORed with the key word at KEY+0/4/8/12(r8)
 * r8          base pointer for KEY and for the field at offset 480
 * r9          key pointer left for the rounds: r8 + KEY + 48 on the B128
 *             path, + 80 on the B192 path, + 112 on the fall-through path
 *
 * leaq does not modify the flags, so the je after the first leaq still
 * tests the result of the cmpl.
 *
 * NOTE(review): the field at 480(r8) is presumably the key length in bytes
 * (16/24/32) - confirm against the definition of the context structure.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
	ENTRY(FUNC);			\
	movq	r1,r2;			\
	leaq	KEY+48(r8),r9;		\
	movq	r10,r11;		\
	movl	(r7),r5 ## E;		\
	movl	4(r7),r1 ## E;		\
	movl	8(r7),r6 ## E;		\
	movl	12(r7),r7 ## E;		\
	movl	480(r8),r10 ## E;	\
	xorl	-48(r9),r5 ## E;	\
	xorl	-44(r9),r1 ## E;	\
	xorl	-40(r9),r6 ## E;	\
	xorl	-36(r9),r7 ## E;	\
	cmpl	$24,r10 ## E;		\
	jb	B128;			\
	leaq	32(r9),r9;		\
	je	B192;			\
	leaq	32(r9),r9;

/*
 * epilogue - store the result block, return, and close function FUNC.
 *
 * FUNC        symbol being closed (handed to ENDPROC)
 * r1, r2      64-bit r1 is copied into r2 before returning
 * r5,r6,r7,r8 registers whose 32-bit views are stored as output words
 *             0..3 at 0, 4, 8 and 12(r9)
 * r9          pointer to the 16-byte output block
 */
#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
	movq	r1,r2;			\
	movl	r5 ## E,(r9);		\
	movl	r6 ## E,4(r9);		\
	movl	r7 ## E,8(r9);		\
	movl	r8 ## E,12(r9);		\
	ret;				\
	ENDPROC(FUNC);

/*
 * round - one table-driven round on a state of four 32-bit words.
 *
 * TAB     base of four consecutive tables of 4-byte entries located at byte
 *         offsets 0, 1024, 2048 and 3072 (called T0..T3 below).  Each index
 *         is a zero-extended byte; addressing is absolute plus scaled index.
 * OFFSET  byte offset from r8 of the four round-key words to apply
 * r1..r4  input words; all four registers are overwritten.  They are used
 *         with the X, H and L suffixes, so only registers that have those
 *         aliases can be passed.
 * r5, r6  output registers (used with the E suffix only)
 * r7      scratch index register
 * r8      round-key pointer; not modified
 * ra..rd  registers that receive, by XOR, the key words at OFFSET+0, +4,
 *         +8 and +12.  ra and rb are applied once r5 and r6 hold their
 *         first lookup, rc and rd near the end.  The callers in this file
 *         pass r5/r6 (in either order) for ra/rb and r3/r4 (in either
 *         order) for rc/rd.
 *
 * Writing bN(x) for byte N of x (b0 is the least significant byte) and
 * taking the right-hand sides as the values on entry, the lookups
 * accumulated into each result register are:
 *
 *   r5 = T0[b0(r1)] ^ T1[b1(r2)] ^ T2[b2(r3)] ^ T3[b3(r4)]
 *   r6 = T0[b0(r2)] ^ T1[b1(r3)] ^ T2[b2(r4)] ^ T3[b3(r1)]
 *   r3 = T0[b0(r3)] ^ T1[b1(r4)] ^ T2[b2(r1)] ^ T3[b3(r2)]
 *   r4 = T0[b0(r4)] ^ T1[b1(r1)] ^ T2[b2(r2)] ^ T3[b3(r3)]
 *
 * The four results end up in r5, r6, r3 and r4; r1 and r2 are left holding
 * leftover byte values.  Flags are clobbered.
 *
 * Each movw + roll $16 pair parks the low half of r4 (then r3) in the upper
 * half of r2 (then r1).  Those bytes stay reachable through the H/L aliases
 * after a later shrl $16, which frees r4 and r3 for reuse as index and
 * accumulator registers.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E;	\
	movzbl	r2 ## L,r6 ## E;	\
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X;	\
	movl	TAB(,r6,4),r6 ## E;	\
	roll	$16,r2 ## E;		\
	shrl	$16,r4 ## E;		\
	movzbl	r4 ## L,r7 ## E;	\
	movzbl	r4 ## H,r4 ## E;	\
	xorl	OFFSET(r8),ra ## E;	\
	xorl	OFFSET+4(r8),rb ## E;	\
	xorl	TAB+3072(,r4,4),r5 ## E;\
	xorl	TAB+2048(,r7,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r4 ## E;	\
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X;	\
	roll	$16,r1 ## E;		\
	shrl	$16,r3 ## E;		\
	xorl	TAB(,r7,4),r5 ## E;	\
	movzbl	r3 ## L,r7 ## E;	\
	movzbl	r3 ## H,r3 ## E;	\
	xorl	TAB+3072(,r3,4),r4 ## E;\
	xorl	TAB+2048(,r7,4),r5 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r3 ## E;	\
	shrl	$16,r1 ## E;		\
	xorl	TAB+3072(,r3,4),r6 ## E;\
	movl	TAB+2048(,r7,4),r3 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r1 ## E;	\
	xorl	TAB+1024(,r1,4),r6 ## E;\
	xorl	TAB(,r7,4),r3 ## E;	\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r7 ## E;	\
	shrl	$16,r2 ## E;		\
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r2 ## E;	\
	xorl	OFFSET+8(r8),rc ## E;	\
	xorl	OFFSET+12(r8),rd ## E;	\
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;

/*
 * move_regs - copy the 32-bit view of r3 into r1 and of r4 into r2.
 * Used after a round to bring two result words back into the registers
 * the next round reads them from.
 */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E;	\
	movl	r4 ## E,r2 ## E;

/*
 * entry - prologue bound to the register assignment used in this file:
 *
 *   R2 (%rbx) is copied to R8 before being used as a state word
 *   R5 (%rsi) is copied to R11 before being overwritten
 *   R4 (%rdx) is the input pointer, R6 (%rdi) the base for KEY and the
 *   field at offset 480, R10 the round-key pointer handed to the rounds
 *
 * On exit the state words 0..3 are in R1E, R2E, R3E and R4E.
 */
#define entry(FUNC,KEY,B128,B192) \
	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)

/*
 * return - epilogue bound to the register assignment used in this file:
 * copies R8 back into R2 (%rbx), stores R5E, R6E, R3E and R4E as output
 * words 0..3 through the pointer in R11, then returns.
 */
#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)

/*
 * encrypt_round - one non-final encryption round.
 * Reads the state from R1..R4 and the key words at OFFSET(R10); round
 * leaves the results in R5, R6, R3, R4, and move_regs then copies R5/R6
 * back into R1/R2 so the state is in R1..R4 again.  R7 is scratch.
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * encrypt_final - last encryption round.
 * Same register binding as encrypt_round but without the copy back: the
 * result stays in R5, R6, R3, R4.
 */
#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

/*
 * decrypt_round - one non-final decryption round.
 * Uses the same round macro with the register pairs swapped (R2,R1,R4,R3
 * as inputs and R6,R5 as outputs), which changes which input word supplies
 * each byte.  Key words 0..3 are still XORed into R5, R6, R3, R4, and
 * move_regs copies R5/R6 back into R1/R2 for the next round.
 */
#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * decrypt_final - last decryption round.
 * Same register binding as decrypt_round but without the copy back: the
 * result stays in R5, R6, R3, R4.
 */
#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)

/*
 * void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 *
 * Encrypts one 16-byte block.
 *
 * ABI:   System V AMD64
 * In:    %rdi = first argument; round keys are read starting at offset 0
 *               from it and a 32-bit field at offset 480
 *        %rsi = out (16 bytes written)
 *        %rdx = in  (16 bytes read)
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11, flags
 *        %rbx is used but is saved in %r8 on entry and restored on exit.
 *
 * The code is one straight-line run of rounds.  entry jumps into it at
 * .Le128 or .Le192, or falls through from the top, so that 10, 12 or 14
 * rounds (including the final one) are executed.
 *
 * NOTE(review): the code dereferences the first argument directly as the
 * key storage - confirm that its real type matches the prototype above.
 */

	entry(aes_enc_blk,0,.Le128,.Le192)
	encrypt_round(crypto_ft_tab,-96)
	encrypt_round(crypto_ft_tab,-80)
.Le192:	encrypt_round(crypto_ft_tab,-64)
	encrypt_round(crypto_ft_tab,-48)
.Le128:	encrypt_round(crypto_ft_tab,-32)
	encrypt_round(crypto_ft_tab,-16)
	encrypt_round(crypto_ft_tab,  0)
	encrypt_round(crypto_ft_tab, 16)
	encrypt_round(crypto_ft_tab, 32)
	encrypt_round(crypto_ft_tab, 48)
	encrypt_round(crypto_ft_tab, 64)
	encrypt_round(crypto_ft_tab, 80)
	encrypt_round(crypto_ft_tab, 96)
	encrypt_final(crypto_fl_tab,112)
	return(aes_enc_blk)

/*
 * void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 *
 * Decrypts one 16-byte block.
 *
 * ABI:   System V AMD64
 * In:    %rdi = first argument; round keys are read starting at offset 240
 *               from it and a 32-bit field at offset 480
 *        %rsi = out (16 bytes written)
 *        %rdx = in  (16 bytes read)
 * Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11, flags
 *        %rbx is used but is saved in %r8 on entry and restored on exit.
 *
 * The code is one straight-line run of rounds.  entry jumps into it at
 * .Ld128 or .Ld192, or falls through from the top, so that 10, 12 or 14
 * rounds (including the final one) are executed.
 *
 * NOTE(review): the code dereferences the first argument directly as the
 * key storage - confirm that its real type matches the prototype above.
 */

	entry(aes_dec_blk,240,.Ld128,.Ld192)
	decrypt_round(crypto_it_tab,-96)
	decrypt_round(crypto_it_tab,-80)
.Ld192:	decrypt_round(crypto_it_tab,-64)
	decrypt_round(crypto_it_tab,-48)
.Ld128:	decrypt_round(crypto_it_tab,-32)
	decrypt_round(crypto_it_tab,-16)
	decrypt_round(crypto_it_tab,  0)
	decrypt_round(crypto_it_tab, 16)
	decrypt_round(crypto_it_tab, 32)
	decrypt_round(crypto_it_tab, 48)
	decrypt_round(crypto_it_tab, 64)
	decrypt_round(crypto_it_tab, 80)
	decrypt_round(crypto_it_tab, 96)
	decrypt_final(crypto_il_tab,112)
	return(aes_dec_blk)