1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4 
5  LICENSE TERMS
6 
7  The redistribution and use of this software (with or without changes)
8  is allowed without the payment of fees or royalties provided that:
9 
10   1. source code distributions include the above copyright notice, this
11      list of conditions and the following disclaimer;
12 
13   2. binary distributions include the above copyright notice, this list
14      of conditions and the following disclaimer in their documentation;
15 
16   3. the name of the copyright holder is not used to endorse products
17      built using this software without specific written permission.
18 
19  DISCLAIMER
20 
21  This software is provided 'as is' with no explicit or implied warranties
22  in respect of its properties, including, but not limited to, correctness
23  and/or fitness for purpose.
24  ---------------------------------------------------------------------------
25  Issue 09/09/2006
26 
27  This is an AES implementation that uses only 8-bit byte operations on the
28  cipher state (there are options to use 32-bit types if available).
29 
30  The combination of mix columns and byte substitution used here is based on
31  that developed by Karl Malbrain. His contribution is acknowledged.
32  */
33 
34 /* define if you have a fast memcpy function on your system */
35 #if 0
36 #  define HAVE_MEMCPY
37 #  include <string.h>
38 #  if defined( _MSC_VER )
39 #    include <intrin.h>
40 #    pragma intrinsic( memcpy )
41 #  endif
42 #endif
43 
44 
45 #include <stdlib.h>
46 #include <stdint.h>
47 
48 /* define if you have fast 32-bit types on your system */
49 #if ( __CORTEX_M != 0 ) // if Cortex is different from M0/M0+
50 #  define HAVE_UINT_32T
51 #endif
52 
/* define USE_TABLES to use pre-computed tables; otherwise all values are computed on the fly */
54 #if 1
55 #  define USE_TABLES
56 #endif
57 
58 /*  On Intel Core 2 duo VERSION_1 is faster */
59 
60 /* alternative versions (test for performance on your system) */
61 #if 1
62 #  define VERSION_1
63 #endif
64 
65 #include "aes.h"
66 
67 //#if defined( HAVE_UINT_32T )
68 //  typedef unsigned long uint32_t;
69 //#endif
70 
/* macros for finite field multiplication in the AES Galois field       */

#define WPOLY   0x011b  /* full AES field polynomial x^8+x^4+x^3+x+1    */
#define BPOLY     0x1b  /* low byte of the field polynomial             */
#define DPOLY   0x008d  /* value xor-ed in when halving (see d2)        */

/* fN(x) multiplies x by N modulo WPOLY; d2(x) divides x by 2.          */
/* NOTE(review): the f-macro arguments are NOT parenthesised, so only   */
/* pass simple variables or literals, never compound expressions.       */
#define f1(x)   (x)
#define f2(x)   ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
#define f4(x)   ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
#define f8(x)   ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
                          ^ (((x >> 5) & 4) * WPOLY))
#define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))

/* composites built from the powers of two above */
#define f3(x)   (f2(x) ^ x)
#define f9(x)   (f8(x) ^ x)
#define fb(x)   (f8(x) ^ f2(x) ^ x)
#define fd(x)   (f8(x) ^ f4(x) ^ x)
#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
89 
90 #if defined( USE_TABLES )
91 
92 #define sb_data(w) {    /* S Box data values */                            \
93     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
94     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
95     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
96     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
97     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
98     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
99     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
100     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
101     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
102     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
103     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
104     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
105     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
106     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
107     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
108     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
109     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
110     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
111     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
112     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
113     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
114     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
115     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
116     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
117     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
118     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
119     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
120     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
121     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
122     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
123     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
124     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
125 
126 #define isb_data(w) {   /* inverse S Box data values */                    \
127     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
128     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
129     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
130     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
131     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
132     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
133     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
134     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
135     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
136     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
137     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
138     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
139     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
140     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
141     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
142     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
143     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
144     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
145     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
146     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
147     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
148     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
149     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
150     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
151     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
152     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
153     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
154     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
155     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
156     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
157     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
158     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
159 
160 #define mm_data(w) {    /* basic data for forming finite field tables */   \
161     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
162     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
163     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
164     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
165     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
166     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
167     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
168     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
169     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
170     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
171     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
172     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
173     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
174     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
175     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
176     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
177     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
178     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
179     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
180     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
181     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
182     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
183     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
184     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
185     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
186     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
187     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
188     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
189     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
190     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
191     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
192     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
193 
/* The S box and its GF(2^8) multiples, expanded from the data macros
   above: f1 = identity, f2/f3 = multiply each S-box entry by 2/3.     */
static const uint8_t sbox[256]  =  sb_data(f1);

#if defined( AES_DEC_PREKEYED )
static const uint8_t isbox[256] = isb_data(f1);
#endif

static const uint8_t gfm2_sbox[256] = sb_data(f2);
static const uint8_t gfm3_sbox[256] = sb_data(f3);

#if defined( AES_DEC_PREKEYED )
/* multiplication by 0x09, 0x0b, 0x0d, 0x0e for inverse MixColumns     */
static const uint8_t gfmul_9[256] = mm_data(f9);
static const uint8_t gfmul_b[256] = mm_data(fb);
static const uint8_t gfmul_d[256] = mm_data(fd);
static const uint8_t gfmul_e[256] = mm_data(fe);
#endif

/* accessor macros so table and table-free builds share the same names */
#define s_box(x)     sbox[(x)]
#if defined( AES_DEC_PREKEYED )
#define is_box(x)    isbox[(x)]
#endif
#define gfm2_sb(x)   gfm2_sbox[(x)]
#define gfm3_sb(x)   gfm3_sbox[(x)]
#if defined( AES_DEC_PREKEYED )
#define gfm_9(x)     gfmul_9[(x)]
#define gfm_b(x)     gfmul_b[(x)]
#define gfm_d(x)     gfmul_d[(x)]
#define gfm_e(x)     gfmul_e[(x)]
#endif
222 #else
223 
/* Return the highest set bit of x shifted right by one position, so
 * hibit(0x80) == 0x40 and inputs 0 and 1 both yield 0.  The shift
 * keeps every degree marker within a byte even though the starting
 * polynomial (0x11b) has 9 bits.
 */
static uint8_t hibit(const uint8_t x)
{
    uint8_t m = 0x80;

    while( m && !(x & m) )          /* scan from the top bit down   */
        m >>= 1;
    return (uint8_t)(m >> 1);       /* report it one place lower    */
}
236 
/* Return the multiplicative inverse of x in GF(2^8) modulo the AES
   polynomial 0x11b, using a binary extended Euclidean algorithm on
   byte-packed polynomials.  0 (which has no inverse) maps to 0 and
   1 is its own inverse.                                              */

static uint8_t gf_inv(const uint8_t x)
{   uint8_t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

    /* n1/n2 track the (down-shifted) leading bits of p1/p2; v1/v2
       accumulate the Bezout coefficients that become the inverse     */
    if(x < 2)
        return x;

    for( ; ; )
    {
        if(n1)
            while(n2 >= n1)             /* divide polynomial p2 by p1    */
            {
                n2 /= n1;               /* shift smaller polynomial left */
                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
                v2 ^= (v1 * n2);        /* shift accumulated value and   */
                n2 = hibit(p2);         /* add into result               */
            }
        else
            return v1;                  /* p1 reached 1: v1 is x^-1      */

        if(n2)                          /* repeat with values swapped    */
            while(n1 >= n2)
            {
                n1 /= n2;
                p1 ^= p2 * n1;
                v1 ^= v2 * n1;
                n1 = hibit(p1);
            }
        else
            return v2;                  /* p2 reached 1: v2 is x^-1      */
    }
}
270 
/* The forward affine transformation used in the S-box: xor x with its
 * rotations by 1..4 bit positions, then add the constant 0x63.
 */
uint8_t fwd_affine(const uint8_t x)
{
#if defined( HAVE_UINT_32T )
    uint32_t t = x;

    t ^= (t << 1) ^ (t << 2) ^ (t << 3) ^ (t << 4);
    return (uint8_t)(0x63 ^ ((t ^ (t >> 8)) & 0xff));
#else
    uint8_t r = x;
    int i;

    for( i = 1; i <= 4; ++i )           /* xor in the four rotations */
        r ^= (uint8_t)((x << i) | (x >> (8 - i)));
    return (uint8_t)(0x63 ^ r);
#endif
}
283 
/* The inverse affine transformation used in the S-box: xor the
 * rotations of x by 1, 3 and 6 bit positions with the constant 0x05;
 * this undoes fwd_affine.
 */
uint8_t inv_affine(const uint8_t x)
{
#if defined( HAVE_UINT_32T )
    uint32_t t = x;

    t = (t << 1) ^ (t << 3) ^ (t << 6);
    return (uint8_t)(0x05 ^ ((t ^ (t >> 8)) & 0xff));
#else
    static const uint8_t rot[3] = { 1, 3, 6 };
    uint8_t r = 0x05;
    int i;

    for( i = 0; i < 3; ++i )
        r ^= (uint8_t)((x << rot[i]) | (x >> (8 - rot[i])));
    return r;
#endif
}
295 
/* table-free versions of the byte transforms: each access computes
   the value on the fly instead of indexing a pre-built table        */
#define s_box(x)   fwd_affine(gf_inv(x))
#define is_box(x)  gf_inv(inv_affine(x))
#define gfm2_sb(x) f2(s_box(x))
#define gfm3_sb(x) f3(s_box(x))
#define gfm_9(x)   f9(x)
#define gfm_b(x)   fb(x)
#define gfm_d(x)   fd(x)
#define gfm_e(x)   fe(x)
304 
305 #endif
306 
/* map the block copy helpers onto memcpy when a fast memcpy is
   available (HAVE_MEMCPY), otherwise onto the local byte copiers    */
#if defined( HAVE_MEMCPY )
#  define block_copy_nn(d, s, l)    memcpy(d, s, l)
#  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
#else
#  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
#  define block_copy(d, s)          copy_block(d, s)
#endif
314 
/* Copy one 16-byte cipher block from s to d.
 * NOTE(review): the 32-bit path assumes both pointers are 4-byte
 * aligned and ignores strict aliasing - confirm this holds on the
 * target before enabling HAVE_UINT_32T.
 */
static void copy_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    uint32_t       *dp = (uint32_t*)d;
    const uint32_t *sp = (const uint32_t*)s;

    dp[0] = sp[0]; dp[1] = sp[1];
    dp[2] = sp[2]; dp[3] = sp[3];
#else
    uint8_t       *dp = (uint8_t*)d;
    const uint8_t *sp = (const uint8_t*)s;
    int i;

    for( i = 0; i < 16; ++i )
        dp[i] = sp[i];
#endif
}
341 
/* Copy an arbitrary nn-byte span (nn <= 255) from s to d, used for
 * key material whose length is not a whole block.
 */
static void copy_block_nn( uint8_t * d, const uint8_t *s, uint8_t nn )
{
    uint8_t i;

    for( i = 0; i < nn; ++i )
        d[i] = s[i];
}
348 
/* XOR one 16-byte block into another in place: d ^= s.
 * NOTE(review): the 32-bit path assumes 4-byte alignment and ignores
 * strict aliasing - confirm before enabling HAVE_UINT_32T.
 */
static void xor_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    uint32_t       *dp = (uint32_t*)d;
    const uint32_t *sp = (const uint32_t*)s;

    dp[0] ^= sp[0]; dp[1] ^= sp[1];
    dp[2] ^= sp[2]; dp[3] ^= sp[3];
#else
    uint8_t       *dp = (uint8_t*)d;
    const uint8_t *sp = (const uint8_t*)s;
    int i;

    for( i = 0; i < 16; ++i )
        dp[i] ^= sp[i];
#endif
}
375 
/* d = s ^ k over one 16-byte block: copy the state while adding a
 * round key in a single pass.  d may alias s.
 * NOTE(review): the 32-bit path assumes 4-byte alignment and ignores
 * strict aliasing - confirm before enabling HAVE_UINT_32T.
 */
static void copy_and_key( void *d, const void *s, const void *k )
{
#if defined( HAVE_UINT_32T )
    uint32_t       *dp = (uint32_t*)d;
    const uint32_t *sp = (const uint32_t*)s;
    const uint32_t *kp = (const uint32_t*)k;

    dp[0] = sp[0] ^ kp[0]; dp[1] = sp[1] ^ kp[1];
    dp[2] = sp[2] ^ kp[2]; dp[3] = sp[3] ^ kp[3];
#elif 1
    uint8_t       *dp = (uint8_t*)d;
    const uint8_t *sp = (const uint8_t*)s;
    const uint8_t *kp = (const uint8_t*)k;
    int i;

    for( i = 0; i < 16; ++i )
        dp[i] = sp[i] ^ kp[i];
#else
    block_copy(d, s);
    xor_block(d, k);
#endif
}
405 
/* AddRoundKey: xor one 16-byte round key k into the state d in place */
static void add_round_key( uint8_t d[N_BLOCK], const uint8_t k[N_BLOCK] )
{
    xor_block(d, k);
}
410 
/* Combined SubBytes + ShiftRows on the state, in place.  The state is
 * column-major: st[4*c + r] holds row r of column c, so row r lives at
 * indices r, r+4, r+8, r+12.
 */
static void shift_sub_rows( uint8_t st[N_BLOCK] )
{   uint8_t tt;

    /* row 0: no shift, substitute only */
    st[ 0] = s_box(st[ 0]); st[ 4] = s_box(st[ 4]);
    st[ 8] = s_box(st[ 8]); st[12] = s_box(st[12]);

    /* row 1: rotate left one column */
    tt = st[1]; st[ 1] = s_box(st[ 5]); st[ 5] = s_box(st[ 9]);
    st[ 9] = s_box(st[13]); st[13] = s_box( tt );

    /* row 2: rotate two columns (swap opposite entries) */
    tt = st[2]; st[ 2] = s_box(st[10]); st[10] = s_box( tt );
    tt = st[6]; st[ 6] = s_box(st[14]); st[14] = s_box( tt );

    /* row 3: rotate left three columns (i.e. right one) */
    tt = st[15]; st[15] = s_box(st[11]); st[11] = s_box(st[ 7]);
    st[ 7] = s_box(st[ 3]); st[ 3] = s_box( tt );
}
426 
427 #if defined( AES_DEC_PREKEYED )
428 
/* Combined InvSubBytes + InvShiftRows on the state, in place: each row
 * is rotated the opposite way to shift_sub_rows and passed through the
 * inverse S-box.
 */
static void inv_shift_sub_rows( uint8_t st[N_BLOCK] )
{   uint8_t tt;

    /* row 0: no shift, substitute only */
    st[ 0] = is_box(st[ 0]); st[ 4] = is_box(st[ 4]);
    st[ 8] = is_box(st[ 8]); st[12] = is_box(st[12]);

    /* row 1: rotate right one column */
    tt = st[13]; st[13] = is_box(st[9]); st[ 9] = is_box(st[5]);
    st[ 5] = is_box(st[1]); st[ 1] = is_box( tt );

    /* row 2: rotate two columns */
    tt = st[2]; st[ 2] = is_box(st[10]); st[10] = is_box( tt );
    tt = st[6]; st[ 6] = is_box(st[14]); st[14] = is_box( tt );

    /* row 3: rotate right three columns (i.e. left one) */
    tt = st[3]; st[ 3] = is_box(st[ 7]); st[ 7] = is_box(st[11]);
    st[11] = is_box(st[15]); st[15] = is_box( tt );
}
444 
445 #endif
446 
#if defined( VERSION_1 )
  /* Combined SubBytes + ShiftRows + MixColumns, in place: the state is
   * first copied so the shifted bytes are read from pre-round values.
   */
  static void mix_sub_columns( uint8_t dt[N_BLOCK] )
  { uint8_t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* Combined SubBytes + ShiftRows + MixColumns from state st into dt */
  static void mix_sub_columns( uint8_t dt[N_BLOCK], uint8_t st[N_BLOCK] )
  {
#endif
    /* output column c reads the ShiftRows-selected input bytes
       st[4c], st[4c+5], st[4c+10], st[4c+15] (indices mod 16) and
       applies the {02,03,01,01} MixColumns pattern to their s-box
       images, rotating the pattern down the four rows               */
    dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

    dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

    dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

    dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
  }
475 
476 #if defined( AES_DEC_PREKEYED )
477 
#if defined( VERSION_1 )
  /* Combined InvMixColumns + InvSubBytes + InvShiftRows, in place */
  static void inv_mix_sub_columns( uint8_t dt[N_BLOCK] )
  { uint8_t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* Combined InvMixColumns + InvSubBytes + InvShiftRows, st into dt */
  static void inv_mix_sub_columns( uint8_t dt[N_BLOCK], uint8_t st[N_BLOCK] )
  {
#endif
    /* each input column is multiplied by the inverse MixColumns matrix
       {0e,0b,0d,09}, then inverse-substituted; the results land at the
       InvShiftRows destinations, hence the scattered dt indices       */
    dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
    dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
    dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
    dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));

    dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
    dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
    dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
    dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));

    dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

    dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
  }
506 
507 #endif
508 
509 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
510 
/*  Set the cipher key for the pre-keyed version.  keylen is in bytes
 *  and must be 16, 24 or 32; returns 0 on success, or ( uint8_t )-1
 *  (255) for an invalid length, with ctx->rnd set to 0 so that later
 *  encrypt/decrypt calls fail cleanly.
 */
return_type aes_set_key( const uint8_t key[], length_type keylen, aes_context ctx[1] )
{
    uint8_t cc, rc, hi;

    switch( keylen )
    {
    case 16:
    case 24:
    case 32:
        break;
    default:
        ctx->rnd = 0;
        return ( uint8_t )-1;
    }
    block_copy_nn(ctx->ksch, key, keylen);
    hi = (keylen + 28) << 2;            /* schedule size: 16 * (rounds + 1) bytes */
    ctx->rnd = (hi >> 4) - 1;           /* rounds: 10, 12 or 14                   */
    for( cc = keylen, rc = 1; cc < hi; cc += 4 )
    {   uint8_t tt, t0, t1, t2, t3;

        /* t0..t3 hold the previous schedule word */
        t0 = ctx->ksch[cc - 4];
        t1 = ctx->ksch[cc - 3];
        t2 = ctx->ksch[cc - 2];
        t3 = ctx->ksch[cc - 1];
        if( cc % keylen == 0 )          /* start of a key-length group:         */
        {                               /* RotWord + SubWord + round constant   */
            tt = t0;
            t0 = s_box(t1) ^ rc;
            t1 = s_box(t2);
            t2 = s_box(t3);
            t3 = s_box(tt);
            rc = f2(rc);                /* next Rcon byte: xtime in GF(2^8)     */
        }
        else if( keylen > 24 && cc % keylen == 16 )
        {                               /* extra SubWord step for 256-bit keys  */
            t0 = s_box(t0);
            t1 = s_box(t1);
            t2 = s_box(t2);
            t3 = s_box(t3);
        }
        tt = cc - keylen;               /* xor with the word one key-length back */
        ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
        ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
        ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
        ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    }
    return 0;
}
561 
562 #endif
563 
564 #if defined( AES_ENC_PREKEYED )
565 
/*  Encrypt a single block of 16 bytes.  Returns 0 on success or
 *  ( uint8_t )-1 if the context has not been keyed (ctx->rnd == 0).
 *  in and out may point to the same block.
 */
return_type aes_encrypt( const uint8_t in[N_BLOCK], uint8_t  out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint8_t s1[N_BLOCK], r;
        copy_and_key( s1, in, ctx->ksch );          /* initial AddRoundKey */

        for( r = 1 ; r < ctx->rnd ; ++r )           /* rnd - 1 full rounds */
#if defined( VERSION_1 )
        {
            mix_sub_columns( s1 );
            add_round_key( s1, ctx->ksch + r * N_BLOCK);
        }
#else
        {   uint8_t s2[N_BLOCK];
            mix_sub_columns( s2, s1 );
            copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
        }
#endif
        shift_sub_rows( s1 );                       /* final round omits MixColumns */
        copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );   /* r == ctx->rnd here */
    }
    else
        return ( uint8_t )-1;
    return 0;
}
594 
/*  CBC encrypt n_block consecutive blocks.  iv carries the chaining
 *  value in and out, so consecutive calls continue the same chain.
 *  Returns EXIT_SUCCESS, or EXIT_FAILURE if the context is unkeyed.
 *  NOTE(review): relies on EXIT_SUCCESS == 0, the value aes_encrypt
 *  returns on success - true on common platforms, but confirm.
 */
return_type aes_cbc_encrypt( const uint8_t *in, uint8_t *out,
                         int32_t n_block, uint8_t iv[N_BLOCK], const aes_context ctx[1] )
{

    while(n_block--)
    {
        xor_block(iv, in);                      /* chain: iv ^= plaintext      */
        if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)    /* encrypt in place    */
            return EXIT_FAILURE;
        block_copy(out, iv);                    /* ciphertext is the next IV   */
        in += N_BLOCK;
        out += N_BLOCK;
    }
    return EXIT_SUCCESS;
}
613 
614 #endif
615 
616 #if defined( AES_DEC_PREKEYED )
617 
618 /*  Decrypt a single block of 16 bytes */
619 
620 return_type aes_decrypt( const uint8_t in[N_BLOCK], uint8_t out[N_BLOCK], const aes_context ctx[1] )
621 {
622     if( ctx->rnd )
623     {
624         uint8_t s1[N_BLOCK], r;
625         copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );
626         inv_shift_sub_rows( s1 );
627 
628         for( r = ctx->rnd ; --r ; )
629 #if defined( VERSION_1 )
630         {
631             add_round_key( s1, ctx->ksch + r * N_BLOCK );
632             inv_mix_sub_columns( s1 );
633         }
634 #else
635         {   uint8_t s2[N_BLOCK];
636             copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
637             inv_mix_sub_columns( s1, s2 );
638         }
639 #endif
640         copy_and_key( out, s1, ctx->ksch );
641     }
642     else
643         return -1;
644     return 0;
645 }
646 
/*  CBC decrypt n_block consecutive blocks.  iv carries the chaining
 *  value in and out; in and out may point to the same buffer (the
 *  ciphertext is saved in tmp before it is overwritten).
 *  NOTE(review): relies on EXIT_SUCCESS == 0, the value aes_decrypt
 *  returns on success - true on common platforms, but confirm.
 */
return_type aes_cbc_decrypt( const uint8_t *in, uint8_t *out,
                         int32_t n_block, uint8_t iv[N_BLOCK], const aes_context ctx[1] )
{
    while(n_block--)
    {   uint8_t tmp[N_BLOCK];

        block_copy(tmp, in);            /* save ciphertext so in == out works */
        if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
            return EXIT_FAILURE;
        xor_block(out, iv);             /* unchain with the previous block    */
        block_copy(iv, tmp);            /* saved ciphertext becomes next IV   */
        in += N_BLOCK;
        out += N_BLOCK;
    }
    return EXIT_SUCCESS;
}
667 
668 #endif
669 
670 #if defined( AES_ENC_128_OTFK )
671 
/*  The 'on the fly' encryption key update for 128 bit keys: advances
 *  k from round key i to round key i+1 in place.  *rc holds the round
 *  constant and is advanced by xtime (f2) each call.
 */
static void update_encrypt_key_128( uint8_t k[N_BLOCK], uint8_t *rc )
{   uint8_t cc;

    /* first word: RotWord + SubWord + round constant */
    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
    *rc = f2( *rc );

    /* remaining words: each word ^= the word before it */
    for(cc = 4; cc < 16; cc += 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
691 
/*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit
 *  keying: round keys are derived as rounds proceed instead of being
 *  precomputed.  o_key receives the evolving schedule and ends holding
 *  the final round key; o_key may alias key.
 */
void aes_encrypt_128( const uint8_t in[N_BLOCK], uint8_t out[N_BLOCK],
                     const uint8_t key[N_BLOCK], uint8_t o_key[N_BLOCK] )
{   uint8_t s1[N_BLOCK], r, rc = 1;

    if(o_key != key)
        block_copy( o_key, key );
    copy_and_key( s1, in, o_key );              /* initial AddRoundKey */

    for( r = 1 ; r < 10 ; ++r )                 /* nine full rounds    */
#if defined( VERSION_1 )
    {
        mix_sub_columns( s1 );
        update_encrypt_key_128( o_key, &rc );   /* derive next round key */
        add_round_key( s1, o_key );
    }
#else
    {   uint8_t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        update_encrypt_key_128( o_key, &rc );
        copy_and_key( s1, s2, o_key );
    }
#endif

    shift_sub_rows( s1 );                       /* final round omits MixColumns */
    update_encrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
721 
722 #endif
723 
724 #if defined( AES_DEC_128_OTFK )
725 
/*  The 'on the fly' decryption key update for 128 bit keys: steps k
 *  backwards from round key i+1 to round key i, the exact inverse of
 *  update_encrypt_key_128.  *rc is wound back by halving (d2).
 */
static void update_decrypt_key_128( uint8_t k[N_BLOCK], uint8_t *rc )
{   uint8_t cc;

    /* undo the word chaining, last word first */
    for( cc = 12; cc > 0; cc -= 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
    *rc = d2(*rc);                  /* previous round constant */
    /* undo RotWord + SubWord + round constant on the first word */
    k[0] ^= s_box(k[13]) ^ *rc;
    k[1] ^= s_box(k[14]);
    k[2] ^= s_box(k[15]);
    k[3] ^= s_box(k[12]);
}
744 
/*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit
 *  keying.  key must hold the FINAL round key (as left in o_key by
 *  aes_encrypt_128); o_key is wound back to the original cipher key
 *  and may alias key.
 */
void aes_decrypt_128( const uint8_t in[N_BLOCK], uint8_t out[N_BLOCK],
                      const uint8_t key[N_BLOCK], uint8_t o_key[N_BLOCK] )
{
    /* rc = 0x6c = f2(0x36), one xtime step past the last (10th) round
       constant, so the first d2 in the key update yields 0x36 */
    uint8_t s1[N_BLOCK], r, rc = 0x6c;
    if(o_key != key)
        block_copy( o_key, key );

    copy_and_key( s1, in, o_key );      /* undo final AddRoundKey */
    inv_shift_sub_rows( s1 );

    for( r = 10 ; --r ; )               /* nine inverse rounds    */
#if defined( VERSION_1 )
    {
        update_decrypt_key_128( o_key, &rc );   /* step key backwards */
        add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint8_t s2[N_BLOCK];
        update_decrypt_key_128( o_key, &rc );
        copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    update_decrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );     /* undo initial AddRoundKey */
}
774 
775 #endif
776 
777 #if defined( AES_ENC_256_OTFK )
778 
/*  The 'on the fly' encryption key update for 256 bit keys: advances
 *  the whole 32-byte schedule window (two round keys) by one step.
 *  *rc holds the round constant, advanced once per call by f2.
 */
static void update_encrypt_key_256( uint8_t k[2 * N_BLOCK], uint8_t *rc )
{   uint8_t cc;

    /* first word of the lower half: RotWord + SubWord + round constant */
    k[0] ^= s_box(k[29]) ^ *rc;
    k[1] ^= s_box(k[30]);
    k[2] ^= s_box(k[31]);
    k[3] ^= s_box(k[28]);
    *rc = f2( *rc );

    /* chain the remaining words of the lower half */
    for(cc = 4; cc < 16; cc += 4)
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }

    /* first word of the upper half: SubWord only, no rotation */
    k[16] ^= s_box(k[12]);
    k[17] ^= s_box(k[13]);
    k[18] ^= s_box(k[14]);
    k[19] ^= s_box(k[15]);

    /* chain the remaining words of the upper half */
    for( cc = 20; cc < 32; cc += 4 )
    {
        k[cc + 0] ^= k[cc - 4];
        k[cc + 1] ^= k[cc - 3];
        k[cc + 2] ^= k[cc - 2];
        k[cc + 3] ^= k[cc - 1];
    }
}
811 
812 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
813 
/*  Encrypt one 16-byte block with 'on the fly' 256 bit keying.
    in    : plaintext block
    out   : ciphertext block (may alias in — state is held in s1)
    key   : 32-byte cipher key (two round keys' worth)
    o_key : 32-byte working schedule; on return holds the last two
            round keys generated (useful as the starting point for
            on-the-fly decryption).
*/
void aes_encrypt_256( const uint8_t in[N_BLOCK], uint8_t out[N_BLOCK],
                      const uint8_t key[2 * N_BLOCK], uint8_t o_key[2 * N_BLOCK] )
{
    uint8_t s1[N_BLOCK], r, rc = 1;
    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }
    /* initial whitening with the first half of the key */
    copy_and_key( s1, in, o_key );

    /* inner rounds 1..13; a 256 bit key covers TWO round keys, so the
       schedule only needs stepping every other round: odd rounds use
       the upper half as-is, even rounds regenerate the window first */
    for( r = 1 ; r < 14 ; ++r )
#if defined( VERSION_1 )
    {
        mix_sub_columns(s1);
        if( r & 1 )
            add_round_key( s1, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key );
        }
    }
#else
    {   uint8_t s2[N_BLOCK];   /* two-buffer variant: transform s1 into s2, key back into s1 */
        mix_sub_columns( s2, s1 );
        if( r & 1 )
            copy_and_key( s1, s2, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            copy_and_key( s1, s2, o_key );
        }
    }
#endif

    /* final round (14): no column mixing, then the last round key */
    shift_sub_rows( s1 );
    update_encrypt_key_256( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
854 
855 #endif
856 
857 #if defined( AES_DEC_256_OTFK )
858 
/*  The 'on the fly' decryption key update for 256 bit keys */
860 
861 static void update_decrypt_key_256( uint8_t k[2 * N_BLOCK], uint8_t *rc )
862 {   uint8_t cc;
863 
864     for(cc = 28; cc > 16; cc -= 4)
865     {
866         k[cc + 0] ^= k[cc - 4];
867         k[cc + 1] ^= k[cc - 3];
868         k[cc + 2] ^= k[cc - 2];
869         k[cc + 3] ^= k[cc - 1];
870     }
871 
872     k[16] ^= s_box(k[12]);
873     k[17] ^= s_box(k[13]);
874     k[18] ^= s_box(k[14]);
875     k[19] ^= s_box(k[15]);
876 
877     for(cc = 12; cc > 0; cc -= 4)
878     {
879         k[cc + 0] ^= k[cc - 4];
880         k[cc + 1] ^= k[cc - 3];
881         k[cc + 2] ^= k[cc - 2];
882         k[cc + 3] ^= k[cc - 1];
883     }
884 
885     *rc = d2(*rc);
886     k[0] ^= s_box(k[29]) ^ *rc;
887     k[1] ^= s_box(k[30]);
888     k[2] ^= s_box(k[31]);
889     k[3] ^= s_box(k[28]);
890 }
891 
892 /*  Decrypt a single block of 16 bytes with 'on the fly'
893     256 bit keying
894 */
/*  Decrypt one 16-byte block with 'on the fly' 256 bit keying.
    in    : ciphertext block
    out   : plaintext block (may alias in — state is held in s1)
    key   : 32-byte starting schedule window; the schedule is stepped
            *backwards* from it, so this is expected to be the final
            window of the forward expansion (e.g. the o_key left by
            aes_encrypt_256) — NOTE(review): confirm against callers.
    o_key : 32-byte working schedule; rewound as decryption proceeds.
*/
void aes_decrypt_256( const uint8_t in[N_BLOCK], uint8_t out[N_BLOCK],
                      const uint8_t key[2 * N_BLOCK], uint8_t o_key[2 * N_BLOCK] )
{
    /* rc = 0x80: one d2() step ahead of 0x40, the last round constant
       consumed by the 256 bit forward schedule */
    uint8_t s1[N_BLOCK], r, rc = 0x80;

    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }

    /* undo the final round: key addition, then inverse row/byte steps */
    copy_and_key( s1, in, o_key );
    inv_shift_sub_rows( s1 );

    /* inner rounds 13..1; the 32-byte window covers two round keys, so
       the schedule only rewinds every other round: odd rounds step it
       back and use the upper half, even rounds use the lower half */
    for( r = 14 ; --r ; )
#if defined( VERSION_1 )
    {
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key + 16 );
        }
        else
            add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint8_t s2[N_BLOCK];   /* two-buffer variant: s2 is keyed input, result lands in s1 */
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            copy_and_key( s2, s1, o_key + 16 );
        }
        else
            copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* final (round 0) key addition with the fully rewound lower half */
    copy_and_key( out, s1, o_key );
}
935 
936 #endif
937