/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
20
21#include "aesopt.h"
22#include "aestab.h"
23
24#if defined(__cplusplus)
25extern "C"
26{
27#endif
28
/* Helpers for loading, storing and stepping the cipher state.

   s(x,c), word_in() and word_out() are not defined in this file; they are
   expected to be supplied by the headers included above.  Depending on
   whether ARRAYS is defined, the two working states are declared by
   locals() either as two 4-element arrays or as eight scalars named
   <x>0..<x>3 and <y>0..<y>3.
*/

/* si: state column c of y = input word c of x XOR key word c (expression) */
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
/* so: write state column c of x to word position c of the output y */
#define so(y,x,c)   word_out(y, c, s(x,c))

#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* The multi-statement helpers below are wrapped in do { } while(0) so that
   each invocation is exactly one statement and stays correct when used as
   the body of an unbraced if/else or loop.  Invoke them with a trailing
   semicolon.

   l_copy     copy all four columns of state x into state y
   state_in   load four columns from x, XORing each with key word k[c]
   state_out  store the four columns of state x to y
   round      apply the per-column round macro rm to columns 0..3
*/
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while(0)
#define state_in(y,x,k) do { si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); } while(0)
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while(0)
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while(0)
43
44#if ( FUNCS_IN_C & ENCRYPTION_IN_C )
45
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimisation with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
50
51#if defined( _MSC_VER ) && !defined( _WIN64 )
52#pragma optimize( "s", on )
53#endif
54
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values.  In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)

   For the forward cipher the selected input column is (c + r) mod 4,
   as the table in fwd_var spells out.  r and c are parenthesised so
   that compound argument expressions still select the intended entry.
*/

#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))

/* fwd_rnd computes column c of output state y from input state x and the
   round key k; aes_encrypt uses it for every round except the last.  The
   two table-driven forms do not call fwd_mcol(), so dec_fmvars (the
   variables aes_encrypt would otherwise declare for fwd_mcol()) is
   undefined for them.
*/
#if defined(FT4_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#elif defined(FT1_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
#endif

/* fwd_lrnd is the per-column macro aes_encrypt uses for the last round;
   its table-free form has no fwd_mcol() step.
*/
#if defined(FL4_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
#endif
89
90AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
91{ uint_32t locals(b0, b1);
92 const uint_32t *kp;
93#if defined( dec_fmvars )
94 dec_fmvars; /* declare variables for fwd_mcol() if needed */
95#endif
96
97 if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
98 return EXIT_FAILURE;
99
100 kp = cx->ks;
101 state_in(b0, in, kp);
102
103#if (ENC_UNROLL == FULL)
104
105 switch(cx->inf.b[0])
106 {
107 case 14 * 16:
108 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
109 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
110 kp += 2 * N_COLS;
111 case 12 * 16:
112 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
113 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
114 kp += 2 * N_COLS;
115 case 10 * 16:
116 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
117 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
118 round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
119 round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
120 round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
121 round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
122 round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
123 round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
124 round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
125 round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
126 }
127
128#else
129
130#if (ENC_UNROLL == PARTIAL)
131 { uint_32t rnd;
132 for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
133 {
134 kp += N_COLS;
135 round(fwd_rnd, b1, b0, kp);
136 kp += N_COLS;
137 round(fwd_rnd, b0, b1, kp);
138 }
139 kp += N_COLS;
140 round(fwd_rnd, b1, b0, kp);
141#else
142 { uint_32t rnd;
143 for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
144 {
145 kp += N_COLS;
146 round(fwd_rnd, b1, b0, kp);
147 l_copy(b0, b1);
148 }
149#endif
150 kp += N_COLS;
151 round(fwd_lrnd, b0, b1, kp);
152 }
153#endif
154
155 state_out(out, b0);
156 return EXIT_SUCCESS;
157}
158
159#endif
160
161#if ( FUNCS_IN_C & DECRYPTION_IN_C)
162
/* Visual C++ .Net v7.1 provides the fastest encryption code when using
   Pentium optimisation with small code but this is poor for decryption
   so we need to control this with the following VC++ pragmas
*/
167
168#if defined( _MSC_VER ) && !defined( _WIN64 )
169#pragma optimize( "t", on )
170#endif
171
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values.  In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)

   For the inverse cipher the selected input column is (c - r) mod 4,
   as the table in inv_var spells out.  r and c are parenthesised so
   that compound argument expressions still select the intended entry.
*/

#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))

/* inv_rnd computes column c of output state y from input state x and the
   round key k; aes_decrypt uses it for every round except the last.  The
   two table-driven forms do not call inv_mcol(), so dec_imvars (the
   variables aes_decrypt would otherwise declare for inv_mcol()) is
   undefined for them.  Note that the table-free form applies inv_mcol()
   after the round key has been XORed in.
*/
#if defined(IT4_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
#elif defined(IT1_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
#endif

/* inv_lrnd is the per-column macro aes_decrypt uses for the last round;
   its table-free form has no inv_mcol() step.
*/
#if defined(IL4_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
#endif
206
/* This code can work with the decryption key schedule in the   */
/* order that is used for encryption (where the 1st decryption  */
/* round key is at the high end of the schedule) or with a key  */
/* schedule that has been reversed to put the 1st decryption    */
/* round key at the low end of the schedule in memory (when     */
/* AES_REV_DKS is defined)                                      */
/*                                                              */
/* key_ofs  is non-zero when the first decryption round key is  */
/*          at the high end of the schedule                     */
/* rnd_key  gives the address n round keys further on in        */
/*          decryption order from the caller's local pointer    */
/*          kp; n is parenthesised so that compound arguments   */
/*          scale correctly                                     */

#ifdef  AES_REV_DKS
#define key_ofs     0
#define rnd_key(n)  (kp + (n) * N_COLS)
#else
#define key_ofs     1
#define rnd_key(n)  (kp - (n) * N_COLS)
#endif
221
222AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
223{ uint_32t locals(b0, b1);
224#if defined( dec_imvars )
225 dec_imvars; /* declare variables for inv_mcol() if needed */
226#endif
227 const uint_32t *kp;
228
229 if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 )
230 return EXIT_FAILURE;
231
232 kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
233 state_in(b0, in, kp);
234
235#if (DEC_UNROLL == FULL)
236
237 kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
238 switch(cx->inf.b[0])
239 {
240 case 14 * 16:
241 round(inv_rnd, b1, b0, rnd_key(-13));
242 round(inv_rnd, b0, b1, rnd_key(-12));
243 case 12 * 16:
244 round(inv_rnd, b1, b0, rnd_key(-11));
245 round(inv_rnd, b0, b1, rnd_key(-10));
246 case 10 * 16:
247 round(inv_rnd, b1, b0, rnd_key(-9));
248 round(inv_rnd, b0, b1, rnd_key(-8));
249 round(inv_rnd, b1, b0, rnd_key(-7));
250 round(inv_rnd, b0, b1, rnd_key(-6));
251 round(inv_rnd, b1, b0, rnd_key(-5));
252 round(inv_rnd, b0, b1, rnd_key(-4));
253 round(inv_rnd, b1, b0, rnd_key(-3));
254 round(inv_rnd, b0, b1, rnd_key(-2));
255 round(inv_rnd, b1, b0, rnd_key(-1));
256 round(inv_lrnd, b0, b1, rnd_key( 0));
257 }
258
259#else
260
261#if (DEC_UNROLL == PARTIAL)
262 { uint_32t rnd;
263 for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
264 {
265 kp = rnd_key(1);
266 round(inv_rnd, b1, b0, kp);
267 kp = rnd_key(1);
268 round(inv_rnd, b0, b1, kp);
269 }
270 kp = rnd_key(1);
271 round(inv_rnd, b1, b0, kp);
272#else
273 { uint_32t rnd;
274 for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
275 {
276 kp = rnd_key(1);
277 round(inv_rnd, b1, b0, kp);
278 l_copy(b0, b1);
279 }
280#endif
281 kp = rnd_key(1);
282 round(inv_lrnd, b0, b1, kp);
283 }
284#endif
285
286 state_out(out, b0);
287 return EXIT_SUCCESS;
288}
289
290#endif
291
292#if defined(__cplusplus)
293}
294#endif