/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
| 20 | |
| 21 | #include "aesopt.h" |
| 22 | #include "aestab.h" |
| 23 | |
| 24 | #if defined(__cplusplus) |
| 25 | extern "C" |
| 26 | { |
| 27 | #endif |
| 28 | |
/*  State handling helpers.  s(), word_in() and word_out() are supplied by
    the included headers; everything here is expressed in terms of them.

    si(y,x,k,c)  sets column c of state y to word_in(x, c) ^ k[c]
    so(y,x,c)    passes column c of state x to word_out() for block y
*/
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
#define so(y,x,c)   word_out(y, c, s(x,c))

/*  Declarator list for two four-column states: two arrays when ARRAYS is
    defined, otherwise eight separately named scalars (name plus index).
*/
#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/*  The four macros below each act on all four columns.  They are wrapped
    in do { } while(0) so that each expands to exactly one statement and
    stays correct when used as the body of an unbraced if/else or loop.
    Invoke them with a trailing semicolon.

    l_copy(y,x)        copy state x into state y
    state_in(y,x,k)    si() on columns 0..3
    state_out(y,x)     so() on columns 0..3
    round(rm,y,x,k)    apply the per-column round macro rm to columns 0..3
*/
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while(0)
#define state_in(y,x,k) do { si(y,x,k,0); si(y,x,k,1); \
                             si(y,x,k,2); si(y,x,k,3); } while(0)
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); \
                             so(y,x,2); so(y,x,3); } while(0)
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); \
                             rm(y,x,k,2); rm(y,x,k,3); } while(0)
| 43 | |
| 44 | #if ( FUNCS_IN_C & ENCRYPTION_IN_C ) |
| 45 | |
/*  Visual C++ .Net v7.1 provides the fastest encryption code when using
    Pentium optimisation with small code but this is poor for decryption
    so we need to control this with the following VC++ pragmas
*/
| 50 | |
| 51 | #if defined( _MSC_VER ) && !defined( _WIN64 ) |
| 52 | #pragma optimize( "s", on ) |
| 53 | #endif |
| 54 | |
/*  fwd_var(x,r,c) names the column of input state x whose row r byte is
    needed when computing output column c in the forward cipher; the
    selected column is (r + c) mod 4.  Any r or c other than 0, 1 or 2 is
    treated as 3.

    Every alternative is written out as a literal s(x,n) so that, when r
    and c are compile time constants, the whole conditional folds to one
    state variable; with variable arguments it becomes conditional
    clauses (the construction is due to Frank Yellin).
*/

#define fwd_var(x,r,c)\
 ( (c) == 0 ? ( (r) == 0 ? s(x,0) : (r) == 1 ? s(x,1) : (r) == 2 ? s(x,2) : s(x,3))\
 : (c) == 1 ? ( (r) == 0 ? s(x,1) : (r) == 1 ? s(x,2) : (r) == 2 ? s(x,3) : s(x,0))\
 : (c) == 2 ? ( (r) == 0 ? s(x,2) : (r) == 1 ? s(x,3) : (r) == 2 ? s(x,0) : s(x,1))\
 :            ( (r) == 0 ? s(x,3) : (r) == 1 ? s(x,0) : (r) == 2 ? s(x,1) : s(x,2)))

/*  fwd_rnd(y,x,k,c): column c of a normal forward round, y = f(x) ^ k[c].
    The form used depends on which lookup tables are configured.  The two
    table driven forms do not use fwd_mcol(), so dec_fmvars is removed and
    aes_encrypt() then declares no working variables for it.
*/
#if defined(FT4_SET)
#  undef  dec_fmvars
#  define fwd_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#elif defined(FT1_SET)
#  undef  dec_fmvars
#  define fwd_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
#else
#  define fwd_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
#endif

/*  fwd_lrnd(y,x,k,c): column c of the last forward round.  It has the
    same shape as fwd_rnd but uses the 'l' tables, and the table free
    form omits fwd_mcol().
*/
#if defined(FL4_SET)
#  define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
#elif defined(FL1_SET)
#  define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
#else
#  define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
#endif
| 89 | |
| 90 | AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) |
| 91 | { uint_32t locals(b0, b1); |
| 92 | const uint_32t *kp; |
| 93 | #if defined( dec_fmvars ) |
| 94 | dec_fmvars; /* declare variables for fwd_mcol() if needed */ |
| 95 | #endif |
| 96 | |
| 97 | if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) |
| 98 | return EXIT_FAILURE; |
| 99 | |
| 100 | kp = cx->ks; |
| 101 | state_in(b0, in, kp); |
| 102 | |
| 103 | #if (ENC_UNROLL == FULL) |
| 104 | |
| 105 | switch(cx->inf.b[0]) |
| 106 | { |
| 107 | case 14 * 16: |
| 108 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); |
| 109 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); |
| 110 | kp += 2 * N_COLS; |
| 111 | case 12 * 16: |
| 112 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); |
| 113 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); |
| 114 | kp += 2 * N_COLS; |
| 115 | case 10 * 16: |
| 116 | round(fwd_rnd, b1, b0, kp + 1 * N_COLS); |
| 117 | round(fwd_rnd, b0, b1, kp + 2 * N_COLS); |
| 118 | round(fwd_rnd, b1, b0, kp + 3 * N_COLS); |
| 119 | round(fwd_rnd, b0, b1, kp + 4 * N_COLS); |
| 120 | round(fwd_rnd, b1, b0, kp + 5 * N_COLS); |
| 121 | round(fwd_rnd, b0, b1, kp + 6 * N_COLS); |
| 122 | round(fwd_rnd, b1, b0, kp + 7 * N_COLS); |
| 123 | round(fwd_rnd, b0, b1, kp + 8 * N_COLS); |
| 124 | round(fwd_rnd, b1, b0, kp + 9 * N_COLS); |
| 125 | round(fwd_lrnd, b0, b1, kp +10 * N_COLS); |
| 126 | } |
| 127 | |
| 128 | #else |
| 129 | |
| 130 | #if (ENC_UNROLL == PARTIAL) |
| 131 | { uint_32t rnd; |
| 132 | for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) |
| 133 | { |
| 134 | kp += N_COLS; |
| 135 | round(fwd_rnd, b1, b0, kp); |
| 136 | kp += N_COLS; |
| 137 | round(fwd_rnd, b0, b1, kp); |
| 138 | } |
| 139 | kp += N_COLS; |
| 140 | round(fwd_rnd, b1, b0, kp); |
| 141 | #else |
| 142 | { uint_32t rnd; |
| 143 | for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) |
| 144 | { |
| 145 | kp += N_COLS; |
| 146 | round(fwd_rnd, b1, b0, kp); |
| 147 | l_copy(b0, b1); |
| 148 | } |
| 149 | #endif |
| 150 | kp += N_COLS; |
| 151 | round(fwd_lrnd, b0, b1, kp); |
| 152 | } |
| 153 | #endif |
| 154 | |
| 155 | state_out(out, b0); |
| 156 | return EXIT_SUCCESS; |
| 157 | } |
| 158 | |
| 159 | #endif |
| 160 | |
| 161 | #if ( FUNCS_IN_C & DECRYPTION_IN_C) |
| 162 | |
/*  Visual C++ .Net v7.1 provides the fastest encryption code when using
    Pentium optimisation with small code but this is poor for decryption
    so we need to control this with the following VC++ pragmas
*/
| 167 | |
| 168 | #if defined( _MSC_VER ) && !defined( _WIN64 ) |
| 169 | #pragma optimize( "t", on ) |
| 170 | #endif |
| 171 | |
/*  inv_var(x,r,c) names the column of input state x whose row r byte is
    needed when computing output column c in the inverse cipher; the
    selected column is (c - r) mod 4.  Any r or c other than 0, 1 or 2 is
    treated as 3.

    Every alternative is written out as a literal s(x,n) so that, when r
    and c are compile time constants, the whole conditional folds to one
    state variable; with variable arguments it becomes conditional
    clauses (the construction is due to Frank Yellin).
*/

#define inv_var(x,r,c)\
 ( (c) == 0 ? ( (r) == 0 ? s(x,0) : (r) == 1 ? s(x,3) : (r) == 2 ? s(x,2) : s(x,1))\
 : (c) == 1 ? ( (r) == 0 ? s(x,1) : (r) == 1 ? s(x,0) : (r) == 2 ? s(x,3) : s(x,2))\
 : (c) == 2 ? ( (r) == 0 ? s(x,2) : (r) == 1 ? s(x,1) : (r) == 2 ? s(x,0) : s(x,3))\
 :            ( (r) == 0 ? s(x,3) : (r) == 1 ? s(x,2) : (r) == 2 ? s(x,1) : s(x,0)))

/*  inv_rnd(y,x,k,c): column c of a normal inverse round.  The form used
    depends on which lookup tables are configured.  The two table driven
    forms do not use inv_mcol(), so dec_imvars is removed and
    aes_decrypt() then declares no working variables for it.  In the
    table free form the round key is added before inv_mcol() is applied.
*/
#if defined(IT4_SET)
#  undef  dec_imvars
#  define inv_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
#elif defined(IT1_SET)
#  undef  dec_imvars
#  define inv_rnd(y,x,k,c)  (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
#else
#  define inv_rnd(y,x,k,c)  (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
#endif

/*  inv_lrnd(y,x,k,c): column c of the last inverse round.  It uses the
    'l' tables, and the table free form does not call inv_mcol().
*/
#if defined(IL4_SET)
#  define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
#elif defined(IL1_SET)
#  define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
#else
#  define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
#endif
| 206 | |
/* This code can work with the decryption key schedule in the   */
/* order that is used for encryption (where the 1st decryption  */
/* round key is at the high end of the schedule) or with a key  */
/* schedule that has been reversed to put the 1st decryption    */
/* round key at the low end of the schedule in memory (when     */
/* AES_REV_DKS is defined)                                      */

/* key_ofs     1 when the first decryption round key lies at    */
/*             the high end of the schedule, 0 when it lies at  */
/*             the low end                                      */
/* rnd_key(n)  address n round keys (n * N_COLS words) on from  */
/*             kp in the direction decryption proceeds; it      */
/*             uses the variable kp of the enclosing function.  */
/*             n is parenthesised so that an expression such as */
/*             rnd_key(i + 1) is scaled as a whole              */

#ifdef AES_REV_DKS
#define key_ofs     0
#define rnd_key(n)  (kp + (n) * N_COLS)
#else
#define key_ofs     1
#define rnd_key(n)  (kp - (n) * N_COLS)
#endif
| 221 | |
| 222 | AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) |
| 223 | { uint_32t locals(b0, b1); |
| 224 | #if defined( dec_imvars ) |
| 225 | dec_imvars; /* declare variables for inv_mcol() if needed */ |
| 226 | #endif |
| 227 | const uint_32t *kp; |
| 228 | |
| 229 | if( cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16 ) |
| 230 | return EXIT_FAILURE; |
| 231 | |
| 232 | kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0); |
| 233 | state_in(b0, in, kp); |
| 234 | |
| 235 | #if (DEC_UNROLL == FULL) |
| 236 | |
| 237 | kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2)); |
| 238 | switch(cx->inf.b[0]) |
| 239 | { |
| 240 | case 14 * 16: |
| 241 | round(inv_rnd, b1, b0, rnd_key(-13)); |
| 242 | round(inv_rnd, b0, b1, rnd_key(-12)); |
| 243 | case 12 * 16: |
| 244 | round(inv_rnd, b1, b0, rnd_key(-11)); |
| 245 | round(inv_rnd, b0, b1, rnd_key(-10)); |
| 246 | case 10 * 16: |
| 247 | round(inv_rnd, b1, b0, rnd_key(-9)); |
| 248 | round(inv_rnd, b0, b1, rnd_key(-8)); |
| 249 | round(inv_rnd, b1, b0, rnd_key(-7)); |
| 250 | round(inv_rnd, b0, b1, rnd_key(-6)); |
| 251 | round(inv_rnd, b1, b0, rnd_key(-5)); |
| 252 | round(inv_rnd, b0, b1, rnd_key(-4)); |
| 253 | round(inv_rnd, b1, b0, rnd_key(-3)); |
| 254 | round(inv_rnd, b0, b1, rnd_key(-2)); |
| 255 | round(inv_rnd, b1, b0, rnd_key(-1)); |
| 256 | round(inv_lrnd, b0, b1, rnd_key( 0)); |
| 257 | } |
| 258 | |
| 259 | #else |
| 260 | |
| 261 | #if (DEC_UNROLL == PARTIAL) |
| 262 | { uint_32t rnd; |
| 263 | for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd) |
| 264 | { |
| 265 | kp = rnd_key(1); |
| 266 | round(inv_rnd, b1, b0, kp); |
| 267 | kp = rnd_key(1); |
| 268 | round(inv_rnd, b0, b1, kp); |
| 269 | } |
| 270 | kp = rnd_key(1); |
| 271 | round(inv_rnd, b1, b0, kp); |
| 272 | #else |
| 273 | { uint_32t rnd; |
| 274 | for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd) |
| 275 | { |
| 276 | kp = rnd_key(1); |
| 277 | round(inv_rnd, b1, b0, kp); |
| 278 | l_copy(b0, b1); |
| 279 | } |
| 280 | #endif |
| 281 | kp = rnd_key(1); |
| 282 | round(inv_lrnd, b0, b1, kp); |
| 283 | } |
| 284 | #endif |
| 285 | |
| 286 | state_out(out, b0); |
| 287 | return EXIT_SUCCESS; |
| 288 | } |
| 289 | |
| 290 | #endif |
| 291 | |
| 292 | #if defined(__cplusplus) |
| 293 | } |
| 294 | #endif |