Alexandre Lision | 7fd5d3d | 2013-12-04 13:06:40 -0500 | [diff] [blame^] | 1 | /* |
| 2 | --------------------------------------------------------------------------- |
| 3 | Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved. |
| 4 | |
| 5 | The redistribution and use of this software (with or without changes) |
| 6 | is allowed without the payment of fees or royalties provided that: |
| 7 | |
| 8 | source code distributions include the above copyright notice, this |
| 9 | list of conditions and the following disclaimer; |
| 10 | |
| 11 | binary distributions include the above copyright notice, this list |
| 12 | of conditions and the following disclaimer in their documentation. |
| 13 | |
| 14 | This software is provided 'as is' with no explicit or implied warranties |
| 15 | in respect of its operation, including, but not limited to, correctness |
| 16 | and fitness for purpose. |
| 17 | --------------------------------------------------------------------------- |
| 18 | Issue Date: 20/12/2007 |
| 19 | |
| 20 | This file contains the compilation options for AES (Rijndael) and code |
| 21 | that is common across encryption, key scheduling and table generation. |
| 22 | |
| 23 | OPERATION |
| 24 | |
| 25 | These source code files implement the AES algorithm Rijndael designed by |
| 26 | Joan Daemen and Vincent Rijmen. This version is designed for the standard |
| 27 | block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24 |
| 28 | and 32 bytes). |
| 29 | |
| 30 | This version is designed for flexibility and speed using operations on |
| 31 | 32-bit words rather than operations on bytes. It can be compiled with |
| 32 | either big or little endian internal byte order but is faster when the |
| 33 | native byte order for the processor is used. |
| 34 | |
| 35 | THE CIPHER INTERFACE |
| 36 | |
| 37 | The cipher interface is implemented as an array of bytes in which lower |
| 38 | AES bit sequence indexes map to higher numeric significance within bytes. |
| 39 | |
| 40 | uint_8t (an unsigned 8-bit type) |
| 41 | uint_32t (an unsigned 32-bit type) |
| 42 | struct aes_encrypt_ctx (structure for the cipher encryption context) |
| 43 | struct aes_decrypt_ctx (structure for the cipher decryption context) |
| 44 | AES_RETURN the function return type |
| 45 | |
| 46 | C subroutine calls: |
| 47 | |
| 48 | AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]); |
| 49 | AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]); |
| 50 | AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]); |
| 51 | AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out, |
| 52 | const aes_encrypt_ctx cx[1]); |
| 53 | |
| 54 | AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]); |
| 55 | AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]); |
| 56 | AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]); |
| 57 | AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out, |
| 58 | const aes_decrypt_ctx cx[1]); |
| 59 | |
| 60 | IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that |
| 61 | you call aes_init() before AES is used so that the tables are initialised. |
| 62 | |
| 63 | C++ aes class subroutines: |
| 64 | |
| 65 | Class AESencrypt for encryption |
| 66 | |
| 67 | Constructors: |
| 68 | AESencrypt(void) |
| 69 | AESencrypt(const unsigned char *key) - 128 bit key |
| 70 | Members: |
| 71 | AES_RETURN key128(const unsigned char *key) |
| 72 | AES_RETURN key192(const unsigned char *key) |
| 73 | AES_RETURN key256(const unsigned char *key) |
| 74 | AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const |
| 75 | |
| 76 | Class AESdecrypt for decryption |
| 77 | Constructors: |
| 78 | AESdecrypt(void) |
| 79 | AESdecrypt(const unsigned char *key) - 128 bit key |
| 80 | Members: |
| 81 | AES_RETURN key128(const unsigned char *key) |
| 82 | AES_RETURN key192(const unsigned char *key) |
| 83 | AES_RETURN key256(const unsigned char *key) |
| 84 | AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const |
| 85 | */ |
| 86 | |
| 87 | #if !defined( _AESOPT_H ) |
| 88 | #define _AESOPT_H |
| 89 | |
| 90 | #if defined( __cplusplus ) |
| 91 | #include "aescpp.h" |
| 92 | #else |
| 93 | #include "aes.h" |
| 94 | #endif |
| 95 | |
| 96 | /* PLATFORM SPECIFIC INCLUDES */ |
| 97 | |
| 98 | #include "brg_endian.h" |
| 99 | |
| 100 | /* CONFIGURATION - THE USE OF DEFINES |
| 101 | |
| 102 | Later in this section there are a number of defines that control the |
| 103 | operation of the code. In each section, the purpose of each define is |
| 104 | explained so that the relevant form can be included or excluded by |
| 105 | setting either 1's or 0's respectively on the branches of the related |
| 106 | #if clauses. The following local defines should not be changed. |
| 107 | */ |
| 108 | |
/* Flag bits naming the operations that can be built from the C sources;
   they are OR-ed together to form EFUNCS_IN_C, DFUNCS_IN_C and FUNCS_IN_C. */
#define ENCRYPTION_IN_C     0x01
#define DECRYPTION_IN_C     0x02
#define ENC_KEYING_IN_C     0x04
#define DEC_KEYING_IN_C     0x08

/* Number of lookup tables selectable for a round function or key schedule. */
#define NO_TABLES           0
#define ONE_TABLE           1
#define FOUR_TABLES         4

/* Degree of loop unrolling selectable for ENC_UNROLL and DEC_UNROLL. */
#define NONE                0
#define PARTIAL             1
#define FULL                2
| 120 | |
| 121 | /* --- START OF USER CONFIGURED OPTIONS --- */ |
| 122 | |
| 123 | /* 1. BYTE ORDER WITHIN 32 BIT WORDS |
| 124 | |
| 125 | The fundamental data processing units in Rijndael are 8-bit bytes. The |
| 126 | input, output and key input are all enumerated arrays of bytes in which |
| 127 | bytes are numbered starting at zero and increasing to one less than the |
| 128 | number of bytes in the array in question. This enumeration is only used |
| 129 | for naming bytes and does not imply any adjacency or order relationship |
| 130 | from one byte to another. When these inputs and outputs are considered |
| 131 | as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to |
| 132 | byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte. |
| 133 | In this implementation bits are numbered from 0 to 7 starting at the |
| 134 | numerically least significant end of each byte (bit n represents 2^n). |
| 135 | |
| 136 | However, Rijndael can be implemented more efficiently using 32-bit |
| 137 | words by packing bytes into words so that bytes 4*n to 4*n+3 are placed |
| 138 | into word[n]. While in principle these bytes can be assembled into words |
| 139 | in any positions, this implementation only supports the two formats in |
| 140 | which bytes in adjacent positions within words also have adjacent byte |
| 141 | numbers. This order is called big-endian if the lowest numbered bytes |
| 142 | in words have the highest numeric significance and little-endian if the |
| 143 | opposite applies. |
| 144 | |
| 145 | This code can work in either order irrespective of the order used by the |
| 146 | machine on which it runs. Normally the internal byte order will be set |
| 147 | to the order of the processor on which the code is to be run but this |
| 148 | define can be used to reverse this in special situations |
| 149 | |
| 150 | WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set. |
| 151 | This define will hence be redefined later (in section 4) if necessary |
| 152 | */ |
| 153 | |
/* Enable exactly one branch; the first enabled branch wins. */
#if 1       /* follow the byte order of the target platform */
#  define ALGORITHM_BYTE_ORDER  PLATFORM_BYTE_ORDER
#elif 0     /* force little-endian packing of bytes into words */
#  define ALGORITHM_BYTE_ORDER  IS_LITTLE_ENDIAN
#elif 0     /* force big-endian packing of bytes into words */
#  define ALGORITHM_BYTE_ORDER  IS_BIG_ENDIAN
#else
#  error The algorithm byte order is not defined
#endif
| 163 | |
| 164 | /* 2. VIA ACE SUPPORT */ |
| 165 | |
/* VIA ACE can be considered on 32-bit x86 only: either GCC targeting i386,
   or a 32-bit Windows x86 build that is not Win64, not Windows CE and not
   an MSVC release with _MSC_VER <= 800.  The grouping below spells out the
   precedence that && has over ||. */
#if ( defined( __GNUC__ ) && defined( __i386__ ) ) \
 || ( defined( _WIN32 ) && defined( _M_IX86 ) \
      && !( defined( _WIN64 ) || defined( _WIN32_WCE ) \
            || ( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) ) ) )
#  define VIA_ACE_POSSIBLE
#endif
| 171 | |
| 172 | /* Define this option if support for the VIA ACE is required. This uses |
| 173 | inline assembler instructions and is only implemented for the Microsoft, |
| 174 | Intel and GCC compilers. If VIA ACE is known to be present, then defining |
| 175 | ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption |
| 176 | code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if |
| 177 | it is detected (both present and enabled) but the normal AES code will |
| 178 | also be present. |
| 179 | |
| 180 | When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte |
| 181 | aligned; other input/output buffers do not need to be 16 byte aligned |
| 182 | but there are very large performance gains if this can be arranged. |
| 183 | VIA ACE also requires the decryption key schedule to be in reverse |
| 184 | order (which later checks below ensure). |
| 185 | */ |
| 186 | |
/* Both switches are only considered when VIA ACE is possible at all and
   are left alone if already defined (for example on the command line). */
#if defined( VIA_ACE_POSSIBLE )
#  if 1 && !defined( USE_VIA_ACE_IF_PRESENT )     /* use VIA ACE when detected */
#    define USE_VIA_ACE_IF_PRESENT
#  endif
#  if 0 && !defined( ASSUME_VIA_ACE_PRESENT )     /* assume VIA ACE is always there */
#    define ASSUME_VIA_ACE_PRESENT
#  endif
#endif
| 194 | |
| 195 | /* 3. ASSEMBLER SUPPORT |
| 196 | |
| 197 | This define (which can be on the command line) enables the use of the |
| 198 | assembler code routines for encryption, decryption and key scheduling |
| 199 | as follows: |
| 200 | |
| 201 | ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for |
| 202 | encryption and decryption but with key scheduling in C |
| 203 | ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for |
| 204 | encryption, decryption and key scheduling |
| 205 | ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for |
| 206 | encryption and decryption but with key scheduling in C |
| 207 | ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for |
| 208 | encryption and decryption but with key scheduling in C |
| 209 | |
| 210 | Change one 'if 0' below to 'if 1' to select the version or define |
| 211 | as a compilation option. |
| 212 | */ |
| 213 | |
/* Select at most one assembler variant by changing its '0' to '1'
   (a variant already defined on the command line is left untouched). */
#if 0 && !defined( ASM_X86_V1C )
#  define ASM_X86_V1C
#elif 0 && !defined( ASM_X86_V2 )
#  define ASM_X86_V2
#elif 0 && !defined( ASM_X86_V2C )
#  define ASM_X86_V2C
#elif 0 && !defined( ASM_AMD64_C )
#  define ASM_AMD64_C
#endif

/* The x86 variants require _M_IX86 and the AMD64 variant requires _M_X64. */
#if ( ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ) ) \
      && !defined( _M_IX86 ) ) \
 || ( defined( ASM_AMD64_C ) && !defined( _M_X64 ) )
#  error Assembler code is only available for x86 and AMD64 systems
#endif
| 228 | |
| 229 | /* 4. FAST INPUT/OUTPUT OPERATIONS. |
| 230 | |
| 231 | On some machines it is possible to improve speed by transferring the |
| 232 | bytes in the input and output arrays to and from the internal 32-bit |
| 233 | variables by addressing these arrays as if they are arrays of 32-bit |
| 234 | words. On some machines this will always be possible but there may |
| 235 | be a large performance penalty if the byte arrays are not aligned on |
| 236 | the normal word boundaries. On other machines this technique will |
| 237 | lead to memory access errors when such 32-bit word accesses are not |
| 238 | properly aligned. The option SAFE_IO avoids such problems but will |
| 239 | often be slower on those machines that support misaligned access |
| 240 | (especially so if care is taken to align the input and output byte |
| 241 | arrays on 32-bit word boundaries). If SAFE_IO is not defined it is |
| 242 | assumed that access to byte arrays as if they are arrays of 32-bit |
| 243 | words will not cause problems when such accesses are misaligned. |
| 244 | */ |
/* Byte-wise (alignment-safe) input/output is selected for every compiler
   except Microsoft's; change the outer '1' to '0' to turn it off. */
#if 1
#  if !defined( _MSC_VER )
#    define SAFE_IO
#  endif
#endif
| 248 | |
| 249 | /* 5. LOOP UNROLLING |
| 250 | |
| 251 | The code for encryption and decryption cycles through a number of rounds |
| 252 | that can be implemented either in a loop or by expanding the code into a |
| 253 | long sequence of instructions, the latter producing a larger program but |
| 254 | one that will often be much faster. The latter is called loop unrolling. |
| 255 | There are also potential speed advantages in expanding two iterations in |
| 256 | a loop with half the number of iterations, which is called partial loop |
| 257 | unrolling. The following options allow partial or full loop unrolling |
| 258 | to be set independently for encryption and decryption |
| 259 | */ |
/* Unrolling of the encryption rounds: FULL, PARTIAL or NONE
   (the first enabled branch is the one that applies). */
#if 1
#  define ENC_UNROLL  FULL
#elif 0
#  define ENC_UNROLL  PARTIAL
#else
#  define ENC_UNROLL  NONE
#endif

/* Unrolling of the decryption rounds, chosen the same way. */
#if 1
#  define DEC_UNROLL  FULL
#elif 0
#  define DEC_UNROLL  PARTIAL
#else
#  define DEC_UNROLL  NONE
#endif

/* Plain on/off switches.  They are not referenced again in this header;
   the names suggest unrolling of the encryption and decryption key
   schedules (to be confirmed against the key scheduling source). */
#if 1
#  define ENC_KS_UNROLL
#endif

#if 1
#  define DEC_KS_UNROLL
#endif
| 283 | |
| 284 | /* 6. FAST FINITE FIELD OPERATIONS |
| 285 | |
| 286 | If this section is included, tables are used to provide faster finite |
| 287 | field arithmetic (this has no effect if FIXED_TABLES is defined). |
| 288 | */ |
#if 1       /* use tables for finite field arithmetic */
#  define FF_TABLES
#endif

/*  7. INTERNAL STATE VARIABLE FORMAT

    The internal state of Rijndael is stored in a number of local 32-bit
    word variables which can be defined either as an array or as individually
    named variables. Include this section if you want to store these local
    variables in arrays. Otherwise individual local variables will be used.
*/
#if 1       /* hold the state in arrays */
#  define ARRAYS
#endif

/*  8. FIXED OR DYNAMIC TABLES

    When this section is included the tables used by the code are compiled
    statically into the binary file. Otherwise the subroutine aes_init()
    must be called to compute them before the code is first used.
*/
#if 1
#  if !( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) )
#    define FIXED_TABLES
#  endif
#endif
| 313 | |
| 314 | /* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES |
| 315 | |
| 316 | In some systems it is better to mask longer values to extract bytes |
| 317 | rather than using a cast. This option allows this choice. |
| 318 | */ |
/* to_byte(x): reduce a longer value to its low eight bits. */
#if 1       /* mask off everything above the low byte */
#  define to_byte(x)  ((x) & 0xff)
#else       /* narrow with a cast instead */
#  define to_byte(x)  ((uint_8t)(x))
#endif
| 324 | |
| 325 | /* 10. TABLE ALIGNMENT |
| 326 | |
| 327 | On some systems speed will be improved by aligning the AES large lookup |
| 328 | tables on particular boundaries. This define should be set to a power of |
| 329 | two giving the desired alignment. It can be left undefined if alignment |
| 330 | is not needed. This option is specific to the Microsoft VC++ compiler - |
| 331 | it seems to sometimes cause trouble for the VC++ version 6 compiler. |
| 332 | */ |
| 333 | |
#if 1
#  if defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
#    define TABLE_ALIGN 32
#  endif
#endif

/*  11. REDUCE CODE AND TABLE SIZE

    This replaces some expanded macros with function calls if ASM_X86_V2 or
    ASM_X86_V2C are defined
*/

#if 1
#  if defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )
#    define REDUCE_CODE_SIZE
#  endif
#endif
| 347 | |
| 348 | /* 12. TABLE OPTIONS |
| 349 | |
| 350 | This cipher proceeds by repeating in a number of cycles known as 'rounds' |
| 351 | which are implemented by a round function which can optionally be speeded |
| 352 | up using tables. The basic tables are each 256 32-bit words, with either |
| 353 | one or four tables being required for each round function depending on |
| 354 | how much speed is required. The encryption and decryption round functions |
| 355 | are different and the last encryption and decryption round functions are |
| 356 | different again making four different round functions in all. |
| 357 | |
| 358 | This means that: |
| 359 | 1. Normal encryption and decryption rounds can each use either 0, 1 |
| 360 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each. |
| 361 | 2. The last encryption and decryption rounds can also use either 0, 1 |
| 362 | or 4 tables and table spaces of 0, 1024 or 4096 bytes each. |
| 363 | |
| 364 | Include or exclude the appropriate definitions below to set the number |
| 365 | of tables used by this implementation. |
| 366 | */ |
| 367 | |
/* Tables for the normal encryption round. */
#if 1
#  define ENC_ROUND       FOUR_TABLES
#elif 0
#  define ENC_ROUND       ONE_TABLE
#else
#  define ENC_ROUND       NO_TABLES
#endif

/* Tables for the last encryption round. */
#if 1
#  define LAST_ENC_ROUND  FOUR_TABLES
#elif 0
#  define LAST_ENC_ROUND  ONE_TABLE
#else
#  define LAST_ENC_ROUND  NO_TABLES
#endif

/* Tables for the normal decryption round. */
#if 1
#  define DEC_ROUND       FOUR_TABLES
#elif 0
#  define DEC_ROUND       ONE_TABLE
#else
#  define DEC_ROUND       NO_TABLES
#endif

/* Tables for the last decryption round. */
#if 1
#  define LAST_DEC_ROUND  FOUR_TABLES
#elif 0
#  define LAST_DEC_ROUND  ONE_TABLE
#else
#  define LAST_DEC_ROUND  NO_TABLES
#endif

/*  The decryption key schedule can be speeded up with tables in the same
    way that the round functions can.  Include or exclude the following
    defines to set this requirement.
*/
#if 1
#  define KEY_SCHED       FOUR_TABLES
#elif 0
#  define KEY_SCHED       ONE_TABLE
#else
#  define KEY_SCHED       NO_TABLES
#endif
| 411 | |
| 412 | /* ---- END OF USER CONFIGURED OPTIONS ---- */ |
| 413 | |
| 414 | /* VIA ACE support is only available for VC++ and GCC */ |
| 415 | |
/*  VIA ACE support is only available for VC++ and GCC, so any request for
    it is dropped on other compilers.  #undef of a name that is not defined
    is harmless, so no defined() guards are needed. */
#if !defined( _MSC_VER ) && !defined( __GNUC__ )
#  undef ASSUME_VIA_ACE_PRESENT
#  undef USE_VIA_ACE_IF_PRESENT
#endif

/*  Assuming the hardware is present implies using it when present. */
#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
#  define USE_VIA_ACE_IF_PRESENT
#endif

/*  VIA ACE requires the decryption key schedule to be in reverse order. */
#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
#  define AES_REV_DKS
#endif

/* ********** UNDEF - we don't use VIA stuff ****************** */
/*  Both VIA switches are cleared here.  ASSUME_VIA_ACE_PRESENT has to go as
    well: if it stayed defined, the EFUNCS_IN_C / DFUNCS_IN_C selection
    further down would leave out ENCRYPTION_IN_C / DECRYPTION_IN_C even
    though the VIA path meant to replace the C code is switched off.
    AES_REV_DKS is left exactly as derived above, so the decryption key
    schedule order is not changed by this override. */
#undef USE_VIA_ACE_IF_PRESENT
#undef ASSUME_VIA_ACE_PRESENT
| 435 | |
| 436 | /* Assembler support requires the use of platform byte order */ |
| 437 | |
/* With ASM_X86_V1C, ASM_X86_V2C or ASM_AMD64_C the algorithm byte order is
   forced back to the platform byte order if the two differ. */
#if defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
#  if ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER
#    undef  ALGORITHM_BYTE_ORDER
#    define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#  endif
#endif
| 443 | |
| 444 | /* In this implementation the columns of the state array are each held in |
| 445 | 32-bit words. The state array can be held in various ways: in an array |
| 446 | of words, in a number of individual word variables or in a number of |
| 447 | processor registers. The following define maps a variable name x and |
| 448 | a column number c to the way the state array variable is to be held. |
| 449 | The first define below maps the state into an array x[c] whereas the |
| 450 | second form maps the state into a number of individual variables x0, |
| 451 | x1, etc. Another form could map individual state columns to machine |
| 452 | register names. |
| 453 | */ |
| 454 | |
/* s(x,c): name the storage for column c of state x. */
#if !defined( ARRAYS )
#  define s(x,c) x##c      /* separate variables x0, x1, ... (c must be a literal token) */
#else
#  define s(x,c) x[c]      /* element c of the array x */
#endif
| 460 | |
| 461 | /* This implementation provides subroutines for encryption, decryption |
| 462 | and for setting the three key lengths (separately) for encryption |
| 463 | and decryption. Since not all functions are needed, masks are set |
| 464 | up here to determine which will be implemented in C |
| 465 | */ |
| 466 | |
/* EFUNCS_IN_C: encryption-side functions provided by the C code.
     - nothing when AES_ENCRYPT is not defined, or when ASM_X86_V2 is used;
     - key scheduling only when VIA ACE is assumed present or one of the
       "key scheduling in C" assembler variants is selected;
     - encryption and key scheduling otherwise. */
#if defined( AES_ENCRYPT ) \
    && ( defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
         || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) )
#  define EFUNCS_IN_C   ENC_KEYING_IN_C
#elif defined( AES_ENCRYPT ) && !defined( ASM_X86_V2 )
#  define EFUNCS_IN_C   ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
#else
#  define EFUNCS_IN_C   0
#endif

/* DFUNCS_IN_C: the same selection for the decryption side. */
#if defined( AES_DECRYPT ) \
    && ( defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
         || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) )
#  define DFUNCS_IN_C   DEC_KEYING_IN_C
#elif defined( AES_DECRYPT ) && !defined( ASM_X86_V2 )
#  define DFUNCS_IN_C   ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
#else
#  define DFUNCS_IN_C   0
#endif

/* Union of the two masks. */
#define FUNCS_IN_C  ( EFUNCS_IN_C | DFUNCS_IN_C )
| 490 | |
| 491 | /* END OF CONFIGURATION OPTIONS */ |
| 492 | |
/* Derived from the block size in bytes: five times (words per block - 2),
   which is 10 for the 16-byte AES block. */
#define RC_LENGTH   ((AES_BLOCK_SIZE / 4 - 2) * 5)
| 494 | |
| 495 | /* Disable or report errors on some combinations of options */ |
| 496 | |
/* Encryption: the last round may not use more tables than the normal
   round does. */
#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND  NO_TABLES
#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND  ONE_TABLE
#endif

/* Encryption: no unrolling when the normal round uses no tables. */
#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
#  undef  ENC_UNROLL
#  define ENC_UNROLL      NONE
#endif

/* Decryption: the same two adjustments. */
#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND  NO_TABLES
#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND  ONE_TABLE
#endif

#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
#  undef  DEC_UNROLL
#  define DEC_UNROLL      NONE
#endif
| 522 | |
/*  aes_sw32(x): reverse the order of the four bytes in a 32-bit word.  A
    bswap32 or bswap_32 macro is used when one is already defined; otherwise
    the swap is assembled from two rotations.

    brot(x,n): rotate the 32-bit value x left by n bits.  n must satisfy
    0 < n < 32 (a shift by 32 is undefined).  Both arguments are expanded
    more than once, so they must be free of side effects; n is fully
    parenthesised so that an expression such as 4+4 rotates by eight bits.
*/
#if defined( bswap32 )
#  define aes_sw32    bswap32
#elif defined( bswap_32 )
#  define aes_sw32    bswap_32
#else
#  define brot(x,n)   (((uint_32t)(x) << (n)) | ((uint_32t)(x) >> (32 - (n))))
#  define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
#endif
| 531 | |
| 532 | /* upr(x,n): rotates bytes within words by n positions, moving bytes to |
| 533 | higher index positions with wrap around into low positions |
| 534 | ups(x,n): moves bytes by n positions to higher index positions in |
| 535 | words but without wrap around |
| 536 | bval(x,n): extracts a byte from a word |
| 537 | |
| 538 | WARNING: The definitions given here are intended only for use with |
| 539 | unsigned variables and with shift counts that are compile |
| 540 | time constants |
| 541 | */ |
| 542 | |
/* Little-endian words: byte 0 occupies the least significant eight bits. */
#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
#  define upr(w,k)      (((uint_32t)(w) << (8 * (k))) | ((uint_32t)(w) >> (32 - 8 * (k))))
#  define ups(w,k)      ((uint_32t) (w) << (8 * (k)))
#  define bval(w,k)     to_byte((w) >> (8 * (k)))
#  define bytes2word(b0, b1, b2, b3)  \
        ((b0) | ((uint_32t)(b1) << 8) | ((uint_32t)(b2) << 16) | ((uint_32t)(b3) << 24))
#endif

/* Big-endian words: byte 0 occupies the most significant eight bits. */
#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
#  define upr(w,k)      (((uint_32t)(w) >> (8 * (k))) | ((uint_32t)(w) << (32 - 8 * (k))))
#  define ups(w,k)      ((uint_32t) (w) >> (8 * (k)))
#  define bval(w,k)     to_byte((w) >> (24 - 8 * (k)))
#  define bytes2word(b0, b1, b2, b3)  \
        ((b3) | ((uint_32t)(b2) << 8) | ((uint_32t)(b1) << 16) | ((uint_32t)(b0) << 24))
#endif
| 558 | |
/*  word_in(x,c):    read 32-bit word number c from the byte array x.
    word_out(x,c,v): store v as 32-bit word number c of the byte array x.

    With SAFE_IO the word is moved one byte at a time, so x needs no
    particular alignment.  Otherwise x is accessed as an array of 32-bit
    words, with a byte swap when the algorithm and platform byte orders
    differ.

    The word index c is parenthesised in every form so that an expression
    such as i+1 selects the intended word.  The SAFE_IO form of word_out is
    wrapped in do { } while (0) so that 'word_out(...);' is a single
    statement (usable as an unbraced 'if' branch), matching the
    expression-style forms below.  Arguments are expanded more than once
    and must be free of side effects.
*/
#if defined( SAFE_IO )
#  define word_in(x,c)    bytes2word(((const uint_8t*)(x)+4*(c))[0], ((const uint_8t*)(x)+4*(c))[1], \
                                     ((const uint_8t*)(x)+4*(c))[2], ((const uint_8t*)(x)+4*(c))[3])
#  define word_out(x,c,v) do { ((uint_8t*)(x)+4*(c))[0] = bval(v,0); ((uint_8t*)(x)+4*(c))[1] = bval(v,1); \
                               ((uint_8t*)(x)+4*(c))[2] = bval(v,2); ((uint_8t*)(x)+4*(c))[3] = bval(v,3); } while (0)
#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
#  define word_in(x,c)    (*((const uint_32t*)(x)+(c)))
#  define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
#else
#  define word_in(x,c)    aes_sw32(*((const uint_32t*)(x)+(c)))
#  define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
#endif
| 571 | |
| 572 | /* the finite field modular polynomial and elements */ |
| 573 | |
/* The field polynomial with (WPOLY) and without (BPOLY) its x^8 term. */
#define WPOLY   0x011b
#define BPOLY   0x1b

/*  gf_mulx(x): multiply each of the four bytes packed in x by {02} in
    GF(2^8).  Bytes whose top bit is clear (mask gf_c2) are simply doubled;
    for every byte whose top bit is set (mask gf_c1) the reduction value
    BPOLY is XOR-ed into that byte position. */
#define gf_c1   0x80808080
#define gf_c2   0x7f7f7f7f
#define gf_mulx(x)  (((((x) & gf_c1) >> 7) * BPOLY) ^ (((x) & gf_c2) << 1))
| 582 | |
| 583 | /* The following defines provide alternative definitions of gf_mulx that might |
| 584 | give improved performance if a fast 32-bit multiply is not available. Note |
| 585 | that a temporary variable u needs to be defined where gf_mulx is used. |
| 586 | |
| 587 | #define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6)) |
| 588 | #define gf_c4 (0x01010101 * BPOLY) |
| 589 | #define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4) |
| 590 | */ |
| 591 | |
| 592 | /* Work out which tables are needed for the different options */ |
| 593 | |
/*  ASM_X86_V1C forces FOUR_TABLES for every round function and for the key
    schedule, whatever was configured above.  #undef of a name that is not
    defined is harmless, so each setting is simply replaced.  KEY_SCHED is
    now redefined unconditionally like the other four; previously it was
    left undefined if it had not been defined beforehand. */
#if defined( ASM_X86_V1C )
#  undef  ENC_ROUND
#  define ENC_ROUND       FOUR_TABLES
#  undef  LAST_ENC_ROUND
#  define LAST_ENC_ROUND  FOUR_TABLES
#  undef  DEC_ROUND
#  define DEC_ROUND       FOUR_TABLES
#  undef  LAST_DEC_ROUND
#  define LAST_DEC_ROUND  FOUR_TABLES
#  undef  KEY_SCHED
#  define KEY_SCHED       FOUR_TABLES
#endif
| 616 | |
/* Table sets for encryption, requested when encryption is done in C or
   when ASM_X86_V1C is selected. */
#if defined( ASM_X86_V1C ) || ( FUNCS_IN_C & ENCRYPTION_IN_C )
   /* normal round: FT1, FT4 or the plain S-box */
#  if ENC_ROUND == ONE_TABLE
#    define FT1_SET
#  elif ENC_ROUND == FOUR_TABLES
#    define FT4_SET
#  else
#    define SBX_SET
#  endif
   /* last round: FL1, FL4 or the plain S-box (if not already requested) */
#  if LAST_ENC_ROUND == ONE_TABLE
#    define FL1_SET
#  elif LAST_ENC_ROUND == FOUR_TABLES
#    define FL4_SET
#  elif !defined( SBX_SET )
#    define SBX_SET
#  endif
#endif

/* Table sets for decryption, requested when decryption is done in C or
   when ASM_X86_V1C is selected. */
#if defined( ASM_X86_V1C ) || ( FUNCS_IN_C & DECRYPTION_IN_C )
   /* normal round: IT1, IT4 or the plain inverse S-box */
#  if DEC_ROUND == ONE_TABLE
#    define IT1_SET
#  elif DEC_ROUND == FOUR_TABLES
#    define IT4_SET
#  else
#    define ISB_SET
#  endif
   /* last round: IL1, IL4 or the plain inverse S-box (if not already requested) */
#  if LAST_DEC_ROUND == ONE_TABLE
#    define IL1_SET
#  elif LAST_DEC_ROUND == FOUR_TABLES
#    define IL4_SET
#  elif !defined( ISB_SET )
#    define ISB_SET
#  endif
#endif
| 650 | |
/* Table sets for key scheduling in C.  The whole selection is skipped when
   REDUCE_CODE_SIZE is combined with ASM_X86_V2 or ASM_X86_V2C. */
#if !defined( REDUCE_CODE_SIZE ) || ( !defined( ASM_X86_V2 ) && !defined( ASM_X86_V2C ) )
   /* any key schedule in C: an LS set is requested only when the matching
      FL set has not been requested already */
#  if ( FUNCS_IN_C & ( ENC_KEYING_IN_C | DEC_KEYING_IN_C ) )
#    if KEY_SCHED == ONE_TABLE
#      if !defined( FL1_SET ) && !defined( FL4_SET )
#        define LS1_SET
#      endif
#    elif KEY_SCHED == FOUR_TABLES
#      if !defined( FL4_SET )
#        define LS4_SET
#      endif
#    elif !defined( SBX_SET )
#      define SBX_SET
#    endif
#  endif
   /* decryption key schedule in C: additionally request an IM set */
#  if ( FUNCS_IN_C & DEC_KEYING_IN_C )
#    if KEY_SCHED == ONE_TABLE
#      define IM1_SET
#    elif KEY_SCHED == FOUR_TABLES
#      define IM4_SET
#    elif !defined( SBX_SET )
#      define SBX_SET
#    endif
#  endif
#endif
| 675 | |
| 676 | /* generic definitions of Rijndael macros that use tables */ |
| 677 | |
/*  Generic table look-up macros.  For each of the four byte positions
    i = 0..3, vf(x,i,c) supplies a word and rf(i,c) the index of the byte to
    extract from it with bval(); that byte indexes a table.

      no_table:    the four bytes box[...] are packed with bytes2word()
      one_table:   the four words tab[...] are XOR-ed together, the i-th
                   after being passed through op(word,i) for i = 1..3
      four_tables: the four words tab[i][...] are XOR-ed together

    vf, rf and op are macro names and are deliberately not parenthesised
    (parentheses around a function-like macro name suppress its expansion).
*/
#define no_table(x,box,vf,rf,c) bytes2word( \
    box[bval(vf(x,0,c),rf(0,c))], \
    box[bval(vf(x,1,c),rf(1,c))], \
    box[bval(vf(x,2,c),rf(2,c))], \
    box[bval(vf(x,3,c),rf(3,c))])

#define one_table(x,op,tab,vf,rf,c) \
 (     tab[bval(vf(x,0,c),rf(0,c))] \
  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))

#define four_tables(x,tab,vf,rf,c) \
 (  tab[0][bval(vf(x,0,c),rf(0,c))] \
  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
  ^ tab[3][bval(vf(x,3,c),rf(3,c))])

/*  vf1: use the same word x for every byte position.
    rf1: take byte r.
    rf2: take byte (r - c) modulo 4; the added 8 keeps the value
         non-negative before masking.  Arguments are parenthesised so that
         expressions can be passed safely. */
#define vf1(x,r,c)  (x)
#define rf1(r,c)    (r)
#define rf2(r,c)    ((8+(r)-(c))&3)
| 699 | |
| 700 | /* perform forward and inverse column mix operation on four bytes in long word x in */ |
| 701 | /* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */ |
| 702 | |
/* These macro forms are skipped when REDUCE_CODE_SIZE is combined with
   ASM_X86_V2 or ASM_X86_V2C. */
#if !defined( REDUCE_CODE_SIZE ) || ( !defined( ASM_X86_V2 ) && !defined( ASM_X86_V2C ) )

/* fwd_mcol(x): forward column mix of the four bytes in x.  The arithmetic
   form needs the temporary declared by dec_fmvars to be in scope. */
#if defined( FM4_SET )      /* not currently used */
#  define fwd_mcol(x)   four_tables(x,t_use(f,m),vf1,rf1,0)
#elif defined( FM1_SET )    /* not currently used */
#  define fwd_mcol(x)   one_table(x,upr,t_use(f,m),vf1,rf1,0)
#else
#  define dec_fmvars    uint_32t g2
#  define fwd_mcol(x)   (g2 = gf_mulx(x), upr((x), 1) ^ upr((x), 2) ^ upr((x) ^ g2, 3) ^ g2)
#endif

/* inv_mcol(x): inverse column mix of the four bytes in x.  The arithmetic
   form needs the temporaries declared by dec_imvars to be in scope. */
#if defined( IM4_SET )
#  define inv_mcol(x)   four_tables(x,t_use(i,m),vf1,rf1,0)
#elif defined( IM1_SET )
#  define inv_mcol(x)   one_table(x,upr,t_use(i,m),vf1,rf1,0)
#else
#  define dec_imvars    uint_32t g2, g4, g9
#  define inv_mcol(x)   (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
                         upr(g9, 1) ^ upr(g4, 2) ^ upr(g2 ^ g9, 3) ^ g4 ^ g2 ^ (x))
#endif

/* ls_box(x,c): table look-up on the bytes of x with byte selection rf2,
   using the first available of the FL4, LS4, FL1 and LS1 table sets and
   falling back to the plain S-box. */
#if defined( FL4_SET )
#  define ls_box(x,c)   four_tables(x,t_use(f,l),vf1,rf2,c)
#elif defined( LS4_SET )
#  define ls_box(x,c)   four_tables(x,t_use(l,s),vf1,rf2,c)
#elif defined( FL1_SET )
#  define ls_box(x,c)   one_table(x,upr,t_use(f,l),vf1,rf2,c)
#elif defined( LS1_SET )
#  define ls_box(x,c)   one_table(x,upr,t_use(l,s),vf1,rf2,c)
#else
#  define ls_box(x,c)   no_table(x,t_use(s,box),vf1,rf2,c)
#endif

#endif
| 737 | |
/* With ASM_X86_V1C and decryption enabled, ISB_SET is always requested. */
#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT )
#  if !defined( ISB_SET )
#    define ISB_SET
#  endif
#endif
| 741 | |
| 742 | #endif |