/* web-viewer residue: blob 471e0c9d38d949e247d10700a378b99367a7ff7a [file] [log] [blame]; Alexandre Lision, 7fd5d3d, 2013-12-04 13:06:40 -0500 */
/*
2---------------------------------------------------------------------------
3Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
4
5The redistribution and use of this software (with or without changes)
6is allowed without the payment of fees or royalties provided that:
7
8 source code distributions include the above copyright notice, this
9 list of conditions and the following disclaimer;
10
11 binary distributions include the above copyright notice, this list
12 of conditions and the following disclaimer in their documentation.
13
14This software is provided 'as is' with no explicit or implied warranties
15in respect of its operation, including, but not limited to, correctness
16and fitness for purpose.
17---------------------------------------------------------------------------
18Issue Date: 20/12/2007
19
20 This file contains the compilation options for AES (Rijndael) and code
21 that is common across encryption, key scheduling and table generation.
22
23 OPERATION
24
25 These source code files implement the AES algorithm Rijndael designed by
26 Joan Daemen and Vincent Rijmen. This version is designed for the standard
27 block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
28 and 32 bytes).
29
30 This version is designed for flexibility and speed using operations on
31 32-bit words rather than operations on bytes. It can be compiled with
32 either big or little endian internal byte order but is faster when the
33 native byte order for the processor is used.
34
35 THE CIPHER INTERFACE
36
37 The cipher interface is implemented as an array of bytes in which lower
38 AES bit sequence indexes map to higher numeric significance within bytes.
39
40 uint_8t (an unsigned 8-bit type)
41 uint_32t (an unsigned 32-bit type)
42 struct aes_encrypt_ctx (structure for the cipher encryption context)
43 struct aes_decrypt_ctx (structure for the cipher decryption context)
44 AES_RETURN the function return type
45
46 C subroutine calls:
47
48 AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
49 AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
50 AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
51 AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
52 const aes_encrypt_ctx cx[1]);
53
54 AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
55 AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
56 AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
57 AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
58 const aes_decrypt_ctx cx[1]);
59
60 IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
61 you call aes_init() before AES is used so that the tables are initialised.
62
63 C++ aes class subroutines:
64
65 Class AESencrypt for encryption
66
67 Constructors:
68 AESencrypt(void)
69 AESencrypt(const unsigned char *key) - 128 bit key
70 Members:
71 AES_RETURN key128(const unsigned char *key)
72 AES_RETURN key192(const unsigned char *key)
73 AES_RETURN key256(const unsigned char *key)
74 AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
75
76 Class AESdecrypt for decryption
77 Constructors:
78 AESdecrypt(void)
79 AESdecrypt(const unsigned char *key) - 128 bit key
80 Members:
81 AES_RETURN key128(const unsigned char *key)
82 AES_RETURN key192(const unsigned char *key)
83 AES_RETURN key256(const unsigned char *key)
84 AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
85*/
86
87#if !defined( _AESOPT_H )
88#define _AESOPT_H
89
90#if defined( __cplusplus )
91#include "aescpp.h"
92#else
93#include "aes.h"
94#endif
95
96/* PLATFORM SPECIFIC INCLUDES */
97
98#include "brg_endian.h"
99
/* CONFIGURATION - THE USE OF DEFINES

   Later in this section there are a number of defines that control the
   operation of the code. In each section, the purpose of each define is
   explained so that the relevant form can be included or excluded by
   setting either 1's or 0's respectively on the branches of the related
   #if clauses. The following local defines should not be changed.
*/

/* Bit flags that are OR-ed together into EFUNCS_IN_C, DFUNCS_IN_C and
   FUNCS_IN_C to record which routines are implemented in C.
*/
#define ENCRYPTION_IN_C 1
#define DECRYPTION_IN_C 2
#define ENC_KEYING_IN_C 4
#define DEC_KEYING_IN_C 8

/* Values for ENC_ROUND, LAST_ENC_ROUND, DEC_ROUND, LAST_DEC_ROUND and
   KEY_SCHED (number of lookup tables used).
*/
#define NO_TABLES 0
#define ONE_TABLE 1
#define FOUR_TABLES 4

/* Values for ENC_UNROLL and DEC_UNROLL (degree of loop unrolling). */
#define NONE 0
#define PARTIAL 1
#define FULL 2
120
/* --- START OF USER CONFIGURED OPTIONS --- */

/* 1. BYTE ORDER WITHIN 32 BIT WORDS

   The fundamental data processing units in Rijndael are 8-bit bytes. The
   input, output and key input are all enumerated arrays of bytes in which
   bytes are numbered starting at zero and increasing to one less than the
   number of bytes in the array in question. This enumeration is only used
   for naming bytes and does not imply any adjacency or order relationship
   from one byte to another. When these inputs and outputs are considered
   as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
   byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
   In this implementation bits are numbered from 0 to 7 starting at the
   numerically least significant end of each byte (bit n represents 2^n).

   However, Rijndael can be implemented more efficiently using 32-bit
   words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
   into word[n]. While in principle these bytes can be assembled into words
   in any positions, this implementation only supports the two formats in
   which bytes in adjacent positions within words also have adjacent byte
   numbers. This order is called big-endian if the lowest numbered bytes
   in words have the highest numeric significance and little-endian if the
   opposite applies.

   This code can work in either order irrespective of the order used by the
   machine on which it runs. Normally the internal byte order will be set
   to the order of the processor on which the code is to be run but this
   define can be used to reverse this in special situations.

   WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
   This define will hence be reset to PLATFORM_BYTE_ORDER later (after the
   user configured options) if one of those versions is selected.
*/

#if 1
# define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#elif 0
# define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0
# define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error The algorithm byte order is not defined
#endif
163
/* 2. VIA ACE SUPPORT */

/* VIA ACE is possible on 32-bit x86 with GCC, or on 32-bit x86 Windows
   excluding Win64, Windows CE and Microsoft compilers with
   _MSC_VER <= 800. The grouping below is made explicit; it matches the
   normal precedence of && over ||.
*/
#if ( defined( __GNUC__ ) && defined( __i386__ ) ) \
 || ( defined( _WIN32 ) && defined( _M_IX86 ) \
      && !( defined( _WIN64 ) || defined( _WIN32_WCE ) \
            || ( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) ) ) )
# define VIA_ACE_POSSIBLE
#endif

/* Define this option if support for the VIA ACE is required. This uses
   inline assembler instructions and is only implemented for the Microsoft,
   Intel and GCC compilers. If VIA ACE is known to be present, then defining
   ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
   code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
   it is detected (both present and enabled) but the normal AES code will
   also be present.

   When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
   aligned; other input/output buffers do not need to be 16 byte aligned
   but there are very large performance gains if this can be arranged.
   VIA ACE also requires the decryption key schedule to be in reverse
   order (which later checks below ensure).
*/

#if 1 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT )
# define USE_VIA_ACE_IF_PRESENT
#endif

#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT )
# define ASSUME_VIA_ACE_PRESENT
#endif
194
/* 3. ASSEMBLER SUPPORT

   This define (which can be on the command line) enables the use of the
   assembler code routines for encryption, decryption and key scheduling
   as follows:

   ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
               encryption and decryption but with key scheduling in C
   ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
               encryption, decryption and key scheduling
   ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
               encryption and decryption but with key scheduling in C
   ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
               encryption and decryption but with key scheduling in C

   Change one 'if 0' below to 'if 1' to select the version or define
   as a compilation option.
*/

#if 0 && !defined( ASM_X86_V1C )
# define ASM_X86_V1C
#elif 0 && !defined( ASM_X86_V2 )
# define ASM_X86_V2
#elif 0 && !defined( ASM_X86_V2C )
# define ASM_X86_V2C
#elif 0 && !defined( ASM_AMD64_C )
# define ASM_AMD64_C
#endif

/* The target check relies on _M_IX86 / _M_X64 only, so selecting an
   assembler version with a compiler that does not define them is reported
   as an error here.
*/
#if ( ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ) ) \
      && !defined( _M_IX86 ) ) \
 || ( defined( ASM_AMD64_C ) && !defined( _M_X64 ) )
# error Assembler code is only available for x86 and AMD64 systems
#endif
228
/* 4. FAST INPUT/OUTPUT OPERATIONS.

   On some machines it is possible to improve speed by transferring the
   bytes in the input and output arrays to and from the internal 32-bit
   variables by addressing these arrays as if they are arrays of 32-bit
   words. On some machines this will always be possible but there may
   be a large performance penalty if the byte arrays are not aligned on
   the normal word boundaries. On other machines this technique will
   lead to memory access errors when such 32-bit word accesses are not
   properly aligned. The option SAFE_IO avoids such problems but will
   often be slower on those machines that support misaligned access
   (especially so if care is taken to align the input and output byte
   arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
   assumed that access to byte arrays as if they are arrays of 32-bit
   words will not cause problems when such accesses are misaligned.
*/
#if 1 && !defined( _MSC_VER )
# define SAFE_IO
#endif
248
/* 5. LOOP UNROLLING

   The code for encryption and decryption cycles through a number of rounds
   that can be implemented either in a loop or by expanding the code into a
   long sequence of instructions, the latter producing a larger program but
   one that will often be much faster. The latter is called loop unrolling.
   There are also potential speed advantages in expanding two iterations in
   a loop with half the number of iterations, which is called partial loop
   unrolling. The following options allow partial or full loop unrolling
   to be set independently for encryption and decryption.
*/
#if 1
# define ENC_UNROLL FULL
#elif 0
# define ENC_UNROLL PARTIAL
#else
# define ENC_UNROLL NONE
#endif

#if 1
# define DEC_UNROLL FULL
#elif 0
# define DEC_UNROLL PARTIAL
#else
# define DEC_UNROLL NONE
#endif

/* Unrolling switches for the encryption and decryption key schedules. */
#if 1
# define ENC_KS_UNROLL
#endif

#if 1
# define DEC_KS_UNROLL
#endif
283
/* 6. FAST FINITE FIELD OPERATIONS

   If this section is included, tables are used to provide faster finite
   field arithmetic (this has no effect if FIXED_TABLES is defined).
*/
#if 1
# define FF_TABLES
#endif
292
/* 7. INTERNAL STATE VARIABLE FORMAT

   The internal state of Rijndael is stored in a number of local 32-bit
   word variables which can be defined either as an array or as individually
   named variables. Include this section if you want to store these local
   variables in arrays. Otherwise individual local variables will be used.
*/
#if 1
# define ARRAYS
#endif
303
/* 8. FIXED OR DYNAMIC TABLES

   When this section is included the tables used by the code are compiled
   statically into the binary file. Otherwise the subroutine aes_init()
   must be called to compute them before the code is first used.
   Fixed tables are not selected for Microsoft compilers with
   _MSC_VER <= 800.
*/
#if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 ))
# define FIXED_TABLES
#endif
313
/* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES

   In some systems it is better to mask longer values to extract bytes
   rather than using a cast. This option allows this choice. The masking
   form (the one currently selected) keeps the type of its argument.
*/
#if 0
# define to_byte(x) ((uint_8t)(x))
#else
# define to_byte(x) ((x) & 0xff)
#endif
324
/* 10. TABLE ALIGNMENT

   On some systems speed will be improved by aligning the AES large lookup
   tables on particular boundaries. This define should be set to a power of
   two giving the desired alignment. It can be left undefined if alignment
   is not needed. This option is specific to the Microsoft VC++ compiler -
   it seems to sometimes cause trouble for the VC++ version 6 compiler.
*/

#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 )
# define TABLE_ALIGN 32
#endif
337
/* 11. REDUCE CODE AND TABLE SIZE

   This replaces some expanded macros with function calls if ASM_X86_V2 or
   ASM_X86_V2C are defined
*/

#if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))
# define REDUCE_CODE_SIZE
#endif
347
/* 12. TABLE OPTIONS

   This cipher proceeds by repeating in a number of cycles known as 'rounds'
   which are implemented by a round function which can optionally be speeded
   up using tables. The basic tables are each 256 32-bit words, with either
   one or four tables being required for each round function depending on
   how much speed is required. The encryption and decryption round functions
   are different and the last encryption and decryption round functions are
   different again making four different round functions in all.

   This means that:
     1. Normal encryption and decryption rounds can each use either 0, 1
        or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
     2. The last encryption and decryption rounds can also use either 0, 1
        or 4 tables and table spaces of 0, 1024 or 4096 bytes each.

   Include or exclude the appropriate definitions below to set the number
   of tables used by this implementation.
*/

#if 1 /* set tables for the normal encryption round */
# define ENC_ROUND FOUR_TABLES
#elif 0
# define ENC_ROUND ONE_TABLE
#else
# define ENC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the last encryption round */
# define LAST_ENC_ROUND FOUR_TABLES
#elif 0
# define LAST_ENC_ROUND ONE_TABLE
#else
# define LAST_ENC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the normal decryption round */
# define DEC_ROUND FOUR_TABLES
#elif 0
# define DEC_ROUND ONE_TABLE
#else
# define DEC_ROUND NO_TABLES
#endif

#if 1 /* set tables for the last decryption round */
# define LAST_DEC_ROUND FOUR_TABLES
#elif 0
# define LAST_DEC_ROUND ONE_TABLE
#else
# define LAST_DEC_ROUND NO_TABLES
#endif
399
/* The decryption key schedule can be speeded up with tables in the same
   way that the round functions can. Include or exclude the following
   defines to set this requirement.
*/
#if 1
# define KEY_SCHED FOUR_TABLES
#elif 0
# define KEY_SCHED ONE_TABLE
#else
# define KEY_SCHED NO_TABLES
#endif

/* ---- END OF USER CONFIGURED OPTIONS ---- */
413
/* VIA ACE support is only available for VC++ and GCC */

#if !defined( _MSC_VER ) && !defined( __GNUC__ )
# undef ASSUME_VIA_ACE_PRESENT
# undef USE_VIA_ACE_IF_PRESENT
#endif

/* Assuming VIA ACE is present implies using it. */
#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT )
# define USE_VIA_ACE_IF_PRESENT
#endif

/* VIA ACE needs the decryption key schedule in reverse order. */
#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS )
# define AES_REV_DKS
#endif

/* ********** UNDEF - we don't use VIA stuff ****************** */
/* Both VIA switches are removed together. Leaving ASSUME_VIA_ACE_PRESENT
   defined (e.g. from the command line) would make the EFUNCS_IN_C and
   DFUNCS_IN_C masks below drop the C encryption/decryption routines even
   though the VIA code path is disabled here.
   NOTE(review): AES_REV_DKS, if set above, deliberately stays defined so
   the key schedule layout is unchanged - confirm this is intended.
*/
#undef USE_VIA_ACE_IF_PRESENT
#undef ASSUME_VIA_ACE_PRESENT
435
/* Assembler support requires the use of platform byte order: when an
   assembler version with C key scheduling is selected, any user choice of
   ALGORITHM_BYTE_ORDER is replaced by PLATFORM_BYTE_ORDER.
*/

#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
    && (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
# undef ALGORITHM_BYTE_ORDER
# define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
#endif
443
/* In this implementation the columns of the state array are each held in
   32-bit words. The state array can be held in various ways: in an array
   of words, in a number of individual word variables or in a number of
   processor registers. The following define maps a variable name x and
   a column number c to the way the state array variable is to be held.
   The first define below maps the state into an array x[c] whereas the
   second form maps the state into a number of individual variables x0,
   x1, etc. Another form could map individual state columns to machine
   register names.

   In the second form c must be a literal digit because it is pasted onto
   the name x.
*/

#if defined( ARRAYS )
# define s(x,c) (x)[c]
#else
# define s(x,c) x##c
#endif
460
/* This implementation provides subroutines for encryption, decryption
   and for setting the three key lengths (separately) for encryption
   and decryption. Since not all functions are needed, masks are set
   up here to determine which will be implemented in C.

   For each direction:
     - nothing in C when AES_ENCRYPT / AES_DECRYPT is not defined;
     - key scheduling only when VIA ACE is assumed present or an assembler
       version with C key scheduling is selected;
     - cipher and key scheduling when ASM_X86_V2 is not selected;
     - nothing in C for ASM_X86_V2.
*/

#if !defined( AES_ENCRYPT )
# define EFUNCS_IN_C 0
#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
# define EFUNCS_IN_C ENC_KEYING_IN_C
#elif !defined( ASM_X86_V2 )
# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C )
#else
# define EFUNCS_IN_C 0
#endif

#if !defined( AES_DECRYPT )
# define DFUNCS_IN_C 0
#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
    || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
# define DFUNCS_IN_C DEC_KEYING_IN_C
#elif !defined( ASM_X86_V2 )
# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C )
#else
# define DFUNCS_IN_C 0
#endif

#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C )

/* END OF CONFIGURATION OPTIONS */
492
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
494
/* Disable or report errors on some combinations of options.

   The last-round setting is never allowed to use more tables than the
   normal round, and unrolling is switched off when the normal round uses
   no tables.
*/

#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
# undef LAST_ENC_ROUND
# define LAST_ENC_ROUND NO_TABLES
#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
# undef LAST_ENC_ROUND
# define LAST_ENC_ROUND ONE_TABLE
#endif

#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
# undef ENC_UNROLL
# define ENC_UNROLL NONE
#endif

#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
# undef LAST_DEC_ROUND
# define LAST_DEC_ROUND NO_TABLES
#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
# undef LAST_DEC_ROUND
# define LAST_DEC_ROUND ONE_TABLE
#endif

#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
# undef DEC_UNROLL
# define DEC_UNROLL NONE
#endif
522
/* aes_sw32(x): reverse the byte order of a 32-bit word. An existing
   bswap32 or bswap_32 macro is used when one is defined; otherwise the
   swap is built from two rotations. brot(x,n) rotates x left by n bits
   and is only valid for 0 < n < 32.
*/
#if defined( bswap32 )
# define aes_sw32 bswap32
#elif defined( bswap_32 )
# define aes_sw32 bswap_32
#else
# define brot(x,n) (((uint_32t)(x) << (n)) | ((uint_32t)(x) >> (32 - (n))))
# define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
#endif
531
/* upr(x,n):  rotates bytes within words by n positions, moving bytes to
              higher index positions with wrap around into low positions
   ups(x,n):  moves bytes by n positions to higher index positions in
              words but without wrap around
   bval(x,n): extracts a byte from a word
   bytes2word(b0,b1,b2,b3): packs four bytes into a word with b0 as the
              lowest indexed byte

   WARNING:   The definitions given here are intended only for use with
              unsigned variables and with shift counts that are compile
              time constants. upr requires 0 < n < 4, since n == 0 would
              shift by the full word width.
*/

#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN )
# define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n))))
# define ups(x,n) ((uint_32t) (x) << (8 * (n)))
# define bval(x,n) to_byte((x) >> (8 * (n)))
# define bytes2word(b0, b1, b2, b3) \
    (((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (uint_32t)(b0))
#elif ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN )
# define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n))))
# define ups(x,n) ((uint_32t) (x) >> (8 * (n)))
# define bval(x,n) to_byte((x) >> (24 - 8 * (n)))
# define bytes2word(b0, b1, b2, b3) \
    (((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (uint_32t)(b3))
#endif
558
/* word_in(x,c):    read 32-bit word number c from the byte array x
   word_out(x,c,v): write the value v as 32-bit word number c of x

   With SAFE_IO the transfer is done byte by byte, so x needs no particular
   alignment; word_out is then a brace-enclosed statement that evaluates v
   four times. Otherwise x is accessed as an array of 32-bit words, with a
   byte swap when the algorithm and platform byte orders differ.
*/
#if defined( SAFE_IO )
# define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*(c))[0], ((const uint_8t*)(x)+4*(c))[1], \
                                 ((const uint_8t*)(x)+4*(c))[2], ((const uint_8t*)(x)+4*(c))[3])
# define word_out(x,c,v) { ((uint_8t*)(x)+4*(c))[0] = bval(v,0); ((uint_8t*)(x)+4*(c))[1] = bval(v,1); \
                           ((uint_8t*)(x)+4*(c))[2] = bval(v,2); ((uint_8t*)(x)+4*(c))[3] = bval(v,3); }
#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER )
# define word_in(x,c) (*((const uint_32t*)(x)+(c)))
# define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
#else
# define word_in(x,c) aes_sw32(*((const uint_32t*)(x)+(c)))
# define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
#endif
571
/* the finite field modular polynomial and elements */

#define WPOLY 0x011b
#define BPOLY 0x1b

/* multiply four bytes in GF(2^8) by 'x' {02} in parallel: the low seven
   bits of each byte are shifted left and BPOLY is XORed into every byte
   whose top bit was set
*/

#define gf_c1 0x80808080
#define gf_c2 0x7f7f7f7f
#define gf_mulx(x) ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY))

/* The following defines provide alternative definitions of gf_mulx that might
   give improved performance if a fast 32-bit multiply is not available. Note
   that a temporary variable u needs to be defined where gf_mulx is used.

#define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6))
#define gf_c4 (0x01010101 * BPOLY)
#define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4)
*/
591
/* Work out which tables are needed for the different options */

/* The ASM_X86_V1C assembler version always uses four tables, so every
   table option (including KEY_SCHED) is forced to FOUR_TABLES whatever was
   selected above. #undef of a name that is not defined is harmless.
*/
#if defined( ASM_X86_V1C )
# undef ENC_ROUND
# define ENC_ROUND FOUR_TABLES
# undef LAST_ENC_ROUND
# define LAST_ENC_ROUND FOUR_TABLES
# undef DEC_ROUND
# define DEC_ROUND FOUR_TABLES
# undef LAST_DEC_ROUND
# define LAST_DEC_ROUND FOUR_TABLES
# undef KEY_SCHED
# define KEY_SCHED FOUR_TABLES
#endif
616
/* Encryption tables: FT1/FT4 for the normal round, FL1/FL4 for the last
   round, and SBX_SET when either round uses no tables.
*/
#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C )
# if ENC_ROUND == ONE_TABLE
#  define FT1_SET
# elif ENC_ROUND == FOUR_TABLES
#  define FT4_SET
# else
#  define SBX_SET
# endif
# if LAST_ENC_ROUND == ONE_TABLE
#  define FL1_SET
# elif LAST_ENC_ROUND == FOUR_TABLES
#  define FL4_SET
# elif !defined( SBX_SET )
#  define SBX_SET
# endif
#endif

/* Decryption tables: IT1/IT4 for the normal round, IL1/IL4 for the last
   round, and ISB_SET when either round uses no tables.
*/
#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C )
# if DEC_ROUND == ONE_TABLE
#  define IT1_SET
# elif DEC_ROUND == FOUR_TABLES
#  define IT4_SET
# else
#  define ISB_SET
# endif
# if LAST_DEC_ROUND == ONE_TABLE
#  define IL1_SET
# elif LAST_DEC_ROUND == FOUR_TABLES
#  define IL4_SET
# elif !defined(ISB_SET)
#  define ISB_SET
# endif
#endif
650
/* Key schedule tables (skipped when the reduced-size assembler versions
   are in use). LS1/LS4 are only requested when the matching last-round
   encryption tables (FL1/FL4) are not already being built; IM1/IM4 are
   needed whenever the decryption key schedule is in C.
*/
#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )))
# if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
#  if KEY_SCHED == ONE_TABLE
#   if !defined( FL1_SET ) && !defined( FL4_SET )
#    define LS1_SET
#   endif
#  elif KEY_SCHED == FOUR_TABLES
#   if !defined( FL4_SET )
#    define LS4_SET
#   endif
#  elif !defined( SBX_SET )
#   define SBX_SET
#  endif
# endif
# if (FUNCS_IN_C & DEC_KEYING_IN_C)
#  if KEY_SCHED == ONE_TABLE
#   define IM1_SET
#  elif KEY_SCHED == FOUR_TABLES
#   define IM4_SET
#  elif !defined( SBX_SET )
#   define SBX_SET
#  endif
# endif
#endif
675
/* generic definitions of Rijndael macros that use tables

   In each macro vf(x,r,c) selects the source word and rf(r,c) selects the
   byte of that word used for lookup number r (0..3) of column c.

   no_table:    packs four byte lookups in 'box' into one word
   one_table:   XORs four lookups in a single table, applying op(value, r)
                to lookups 1, 2 and 3
   four_tables: XORs one lookup from each of tab[0] .. tab[3]
*/

#define no_table(x,box,vf,rf,c) bytes2word( \
    box[bval(vf(x,0,c),rf(0,c))], \
    box[bval(vf(x,1,c),rf(1,c))], \
    box[bval(vf(x,2,c),rf(2,c))], \
    box[bval(vf(x,3,c),rf(3,c))])

#define one_table(x,op,tab,vf,rf,c) \
 (     tab[bval(vf(x,0,c),rf(0,c))] \
  ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
  ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
  ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))

#define four_tables(x,tab,vf,rf,c) \
 (  tab[0][bval(vf(x,0,c),rf(0,c))] \
  ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
  ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
  ^ tab[3][bval(vf(x,3,c),rf(3,c))])

/* vf1: use x itself; rf1: byte r; rf2: byte (r - c) mod 4 */
#define vf1(x,r,c) (x)
#define rf1(r,c) (r)
#define rf2(r,c) ((8+(r)-(c))&3)
699
/* perform forward and inverse column mix operation on four bytes in long word x in */
/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros.  */

/* The computed (non-table) forms use the temporaries g2 (fwd_mcol) or
   g2, g4 and g9 (inv_mcol); code using them has to declare these with
   dec_fmvars or dec_imvars. ls_box(x,c) applies the table or S-box
   lookup chosen by the *_SET defines, with rf2 selecting the bytes for
   column c. t_use is not defined in this file.
*/

#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )))

#if defined( FM4_SET ) /* not currently used */
# define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
#elif defined( FM1_SET ) /* not currently used */
# define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
#else
# define dec_fmvars uint_32t g2
# define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
#endif

#if defined( IM4_SET )
# define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
#elif defined( IM1_SET )
# define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
#else
# define dec_imvars uint_32t g2, g4, g9
# define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
    (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
#endif

#if defined( FL4_SET )
# define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
#elif defined( LS4_SET )
# define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
#elif defined( FL1_SET )
# define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
#elif defined( LS1_SET )
# define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
#else
# define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
#endif

#endif
737
738#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET )
739# define ISB_SET
740#endif
741
742#endif