Blame - jni/libzrtp/sources/cryptcommon/aesopt.h - jami-client-android

blob: 471e0c9d38d949e247d10700a378b99367a7ff7a [file] [log] [blame]

Alexandre Lision	7fd5d3d	2013-12-04 13:06:40 -0500	[diff] [blame^]	1	/*
				2	---------------------------------------------------------------------------
				3	Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
				4
				5	The redistribution and use of this software (with or without changes)
				6	is allowed without the payment of fees or royalties provided that:
				7
				8	source code distributions include the above copyright notice, this
				9	list of conditions and the following disclaimer;
				10
				11	binary distributions include the above copyright notice, this list
				12	of conditions and the following disclaimer in their documentation.
				13
				14	This software is provided 'as is' with no explicit or implied warranties
				15	in respect of its operation, including, but not limited to, correctness
				16	and fitness for purpose.
				17	---------------------------------------------------------------------------
				18	Issue Date: 20/12/2007
				19
				20	This file contains the compilation options for AES (Rijndael) and code
				21	that is common across encryption, key scheduling and table generation.
				22
				23	OPERATION
				24
				25	These source code files implement the AES algorithm Rijndael designed by
				26	Joan Daemen and Vincent Rijmen. This version is designed for the standard
				27	block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
				28	and 32 bytes).
				29
				30	This version is designed for flexibility and speed using operations on
				31	32-bit words rather than operations on bytes. It can be compiled with
				32	either big or little endian internal byte order but is faster when the
				33	native byte order for the processor is used.
				34
				35	THE CIPHER INTERFACE
				36
				37	The cipher interface is implemented as an array of bytes in which lower
				38	AES bit sequence indexes map to higher numeric significance within bytes.
				39
				40	uint_8t (an unsigned 8-bit type)
				41	uint_32t (an unsigned 32-bit type)
				42	struct aes_encrypt_ctx (structure for the cipher encryption context)
				43	struct aes_decrypt_ctx (structure for the cipher decryption context)
				44	AES_RETURN the function return type
				45
				46	C subroutine calls:
				47
				48	AES_RETURN aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]);
				49	AES_RETURN aes_encrypt_key192(const unsigned char *key, aes_encrypt_ctx cx[1]);
				50	AES_RETURN aes_encrypt_key256(const unsigned char *key, aes_encrypt_ctx cx[1]);
				51	AES_RETURN aes_encrypt(const unsigned char *in, unsigned char *out,
				52	const aes_encrypt_ctx cx[1]);
				53
				54	AES_RETURN aes_decrypt_key128(const unsigned char *key, aes_decrypt_ctx cx[1]);
				55	AES_RETURN aes_decrypt_key192(const unsigned char *key, aes_decrypt_ctx cx[1]);
				56	AES_RETURN aes_decrypt_key256(const unsigned char *key, aes_decrypt_ctx cx[1]);
				57	AES_RETURN aes_decrypt(const unsigned char *in, unsigned char *out,
				58	const aes_decrypt_ctx cx[1]);
				59
				60	IMPORTANT NOTE: If you are using this C interface with dynamic tables make sure that
				61	you call aes_init() before AES is used so that the tables are initialised.
				62
				63	C++ aes class subroutines:
				64
				65	Class AESencrypt for encryption
				66
				67	Constructors:
				68	AESencrypt(void)
				69	AESencrypt(const unsigned char *key) - 128 bit key
				70	Members:
				71	AES_RETURN key128(const unsigned char *key)
				72	AES_RETURN key192(const unsigned char *key)
				73	AES_RETURN key256(const unsigned char *key)
				74	AES_RETURN encrypt(const unsigned char *in, unsigned char *out) const
				75
				76	Class AESdecrypt for decryption
				77	Constructors:
				78	AESdecrypt(void)
				79	AESdecrypt(const unsigned char *key) - 128 bit key
				80	Members:
				81	AES_RETURN key128(const unsigned char *key)
				82	AES_RETURN key192(const unsigned char *key)
				83	AES_RETURN key256(const unsigned char *key)
				84	AES_RETURN decrypt(const unsigned char *in, unsigned char *out) const
				85	*/
				86
				87	#if !defined( _AESOPT_H )
				88	#define _AESOPT_H
				89
				90	#if defined( __cplusplus )
				91	#include "aescpp.h"
				92	#else
				93	#include "aes.h"
				94	#endif
				95
				96	/* PLATFORM SPECIFIC INCLUDES */
				97
				98	#include "brg_endian.h"
				99
				100	/* CONFIGURATION - THE USE OF DEFINES
				101
				102	Later in this section there are a number of defines that control the
				103	operation of the code. In each section, the purpose of each define is
				104	explained so that the relevant form can be included or excluded by
				105	setting either 1's or 0's respectively on the branches of the related
				106	#if clauses. The following local defines should not be changed.
				107	*/
				108
				109	#define ENCRYPTION_IN_C 1 /* bit flag: block encryption is implemented in C */
				110	#define DECRYPTION_IN_C 2 /* bit flag: block decryption is implemented in C */
				111	#define ENC_KEYING_IN_C 4 /* bit flag: encryption key scheduling is implemented in C */
				112	#define DEC_KEYING_IN_C 8 /* bit flag: decryption key scheduling is implemented in C */
				113
				114	#define NO_TABLES 0 /* table-count choices for ENC_ROUND, LAST_ENC_ROUND, DEC_ROUND, LAST_DEC_ROUND and KEY_SCHED */
				115	#define ONE_TABLE 1
				116	#define FOUR_TABLES 4
				117	#define NONE 0 /* loop-unrolling choices for ENC_UNROLL and DEC_UNROLL */
				118	#define PARTIAL 1
				119	#define FULL 2
				120
				121	/* --- START OF USER CONFIGURED OPTIONS --- */
				122
				123	/* 1. BYTE ORDER WITHIN 32 BIT WORDS
				124
				125	The fundamental data processing units in Rijndael are 8-bit bytes. The
				126	input, output and key input are all enumerated arrays of bytes in which
				127	bytes are numbered starting at zero and increasing to one less than the
				128	number of bytes in the array in question. This enumeration is only used
				129	for naming bytes and does not imply any adjacency or order relationship
				130	from one byte to another. When these inputs and outputs are considered
				131	as bit sequences, bits 8n to 8n+7 of the bit sequence are mapped to
				132	byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
				133	In this implementation bits are numbered from 0 to 7 starting at the
				134	numerically least significant end of each byte (bit n represents 2^n).
				135
				136	However, Rijndael can be implemented more efficiently using 32-bit
				137	words by packing bytes into words so that bytes 4n to 4n+3 are placed
				138	into word[n]. While in principle these bytes can be assembled into words
				139	in any positions, this implementation only supports the two formats in
				140	which bytes in adjacent positions within words also have adjacent byte
				141	numbers. This order is called big-endian if the lowest numbered bytes
				142	in words have the highest numeric significance and little-endian if the
				143	opposite applies.
				144
				145	This code can work in either order irrespective of the order used by the
				146	machine on which it runs. Normally the internal byte order will be set
				147	to the order of the processor on which the code is to be run but this
				148	define can be used to reverse this in special situations.
				149
				150	WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
				151	This define will hence be redefined later (in section 4) if necessary
				152	*/
				153
				154	#if 1 /* active choice: internal byte order follows the platform */
				155	# define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
				156	#elif 0 /* alternative: force little-endian internal order */
				157	# define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
				158	#elif 0 /* alternative: force big-endian internal order */
				159	# define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
				160	#else /* reached only if every branch above is set to 0 */
				161	# error The algorithm byte order is not defined
				162	#endif
				163
				164	/* 2. VIA ACE SUPPORT */
				165
				166	#if ( defined( __GNUC__ ) && defined( __i386__ ) ) \
				167	|| ( defined( _WIN32 ) && defined( _M_IX86 ) \
				168	&& !(defined( _WIN64 ) || defined( _WIN32_WCE ) || ( defined( _MSC_VER ) && ( _MSC_VER <= 800 ) )) )
				169	# define VIA_ACE_POSSIBLE /* GCC targeting i386, or 32-bit x86 Windows that is not Win64, not WinCE and not MSVC <= 800 */
				170	#endif
				171
				172	/* Define this option if support for the VIA ACE is required. This uses
				173	inline assembler instructions and is only implemented for the Microsoft,
				174	Intel and GCC compilers. If VIA ACE is known to be present, then defining
				175	ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
				176	code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
				177	it is detected (both present and enabled) but the normal AES code will
				178	also be present.
				179
				180	When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
				181	aligned; other input/output buffers do not need to be 16 byte aligned
				182	but there are very large performance gains if this can be arranged.
				183	VIA ACE also requires the decryption key schedule to be in reverse
				184	order (which later checks below ensure).
				185	*/
				186
				187	#if 1 && defined( VIA_ACE_POSSIBLE ) && !defined( USE_VIA_ACE_IF_PRESENT ) /* enabled wherever VIA ACE is possible; NOTE: this macro is #undef'd again unconditionally further down in this file */
				188	# define USE_VIA_ACE_IF_PRESENT
				189	#endif
				190
				191	#if 0 && defined( VIA_ACE_POSSIBLE ) && !defined( ASSUME_VIA_ACE_PRESENT ) /* disabled: change the 0 to 1 to assume VIA ACE is always present */
				192	# define ASSUME_VIA_ACE_PRESENT
				193	# endif
				194
				195	/* 3. ASSEMBLER SUPPORT
				196
				197	This define (which can be on the command line) enables the use of the
				198	assembler code routines for encryption, decryption and key scheduling
				199	as follows:
				200
				201	ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
				202	encryption and decryption but with key scheduling in C
				203	ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
				204	encryption, decryption and key scheduling
				205	ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
				206	encryption and decryption but with key scheduling in C
				207	ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
				208	encryption and decryption but with key scheduling in C
				209
				210	Change one 'if 0' below to 'if 1' to select the version or define
				211	as a compilation option.
				212	*/
				213
				214	#if 0 && !defined( ASM_X86_V1C ) /* every branch is disabled here, so an assembler variant is only active if defined externally */
				215	# define ASM_X86_V1C
				216	#elif 0 && !defined( ASM_X86_V2 )
				217	# define ASM_X86_V2
				218	#elif 0 && !defined( ASM_X86_V2C )
				219	# define ASM_X86_V2C
				220	#elif 0 && !defined( ASM_AMD64_C )
				221	# define ASM_AMD64_C
				222	#endif
				223
				224	#if ( (defined ( ASM_X86_V1C ) || defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) \
				225	&& !defined( _M_IX86 ) ) || ( defined( ASM_AMD64_C ) && !defined( _M_X64 ) ) /* x86 variants need _M_IX86, the AMD64 variant needs _M_X64 */
				226	# error Assembler code is only available for x86 and AMD64 systems
				227	#endif
				228
				229	/* 4. FAST INPUT/OUTPUT OPERATIONS.
				230
				231	On some machines it is possible to improve speed by transferring the
				232	bytes in the input and output arrays to and from the internal 32-bit
				233	variables by addressing these arrays as if they are arrays of 32-bit
				234	words. On some machines this will always be possible but there may
				235	be a large performance penalty if the byte arrays are not aligned on
				236	the normal word boundaries. On other machines this technique will
				237	lead to memory access errors when such 32-bit word accesses are not
				238	properly aligned. The option SAFE_IO avoids such problems but will
				239	often be slower on those machines that support misaligned access
				240	(especially so if care is taken to align the input and output byte
				241	arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
				242	assumed that access to byte arrays as if they are arrays of 32-bit
				243	words will not cause problems when such accesses are misaligned.
				244	*/
				245	#if 1 && !defined( _MSC_VER ) /* byte-wise I/O is selected for every compiler except MSVC */
				246	# define SAFE_IO
				247	#endif
				248
				249	/* 5. LOOP UNROLLING
				250
				251	The code for encryption and decryption cycles through a number of rounds
				252	that can be implemented either in a loop or by expanding the code into a
				253	long sequence of instructions, the latter producing a larger program but
				254	one that will often be much faster. The latter is called loop unrolling.
				255	There are also potential speed advantages in expanding two iterations in
				256	a loop with half the number of iterations, which is called partial loop
				257	unrolling. The following options allow partial or full loop unrolling
				258	to be set independently for encryption and decryption
				259	*/
				260	#if 1 /* active choice: full unrolling for encryption (may be reset to NONE later if ENC_ROUND is NO_TABLES) */
				261	# define ENC_UNROLL FULL
				262	#elif 0 /* alternative: partial unrolling */
				263	# define ENC_UNROLL PARTIAL
				264	#else /* alternative: no unrolling */
				265	# define ENC_UNROLL NONE
				266	#endif
				267
				268	#if 1 /* active choice: full unrolling for decryption (may be reset to NONE later if DEC_ROUND is NO_TABLES) */
				269	# define DEC_UNROLL FULL
				270	#elif 0 /* alternative: partial unrolling */
				271	# define DEC_UNROLL PARTIAL
				272	#else /* alternative: no unrolling */
				273	# define DEC_UNROLL NONE
				274	#endif
				275
				276	#if 1 /* presumably unrolls the encryption key schedule; the flag is consumed outside this file */
				277	# define ENC_KS_UNROLL
				278	#endif
				279
				280	#if 1 /* presumably unrolls the decryption key schedule; the flag is consumed outside this file */
				281	# define DEC_KS_UNROLL
				282	#endif
				283
				284	/* 6. FAST FINITE FIELD OPERATIONS
				285
				286	If this section is included, tables are used to provide faster finite
				287	field arithmetic (this has no effect if FIXED_TABLES is defined).
				288	*/
				289	#if 1 /* enabled: table-based finite field arithmetic; the flag is consumed outside this file */
				290	# define FF_TABLES
				291	#endif
				292
				293	/* 7. INTERNAL STATE VARIABLE FORMAT
				294
				295	The internal state of Rijndael is stored in a number of local 32-bit
				296	word variables which can be defined either as an array or as individual
				297	named variables. Include this section if you want to store these local
				298	variables in arrays. Otherwise individual local variables will be used.
				299	*/
				300	#if 1 /* enabled: the s(x,c) macro defined later in this file expands to x[c] */
				301	# define ARRAYS
				302	#endif
				303
				304	/* 8. FIXED OR DYNAMIC TABLES
				305
				306	When this section is included the tables used by the code are compiled
				307	statically into the binary file. Otherwise the subroutine aes_init()
				308	must be called to compute them before the code is first used.
				309	*/
				310	#if 1 && !(defined( _MSC_VER ) && ( _MSC_VER <= 800 )) /* static tables everywhere except MSVC with _MSC_VER <= 800 */
				311	# define FIXED_TABLES
				312	#endif
				313
				314	/* 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
				315
				316	In some systems it is better to mask longer values to extract bytes
				317	rather than using a cast. This option allows this choice.
				318	*/
				319	#if 0 /* disabled alternative: extract the low byte with a cast */
				320	# define to_byte(x) ((uint_8t)(x))
				321	#else /* active choice: extract the low byte with a mask */
				322	# define to_byte(x) ((x) & 0xff)
				323	#endif
				324
				325	/* 10. TABLE ALIGNMENT
				326
				327	On some systems speed will be improved by aligning the AES large lookup
				328	tables on particular boundaries. This define should be set to a power of
				329	two giving the desired alignment. It can be left undefined if alignment
				330	is not needed. This option is specific to the Microsoft VC++ compiler -
				331	it seems to sometimes cause trouble for the VC++ version 6 compiler.
				332	*/
				333
				334	#if 1 && defined( _MSC_VER ) && ( _MSC_VER >= 1300 ) /* only for MSVC with _MSC_VER >= 1300 */
				335	# define TABLE_ALIGN 32 /* requested table alignment (a power of two) */
				336	#endif
				337
				338	/* 11. REDUCE CODE AND TABLE SIZE
				339
				340	This replaces some expanded macros with function calls if ASM_X86_V2 or
				341	ASM_X86_V2C are defined
				342	*/
				343
				344	#if 1 && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C )) /* only takes effect with the compressed-table x86 assembler variants */
				345	# define REDUCE_CODE_SIZE
				346	#endif
				347
				348	/* 12. TABLE OPTIONS
				349
				350	This cipher proceeds by repeating in a number of cycles known as 'rounds'
				351	which are implemented by a round function which can optionally be speeded
				352	up using tables. The basic tables are each 256 32-bit words, with either
				353	one or four tables being required for each round function depending on
				354	how much speed is required. The encryption and decryption round functions
				355	are different and the last encryption and decryption round functions are
				356	different again making four different round functions in all.
				357
				358	This means that:
				359	1. Normal encryption and decryption rounds can each use either 0, 1
				360	or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
				361	2. The last encryption and decryption rounds can also use either 0, 1
				362	or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
				363
				364	Include or exclude the appropriate definitions below to set the number
				365	of tables used by this implementation.
				366	*/
				367
				368	#if 1 /* set tables for the normal encryption round (active choice: four tables) */
				369	# define ENC_ROUND FOUR_TABLES
				370	#elif 0 /* alternative: one table */
				371	# define ENC_ROUND ONE_TABLE
				372	#else /* alternative: no tables */
				373	# define ENC_ROUND NO_TABLES
				374	#endif
				375
				376	#if 1 /* set tables for the last encryption round (active choice: four tables) */
				377	# define LAST_ENC_ROUND FOUR_TABLES
				378	#elif 0 /* alternative: one table */
				379	# define LAST_ENC_ROUND ONE_TABLE
				380	#else /* alternative: no tables */
				381	# define LAST_ENC_ROUND NO_TABLES
				382	#endif
				383
				384	#if 1 /* set tables for the normal decryption round (active choice: four tables) */
				385	# define DEC_ROUND FOUR_TABLES
				386	#elif 0 /* alternative: one table */
				387	# define DEC_ROUND ONE_TABLE
				388	#else /* alternative: no tables */
				389	# define DEC_ROUND NO_TABLES
				390	#endif
				391
				392	#if 1 /* set tables for the last decryption round (active choice: four tables) */
				393	# define LAST_DEC_ROUND FOUR_TABLES
				394	#elif 0 /* alternative: one table */
				395	# define LAST_DEC_ROUND ONE_TABLE
				396	#else /* alternative: no tables */
				397	# define LAST_DEC_ROUND NO_TABLES
				398	#endif
				399
				400	/* The decryption key schedule can be speeded up with tables in the same
				401	way that the round functions can. Include or exclude the following
				402	defines to set this requirement.
				403	*/
				404	#if 1 /* active choice: four tables for the key schedule */
				405	# define KEY_SCHED FOUR_TABLES
				406	#elif 0 /* alternative: one table */
				407	# define KEY_SCHED ONE_TABLE
				408	#else /* alternative: no tables */
				409	# define KEY_SCHED NO_TABLES
				410	#endif
				411
				412	/* ---- END OF USER CONFIGURED OPTIONS ---- */
				413
				414	/* VIA ACE support is only available for VC++ and GCC */
				415
				416	#if !defined( _MSC_VER ) && !defined( __GNUC__ ) /* neither MSVC nor GCC: drop both VIA ACE requests */
				417	# if defined( ASSUME_VIA_ACE_PRESENT )
				418	# undef ASSUME_VIA_ACE_PRESENT
				419	# endif
				420	# if defined( USE_VIA_ACE_IF_PRESENT )
				421	# undef USE_VIA_ACE_IF_PRESENT
				422	# endif
				423	#endif
				424
				425	#if defined( ASSUME_VIA_ACE_PRESENT ) && !defined( USE_VIA_ACE_IF_PRESENT ) /* assuming presence implies using it */
				426	# define USE_VIA_ACE_IF_PRESENT
				427	#endif
				428
				429	#if defined( USE_VIA_ACE_IF_PRESENT ) && !defined ( AES_REV_DKS ) /* VIA ACE needs the reversed decryption key schedule */
				430	# define AES_REV_DKS
				431	#endif
				432
				433	/* ******** UNDEF - we don't use VIA stuff **************** */
				434	#undef USE_VIA_ACE_IF_PRESENT /* NOTE(review): AES_REV_DKS set just above, and ASSUME_VIA_ACE_PRESENT if it was defined, are left in place - confirm this is intended */
				435
				436	/* Assembler support requires the use of platform byte order */
				437
				438	#if ( defined( ASM_X86_V1C ) || defined( ASM_X86_V2C ) || defined( ASM_AMD64_C ) ) \
				439	&& (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER) /* NOTE(review): ASM_X86_V2 is not part of this test - confirm against the assembler sources */
				440	# undef ALGORITHM_BYTE_ORDER
				441	# define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER /* replace a non-native user choice with the platform order */
				442	#endif
				443
				444	/* In this implementation the columns of the state array are each held in
				445	32-bit words. The state array can be held in various ways: in an array
				446	of words, in a number of individual word variables or in a number of
				447	processor registers. The following define maps a variable name x and
				448	a column number c to the way the state array variable is to be held.
				449	The first define below maps the state into an array x[c] whereas the
				450	second form maps the state into a number of individual variables x0,
				451	x1, etc. Another form could map individual state columns to machine
				452	register names.
				453	*/
				454
				455	#if defined( ARRAYS ) /* state column c of x is the array element x[c] */
				456	# define s(x,c) x[c]
				457	#else /* state column c of x is a separate variable whose name is x with c appended */
				458	# define s(x,c) x##c
				459	#endif
				460
				461	/* This implementation provides subroutines for encryption, decryption
				462	and for setting the three key lengths (separately) for encryption
				463	and decryption. Since not all functions are needed, masks are set
				464	up here to determine which will be implemented in C
				465	*/
				466
				467	#if !defined( AES_ENCRYPT ) /* encryption not requested: nothing on the encryption side is built in C */
				468	# define EFUNCS_IN_C 0
				469	#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
				470	|| defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
				471	# define EFUNCS_IN_C ENC_KEYING_IN_C /* only the encryption key schedule is built in C */
				472	#elif !defined( ASM_X86_V2 )
				473	# define EFUNCS_IN_C ( ENCRYPTION_IN_C | ENC_KEYING_IN_C ) /* no assembler or VIA ACE: both encryption and its key schedule in C */
				474	#else
				475	# define EFUNCS_IN_C 0 /* ASM_X86_V2: neither encryption nor its key schedule in C */
				476	#endif
				477
				478	#if !defined( AES_DECRYPT ) /* decryption not requested: nothing on the decryption side is built in C */
				479	# define DFUNCS_IN_C 0
				480	#elif defined( ASSUME_VIA_ACE_PRESENT ) || defined( ASM_X86_V1C ) \
				481	|| defined( ASM_X86_V2C ) || defined( ASM_AMD64_C )
				482	# define DFUNCS_IN_C DEC_KEYING_IN_C /* only the decryption key schedule is built in C */
				483	#elif !defined( ASM_X86_V2 )
				484	# define DFUNCS_IN_C ( DECRYPTION_IN_C | DEC_KEYING_IN_C ) /* no assembler or VIA ACE: both decryption and its key schedule in C */
				485	#else
				486	# define DFUNCS_IN_C 0 /* ASM_X86_V2: neither decryption nor its key schedule in C */
				487	#endif
				488
				489	#define FUNCS_IN_C ( EFUNCS_IN_C | DFUNCS_IN_C ) /* combined mask of the *_IN_C bit flags for both directions */
				490
				491	/* END OF CONFIGURATION OPTIONS */
				492
				493	#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) /* AES_BLOCK_SIZE is defined outside this file; evaluates to 10 if it is 16 - TODO confirm */
				494
				495	/* Disable or report errors on some combinations of options */
				496
				497	#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES /* the last round may not use tables when the normal round uses none */
				498	# undef LAST_ENC_ROUND
				499	# define LAST_ENC_ROUND NO_TABLES
				500	#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES /* cap the last round at one table when the normal round uses one */
				501	# undef LAST_ENC_ROUND
				502	# define LAST_ENC_ROUND ONE_TABLE
				503	#endif
				504
				505	#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE /* table-free encryption rounds are never unrolled */
				506	# undef ENC_UNROLL
				507	# define ENC_UNROLL NONE
				508	#endif
				509
				510	#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES /* the last round may not use tables when the normal round uses none */
				511	# undef LAST_DEC_ROUND
				512	# define LAST_DEC_ROUND NO_TABLES
				513	#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES /* cap the last round at one table when the normal round uses one */
				514	# undef LAST_DEC_ROUND
				515	# define LAST_DEC_ROUND ONE_TABLE
				516	#endif
				517
				518	#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE /* table-free decryption rounds are never unrolled */
				519	# undef DEC_UNROLL
				520	# define DEC_UNROLL NONE
				521	#endif
				522
				523	#if defined( bswap32 ) /* reuse a byte-swap macro if one with this name is already visible */
				524	# define aes_sw32 bswap32
				525	#elif defined( bswap_32 )
				526	# define aes_sw32 bswap_32
				527	#else /* fallback: 32-bit byte swap built from two rotations and two masks */
				528	# define brot(x,n) (((uint_32t)(x) << (n)) | ((uint_32t)(x) >> (32 - (n)))) /* rotate left by n bits; n must be 1..31 */
				529	# define aes_sw32(x) ((brot((x),8) & 0x00ff00ff) | (brot((x),24) & 0xff00ff00))
				530	#endif
				531
				532	/* upr(x,n): rotates bytes within words by n positions, moving bytes to
				533	higher index positions with wrap around into low positions
				534	ups(x,n): moves bytes by n positions to higher index positions in
				535	words but without wrap around
				536	bval(x,n): extracts a byte from a word
				537
				538	WARNING: The definitions given here are intended only for use with
				539	unsigned variables and with shift counts that are compile
				540	time constants
				541	*/
				542
				543	#if ( ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN ) /* byte 0 of a word is its least significant byte */
				544	# define upr(x,n) (((uint_32t)(x) << (8 * (n))) | ((uint_32t)(x) >> (32 - 8 * (n)))) /* n must be 1..3 so that both shift counts stay below 32 */
				545	# define ups(x,n) ((uint_32t) (x) << (8 * (n)))
				546	# define bval(x,n) to_byte((x) >> (8 * (n)))
				547	# define bytes2word(b0, b1, b2, b3) \
				548	(((uint_32t)(b3) << 24) | ((uint_32t)(b2) << 16) | ((uint_32t)(b1) << 8) | (b0))
				549	#endif
				550
				551	#if ( ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN ) /* byte 0 of a word is its most significant byte */
				552	# define upr(x,n) (((uint_32t)(x) >> (8 * (n))) | ((uint_32t)(x) << (32 - 8 * (n)))) /* n must be 1..3 so that both shift counts stay below 32 */
				553	# define ups(x,n) ((uint_32t) (x) >> (8 * (n)))
				554	# define bval(x,n) to_byte((x) >> (24 - 8 * (n)))
				555	# define bytes2word(b0, b1, b2, b3) \
				556	(((uint_32t)(b0) << 24) | ((uint_32t)(b1) << 16) | ((uint_32t)(b2) << 8) | (b3))
				557	#endif
				558
				559	#if defined( SAFE_IO ) /* byte-wise access: word c of x is read/written one byte at a time, so alignment of x does not matter */
				560	# define word_in(x,c) bytes2word(((const uint_8t*)(x)+4*(c))[0], ((const uint_8t*)(x)+4*(c))[1], \
				561	((const uint_8t*)(x)+4*(c))[2], ((const uint_8t*)(x)+4*(c))[3])
				562	# define word_out(x,c,v) { ((uint_8t*)(x)+4*(c))[0] = bval(v,0); ((uint_8t*)(x)+4*(c))[1] = bval(v,1); \
				563	((uint_8t*)(x)+4*(c))[2] = bval(v,2); ((uint_8t*)(x)+4*(c))[3] = bval(v,3); }
				564	#elif ( ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER ) /* direct 32-bit access; x is treated as an array of uint_32t */
				565	# define word_in(x,c) (*((uint_32t*)(x)+(c)))
				566	# define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = (v))
				567	#else /* direct 32-bit access with a byte swap because the internal order differs from the platform order */
				568	# define word_in(x,c) aes_sw32(*((uint_32t*)(x)+(c)))
				569	# define word_out(x,c,v) (*((uint_32t*)(x)+(c)) = aes_sw32(v))
				570	#endif
				571
				572	/* the finite field modular polynomial and elements */
				573
				574	#define WPOLY 0x011b /* field polynomial including the x^8 term */
				575	#define BPOLY 0x1b /* the same polynomial with the x^8 term dropped (low 8 bits of WPOLY) */
				576
				577	/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
				578
				579	#define gf_c1 0x80808080 /* selects the top bit of each of the four bytes */
				580	#define gf_c2 0x7f7f7f7f /* selects the low seven bits of each of the four bytes */
				581	#define gf_mulx(x) ((((x) & gf_c2) << 1) ^ ((((x) & gf_c1) >> 7) * BPOLY)) /* shift each byte left by one, then XOR BPOLY into every byte whose top bit was set */
				582
				583	/* The following defines provide alternative definitions of gf_mulx that might
				584	give improved performance if a fast 32-bit multiply is not available. Note
				585	that a temporary variable u needs to be defined where gf_mulx is used.
				586
				587	#define gf_mulx(x) (u = (x) & gf_c1, u |= (u >> 1), ((x) & gf_c2) << 1) ^ ((u >> 3) | (u >> 6))
				588	#define gf_c4 (0x01010101 * BPOLY)
				589	#define gf_mulx(x) (u = (x) & gf_c1, ((x) & gf_c2) << 1) ^ ((u - (u >> 7)) & gf_c4)
				590	*/
				591
				592	/* Work out which tables are needed for the different options */
				593
				594	#if defined( ASM_X86_V1C ) /* with this assembler variant every round type is forced to FOUR_TABLES, overriding the user options */
				595	# if defined( ENC_ROUND )
				596	# undef ENC_ROUND
				597	# endif
				598	# define ENC_ROUND FOUR_TABLES
				599	# if defined( LAST_ENC_ROUND )
				600	# undef LAST_ENC_ROUND
				601	# endif
				602	# define LAST_ENC_ROUND FOUR_TABLES
				603	# if defined( DEC_ROUND )
				604	# undef DEC_ROUND
				605	# endif
				606	# define DEC_ROUND FOUR_TABLES
				607	# if defined( LAST_DEC_ROUND )
				608	# undef LAST_DEC_ROUND
				609	# endif
				610	# define LAST_DEC_ROUND FOUR_TABLES
				611	# if defined( KEY_SCHED ) /* NOTE(review): unlike the four settings above, KEY_SCHED is redefined only if it was already defined */
				612	# undef KEY_SCHED
				613	# define KEY_SCHED FOUR_TABLES
				614	# endif
				615	#endif
				616
				617	#if ( FUNCS_IN_C & ENCRYPTION_IN_C ) || defined( ASM_X86_V1C ) /* encryption in C, or the v1 assembler is in use */
				618	# if ENC_ROUND == ONE_TABLE
				619	# define FT1_SET /* normal encryption round uses one table */
				620	# elif ENC_ROUND == FOUR_TABLES
				621	# define FT4_SET /* normal encryption round uses four tables */
				622	# else
				623	# define SBX_SET /* no round table selected, so SBX_SET is requested instead */
				624	# endif
				625	# if LAST_ENC_ROUND == ONE_TABLE
				626	# define FL1_SET /* last encryption round uses one table */
				627	# elif LAST_ENC_ROUND == FOUR_TABLES
				628	# define FL4_SET /* last encryption round uses four tables */
				629	# elif !defined( SBX_SET )
				630	# define SBX_SET
				631	# endif
				632	#endif
				633
				634	#if ( FUNCS_IN_C & DECRYPTION_IN_C ) || defined( ASM_X86_V1C ) /* decryption in C, or the v1 assembler is in use */
				635	# if DEC_ROUND == ONE_TABLE
				636	# define IT1_SET /* normal decryption round uses one table */
				637	# elif DEC_ROUND == FOUR_TABLES
				638	# define IT4_SET /* normal decryption round uses four tables */
				639	# else
				640	# define ISB_SET /* no round table selected, so ISB_SET is requested instead */
				641	# endif
				642	# if LAST_DEC_ROUND == ONE_TABLE
				643	# define IL1_SET /* last decryption round uses one table */
				644	# elif LAST_DEC_ROUND == FOUR_TABLES
				645	# define IL4_SET /* last decryption round uses four tables */
				646	# elif !defined(ISB_SET)
				647	# define ISB_SET
				648	# endif
				649	#endif
				650
				651	#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) /* skipped when the reduced-size v2 assembler build is selected */
				652	# if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)) /* any key scheduling in C */
				653	# if KEY_SCHED == ONE_TABLE
				654	# if !defined( FL1_SET ) && !defined( FL4_SET )
				655	# define LS1_SET /* requested only when no FL table set is already selected (ls_box prefers FL sets) */
				656	# endif
				657	# elif KEY_SCHED == FOUR_TABLES
				658	# if !defined( FL4_SET )
				659	# define LS4_SET /* requested only when FL4_SET is not already selected */
				660	# endif
				661	# elif !defined( SBX_SET )
				662	# define SBX_SET
				663	# endif
				664	# endif
				665	# if (FUNCS_IN_C & DEC_KEYING_IN_C) /* decryption key scheduling in C selects the table set used by inv_mcol */
				666	# if KEY_SCHED == ONE_TABLE
				667	# define IM1_SET
				668	# elif KEY_SCHED == FOUR_TABLES
				669	# define IM4_SET
				670	# elif !defined( SBX_SET )
				671	# define SBX_SET
				672	# endif
				673	# endif
				674	#endif
				675
				676	/* generic definitions of Rijndael macros that use tables */
				677
				678	#define no_table(x,box,vf,rf,c) bytes2word( \
				679	box[bval(vf(x,0,c),rf(0,c))], \
				680	box[bval(vf(x,1,c),rf(1,c))], \
				681	box[bval(vf(x,2,c),rf(2,c))], \
				682	box[bval(vf(x,3,c),rf(3,c))]) /* four byte lookups in box, packed into one word by bytes2word */
				683
				684	#define one_table(x,op,tab,vf,rf,c) \
				685	( tab[bval(vf(x,0,c),rf(0,c))] \
				686	^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
				687	^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
				688	^ op(tab[bval(vf(x,3,c),rf(3,c))],3)) /* XOR of four lookups in a single table; lookups 1..3 are passed through op with counts 1..3 */
				689
				690	#define four_tables(x,tab,vf,rf,c) \
				691	( tab[0][bval(vf(x,0,c),rf(0,c))] \
				692	^ tab[1][bval(vf(x,1,c),rf(1,c))] \
				693	^ tab[2][bval(vf(x,2,c),rf(2,c))] \
				694	^ tab[3][bval(vf(x,3,c),rf(3,c))]) /* XOR of one lookup from each of tab[0]..tab[3] */
				695
				696	#define vf1(x,r,c) (x) /* vf argument: always yields the word x itself */
				697	#define rf1(r,c) (r) /* rf argument: selects byte r regardless of c */
				698	#define rf2(r,c) ((8+r-c)&3) /* rf argument: selects byte (8 + r - c) mod 4 */
				699
				700	/* perform forward and inverse column mix operation on four bytes in long word x in */
				701	/* parallel. NOTE: x must be a simple variable, NOT an expression in these macros. */
				702
				703	#if !(defined( REDUCE_CODE_SIZE ) && (defined( ASM_X86_V2 ) || defined( ASM_X86_V2C ))) /* skipped when the reduced-size v2 assembler build is selected */
				704
				705	#if defined( FM4_SET ) /* not currently used */
				706	# define fwd_mcol(x) four_tables(x,t_use(f,m),vf1,rf1,0)
				707	#elif defined( FM1_SET ) /* not currently used */
				708	# define fwd_mcol(x) one_table(x,upr,t_use(f,m),vf1,rf1,0)
				709	#else /* table-free form: needs the temporary declared by dec_fmvars in the calling scope */
				710	# define dec_fmvars uint_32t g2
				711	# define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ upr((x), 2) ^ upr((x), 1))
				712	#endif
				713
				714	#if defined( IM4_SET )
				715	# define inv_mcol(x) four_tables(x,t_use(i,m),vf1,rf1,0)
				716	#elif defined( IM1_SET )
				717	# define inv_mcol(x) one_table(x,upr,t_use(i,m),vf1,rf1,0)
				718	#else /* table-free form: needs the temporaries declared by dec_imvars in the calling scope */
				719	# define dec_imvars uint_32t g2, g4, g9
				720	# define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = (x) ^ gf_mulx(g4), g4 ^= g9, \
				721	(x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ upr(g4, 2) ^ upr(g9, 1))
				722	#endif
				723
				724	#if defined( FL4_SET ) /* ls_box uses the first available table set, checked in the order FL4, LS4, FL1, LS1 */
				725	# define ls_box(x,c) four_tables(x,t_use(f,l),vf1,rf2,c)
				726	#elif defined( LS4_SET )
				727	# define ls_box(x,c) four_tables(x,t_use(l,s),vf1,rf2,c)
				728	#elif defined( FL1_SET )
				729	# define ls_box(x,c) one_table(x,upr,t_use(f,l),vf1,rf2,c)
				730	#elif defined( LS1_SET )
				731	# define ls_box(x,c) one_table(x,upr,t_use(l,s),vf1,rf2,c)
				732	#else /* no table set available: byte-wise lookups through no_table */
				733	# define ls_box(x,c) no_table(x,t_use(s,box),vf1,rf2,c)
				734	#endif
				735
				736	#endif
				737
				738	#if defined( ASM_X86_V1C ) && defined( AES_DECRYPT ) && !defined( ISB_SET ) /* v1 assembler with decryption enabled always requests ISB_SET */
				739	# define ISB_SET
				740	#endif
				741
				742	#endif