Blame - jni/libzrtp/sources/bnlib/ec/ec.c - jami-client-android

blob: 18e612f82107b353be27e9c7a2310e0c5e336960 [file] [log] [blame]

Alexandre Lision	7fd5d3d	2013-12-04 13:06:40 -0500	[diff] [blame]	1	/*
				2	* Copyright (C) 2012-2013 Werner Dittmann
				3	* All rights reserved. For licensing and other legal details, see the file legal.c.
				4	*
				5	* @author Werner Dittmann <Werner.Dittmann@t-online.de>
				6	*
				7	*/
				8	#include <stdio.h>
				9	#include <stdlib.h>
				10	#include <stdint.h>
				11
				12	#include <bn.h>
				13	#include <bnprint.h>
				14
				15	#include <ec/ec.h>
				16
				17	static BigNum _mpiZero;
				18	static BigNum _mpiOne;
				19	static BigNum _mpiTwo;
				20	static BigNum _mpiThree;
				21	static BigNum _mpiFour;
				22	static BigNum _mpiEight;
				23
				24	static BigNum* mpiZero = &_mpiZero;
				25	static BigNum* mpiOne = &_mpiOne;
				26	static BigNum* mpiTwo = &_mpiTwo;
				27	static BigNum* mpiThree = &_mpiThree;
				28	static BigNum* mpiFour = &_mpiFour;
				29	static BigNum* mpiEight = &_mpiEight;
				30	static int initialized = 0;
				31
				32
				33	/* The following parameters are given:
				34	- The prime modulus p
				35	- The order n
				36	- The 160-bit input seed SEED to the SHA-1 based algorithm (i.e., the domain parameter seed)
				37	- The output c of the SHA-1 based algorithm
				38	- The coefficient b (satisfying b2 c ≡ –27 (mod p))
				39	- The base point x coordinate Gx
				40	- The base point y coordinate Gy
				41	*/
				42
				43	typedef struct _curveData {
				44	char *p;
				45	char *n;
				46	char *SEED;
				47	char *c;
				48	char *b;
				49	char *Gx;
				50	char *Gy;
				51	} curveData;
				52
				53	static curveData nist192 = {
				54	"6277101735386680763835789423207666416083908700390324961279",
				55	"6277101735386680763835789423176059013767194773182842284081",
				56	"3045ae6fc8422f64ed579528d38120eae12196d5",
				57	"3099d2bbbfcb2538542dcd5fb078b6ef5f3d6fe2c745de65",
				58	"64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1",
				59	"188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012",
				60	"07192b95ffc8da78631011ed6b24cdd573f977a11e794811",
				61	};
				62
				63	static curveData nist224 = {
				64	"26959946667150639794667015087019630673557916260026308143510066298881",
				65	"26959946667150639794667015087019625940457807714424391721682722368061",
				66	"bd71344799d5c7fcdc45b59fa3b9ab8f6a948bc5",
				67	"5b056c7e11dd68f40469ee7f3c7a7d74f7d121116506d031218291fb",
				68	"b4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4",
				69	"b70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
				70	"bd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34",
				71	};
				72
				73	static curveData nist256 = {
				74	"115792089210356248762697446949407573530086143415290314195533631308867097853951",
				75	"115792089210356248762697446949407573529996955224135760342422259061068512044369",
				76	"c49d360886e704936a6678e1139d26b7819f7e90",
				77	"7efba1662985be9403cb055c75d4f7e0ce8d84a9c5114abcaf3177680104fa0d",
				78	"5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b",
				79	"6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
				80	"4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5",
				81	};
				82
				83	static curveData nist384 = {
				84	"39402006196394479212279040100143613805079739270465446667948293404245721771496870329047266088258938001861606973112319",
				85	"39402006196394479212279040100143613805079739270465446667946905279627659399113263569398956308152294913554433653942643",
				86	"a335926aa319a27a1d00896a6773a4827acdac73",
				87	"79d1e655f868f02fff48dcdee14151ddb80643c1406d0ca10dfe6fc52009540a495e8042ea5f744f6e184667cc722483",
				88	"b3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875ac656398d8a2ed19d2a85c8edd3ec2aef",
				89	"aa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a385502f25dbf55296c3a545e3872760ab7",
				90	"3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c00a60b1ce1d7e819d7a431d7c90ea0e5f",
				91	};
				92
				93	static curveData nist521 = {
				94	"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151",
				95	"6864797660130609714981900799081393217269435300143305409394463459185543183397655394245057746333217197532963996371363321113864768612440380340372808892707005449",
				96	"d09e8800291cb85396cc6717393284aaa0da64ba",
				97	"0b48bfa5f420a34949539d2bdfc264eeeeb077688e44fbf0ad8f6d0edb37bd6b533281000518e19f1b9ffbe0fe9ed8a3c2200b8f875e523868c70c1e5bf55bad637",
				98	"051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
				99	"c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
				100	"11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650",
				101	};
				102
				103
				104	/*
				105	* The data for curve3617 copied from:
				106	* http://safecurves.cr.yp.to/field.html
				107	* http://safecurves.cr.yp.to/base.html
				108	*/
				109	static curveData curve3617 = {
				110	"3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffef", /* Prime */
				111	"7ffffffffffffffffffffffffffffffffffffffffffffffffffeb3cc92414cf706022b36f1c0338ad63cf181b0e71a5e106af79", /* order */
				112	"", /* SEED */
				113	"", /* c */
				114	"", /* b */
				115	"1a334905141443300218c0631c326e5fcd46369f44c03ec7f57ff35498a4ab4d6d6ba111301a73faa8537c64c4fd3812f3cbc595", /* Gx*/
				116	"22", /* Gy (radix 16) */
				117	};
				118
				119	/*
				120	* The data for curve25519 copied from:
				121	* http://safecurves.cr.yp.to/field.html
				122	* http://safecurves.cr.yp.to/base.html
				123	*
				124	* Note:
				125	* The data for Curve25519 is here for the sake of completeness and to have the same
				126	* set of initialization. One exception if the base point X coordinate (Gx) that we use to
				127	* compute the DH public value, refer to function ecdhGeneratePublic(...) in ecdh.c.
				128	*
				129	* Otherwise the functions use EcCurve structure only to get the pointers to the Curve25519
				130	* wrapper functions.
				131	*
				132	*/
				133	static curveData curve25519 = {
				134	"7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", /* Prime */
				135	"1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed", /* order */
				136	"", /* SEED */
				137	"", /* c */
				138	"", /* b */
				139	"9", /* Gx */
				140	"20ae19a1b8a086b4e01edd2c7748d14c923d4d7e6d7c61b229e9c5a27eced3d9", /* Gy */
				141	};
				142
				143	/============================================================================/
				144	/* Bignum Shorthand Functions */
				145	/============================================================================/
				146
				147	int bnAddMod_ (struct BigNum rslt, struct BigNum n1, struct BigNum *mod)
				148	{
				149	bnAdd (rslt, n1);
				150	if (bnCmp (rslt, mod) >= 0) {
				151	bnSub (rslt, mod);
				152	}
				153	return 0;
				154	}
				155
				156	int bnAddQMod_ (struct BigNum rslt, unsigned n1, struct BigNum mod)
				157	{
				158	bnAddQ (rslt, n1);
				159	if (bnCmp (rslt, mod) >= 0) {
				160	bnSub (rslt, mod);
				161	}
				162	return 0;
				163	}
				164
				165	int bnSubMod_ (struct BigNum rslt, struct BigNum n1, struct BigNum *mod)
				166	{
				167	if (bnCmp (rslt, n1) < 0) {
				168	bnAdd (rslt, mod);
				169	}
				170	bnSub (rslt, n1);
				171	return 0;
				172	}
				173
				174	int bnSubQMod_ (struct BigNum rslt, unsigned n1, struct BigNum mod)
				175	{
				176	if (bnCmpQ (rslt, n1) < 0) {
				177	bnAdd (rslt, mod);
				178	}
				179	bnSubQ (rslt, n1);
				180	return 0;
				181	}
				182
				183	int bnMulMod_ (struct BigNum rslt, struct BigNum n1, struct BigNum n2, struct BigNum mod, const EcCurve *curve)
				184	{
				185	bnMul (rslt, n1, n2);
				186	if (curve)
				187	curve->modOp(rslt, rslt, mod);
				188	else
				189	bnMod(rslt, rslt, mod);
				190	return 0;
				191	}
				192
				193	int bnMulQMod_ (struct BigNum rslt, struct BigNum n1, unsigned n2, struct BigNum mod, const EcCurve curve)
				194	{
				195	bnMulQ (rslt, n1, n2);
				196	if (curve)
				197	curve->modOp(rslt, rslt, mod);
				198	else
				199	bnMod(rslt, rslt, mod);
				200	return 0;
				201	}
				202
				203	int bnSquareMod_ (struct BigNum rslt, struct BigNum n1, struct BigNum mod, const EcCurve curve)
				204	{
				205	bnSquare (rslt, n1);
				206	if (curve)
				207	curve->modOp(rslt, rslt, mod);
				208	else
				209	bnMod(rslt, rslt, mod);
				210	return 0;
				211	}
				212
				213	/*
				214	* Note on the Curve25519 functions and usage of BigNumber:
				215	* In most cases the functions to compute Curve25519 data are small wrapper functions
				216	* that implement the same API as for the other curve functions. The wrapper functions
				217	* then call the very specific, high-efficient function in curve25519-donna.c .
				218	*
				219	* For Curve25519 we don't have a real implementation for point add, point doubling, modulo
				220	* and check public key. Please refer to the actual implementations below.
				221	*/
				222
				223	static int ecGetAffineNist(const EcCurve curve, EcPoint R, const EcPoint *P);
				224	static int ecGetAffineEd(const EcCurve curve, EcPoint R, const EcPoint *P);
				225	static int ecGetAffine25519(const EcCurve curve, EcPoint R, const EcPoint *P);
				226
				227	static int ecDoublePointNist(const EcCurve curve, EcPoint R, const EcPoint *P);
				228	static int ecDoublePointEd(const EcCurve curve, EcPoint R, const EcPoint *P);
				229	static int ecDoublePoint25519(const EcCurve curve, EcPoint R, const EcPoint *P);
				230
				231	static int ecAddPointNist(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q);
				232	static int ecAddPointEd(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q);
				233	static int ecAddPoint25519(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q);
				234
				235	static int ecCheckPubKeyNist(const EcCurve curve, const EcPoint pub);
				236	static int ecCheckPubKey3617(const EcCurve curve, const EcPoint pub);
				237	static int ecCheckPubKey25519(const EcCurve curve, const EcPoint pub);
				238
				239	static int ecGenerateRandomNumberNist(const EcCurve curve, BigNum d);
				240	static int ecGenerateRandomNumber3617(const EcCurve curve, BigNum d);
				241	static int ecGenerateRandomNumber25519(const EcCurve curve, BigNum d);
				242
				243	static int ecMulPointScalarNormal(const EcCurve curve, EcPoint R, const EcPoint P, const BigNum scalar);
				244	static int ecMulPointScalar25519(const EcCurve curve, EcPoint R, const EcPoint P, const BigNum scalar);
				245
				246	/* Forward declaration of new modulo functions for the EC curves */
				247	static int newMod192(BigNum r, const BigNum a, const BigNum *modulo);
				248	static int newMod256(BigNum r, const BigNum a, const BigNum *modulo);
				249	static int newMod384(BigNum r, const BigNum a, const BigNum *modulo);
				250	static int newMod521(BigNum r, const BigNum a, const BigNum *modulo);
				251
				252	static int mod3617(BigNum r, const BigNum a, const BigNum *modulo);
				253	static int mod25519(BigNum r, const BigNum a, const BigNum *modulo);
				254
				255	static void commonInit()
				256	{
				257	bnBegin(mpiZero); bnSetQ(mpiZero, 0);
				258	bnBegin(mpiOne); bnSetQ(mpiOne, 1);
				259	bnBegin(mpiTwo); bnSetQ(mpiTwo, 2);
				260	bnBegin(mpiThree); bnSetQ(mpiThree, 3);
				261	bnBegin(mpiFour); bnSetQ(mpiFour, 4);
				262	bnBegin(mpiEight); bnSetQ(mpiEight, 8);
				263	}
				264
				265	static void curveCommonInit(EcCurve *curve)
				266	{
				267	/* Initialize scratchpad variables and their pointers */
				268	bnBegin(&curve->_S1); curve->S1 = &curve->_S1;
				269	bnBegin(&curve->_U1); curve->U1 = &curve->_U1;
				270	bnBegin(&curve->_H); curve->H = &curve->_H;
				271	bnBegin(&curve->_R); curve->R = &curve->_R;
				272	bnBegin(&curve->_t0); curve->t0 = &curve->_t0;
				273	bnBegin(&curve->_t1); curve->t1 = &curve->_t1;
				274	bnBegin(&curve->_t2); curve->t2 = &curve->_t2;
				275	bnBegin(&curve->_t3); curve->t3 = &curve->_t3;
				276	}
				277
				278	static void curveCommonPrealloc(EcCurve *curve)
				279	{
				280	size_t maxBits;
				281
				282	/* variables must be able to hold p^2, plus one nimb (min. 15 bits) for overflow */
				283	maxBits = bnBits(curve->p) * 2 + 15;
				284
				285	/* The set_bit allocates enough memory to hold maximum values */
				286	/* Initialize scratchpad variables before use */
				287	bnPrealloc(curve->S1, maxBits);
				288	bnPrealloc(curve->U1, maxBits);
				289	bnPrealloc(curve->H, maxBits);
				290	bnPrealloc(curve->R, maxBits);
				291	bnPrealloc(curve->S1, maxBits);
				292	bnPrealloc(curve->t1, maxBits);
				293	bnPrealloc(curve->t2, maxBits);
				294	bnPrealloc(curve->t3, maxBits);
				295	}
				296
				297	int ecGetCurveNistECp(Curves curveId, EcCurve *curve)
				298	{
				299	curveData *cd;
				300
				301	if (curveId >= Curve25519 && curveId <= Curve3617)
				302	return ecGetCurvesCurve(curveId, curve);
				303
				304	if (!initialized) {
				305	commonInit();
				306	initialized = 1;
				307	}
				308	if (curve == NULL)
				309	return -2;
				310
				311	bnBegin(&curve->_p); curve->p = &curve->_p;
				312	bnBegin(&curve->_n); curve->n = &curve->_n;
				313	bnBegin(&curve->_SEED); curve->SEED = &curve->_SEED;
				314	bnBegin(&curve->_c); curve->c = &curve->_c;
				315	bnBegin(&curve->_a); curve->a = &curve->_a;
				316	bnBegin(&curve->_b); curve->b = &curve->_b;
				317	bnBegin(&curve->_Gx); curve->Gx = &curve->_Gx;
				318	bnBegin(&curve->_Gy); curve->Gy = &curve->_Gy;
				319
				320	curveCommonInit(curve);
				321
				322	switch (curveId) {
				323	case NIST192P:
				324	cd = &nist192;
				325	curve->modOp = newMod192;
				326	break;
				327
				328	case NIST224P:
				329	cd = &nist224;
				330	curve->modOp = bnMod;
				331	break;
				332
				333	case NIST256P:
				334	cd = &nist256;
				335	curve->modOp = bnMod;
				336	break;
				337
				338	case NIST384P:
				339	cd = &nist384;
				340	curve->modOp = newMod384;
				341	break;
				342
				343	case NIST521P:
				344	cd = &nist521;
				345	curve->modOp = newMod521;
				346	break;
				347
				348	default:
				349	return -2;
				350	}
				351
				352	curve->affineOp = ecGetAffineNist;
				353	curve->doubleOp = ecDoublePointNist;
				354	curve->addOp = ecAddPointNist;
				355	curve->checkPubOp = ecCheckPubKeyNist;
				356	curve->randomOp = ecGenerateRandomNumberNist;
				357	curve->mulScalar = ecMulPointScalarNormal;
				358
				359	bnReadAscii(curve->p, cd->p, 10);
				360	bnReadAscii(curve->n, cd->n, 10);
				361	bnReadAscii(curve->SEED, cd->SEED, 16);
				362	bnReadAscii(curve->c, cd->c, 16);
				363	bnCopy(curve->a, curve->p);
				364	bnSub(curve->a, mpiThree);
				365	bnReadAscii(curve->b, cd->b, 16);
				366	bnReadAscii(curve->Gx, cd->Gx, 16);
				367	bnReadAscii(curve->Gy, cd->Gy, 16);
				368
				369	curveCommonPrealloc(curve);
				370	curve->id = curveId;
				371
				372	return 0;
				373	}
				374
				375	int ecGetCurvesCurve(Curves curveId, EcCurve *curve)
				376	{
				377	curveData *cd;
				378
				379	if (!initialized) {
				380	commonInit();
				381	initialized = 1;
				382	}
				383	if (curve == NULL)
				384	return -2;
				385
				386	/* set-up all bignum structures, simplifies "free" handling */
				387	bnBegin(&curve->_p); curve->p = &curve->_p;
				388	bnBegin(&curve->_n); curve->n = &curve->_n;
				389	bnBegin(&curve->_SEED); curve->SEED = &curve->_SEED;
				390	bnBegin(&curve->_c); curve->c = &curve->_c;
				391	bnBegin(&curve->_a); curve->a = &curve->_a;
				392	bnBegin(&curve->_b); curve->b = &curve->_b;
				393	bnBegin(&curve->_Gx); curve->Gx = &curve->_Gx;
				394	bnBegin(&curve->_Gy); curve->Gy = &curve->_Gy;
				395
				396	curveCommonInit(curve);
				397
				398	switch (curveId) {
				399	case Curve3617:
				400	cd = &curve3617;
				401	curve->modOp = mod3617;
				402	curve->affineOp = ecGetAffineEd;
				403	curve->doubleOp = ecDoublePointEd;
				404	curve->addOp = ecAddPointEd;
				405	curve->checkPubOp = ecCheckPubKey3617;
				406	curve->randomOp = ecGenerateRandomNumber3617;
				407	curve->mulScalar = ecMulPointScalarNormal;
				408
				409	bnReadAscii(curve->a, "3617", 10);
				410	break;
				411
				412	case Curve25519:
				413	cd = &curve25519;
				414	curve->modOp = mod25519;
				415	curve->affineOp = ecGetAffine25519;
				416	curve->doubleOp = ecDoublePoint25519;
				417	curve->addOp = ecAddPoint25519;
				418	curve->checkPubOp = ecCheckPubKey25519;
				419	curve->randomOp = ecGenerateRandomNumber25519;
				420	curve->mulScalar = ecMulPointScalar25519;
				421
				422	bnReadAscii(curve->a, "486662", 10);
				423	break;
				424
				425	default:
				426	return -2;
				427	}
				428	bnReadAscii(curve->p, cd->p, 16);
				429	bnReadAscii(curve->n, cd->n, 16);
				430
				431	bnReadAscii(curve->Gx, cd->Gx, 16);
				432	bnReadAscii(curve->Gy, cd->Gy, 16);
				433
				434	curveCommonPrealloc(curve);
				435	curve->id = curveId;
				436	return 0;
				437	}
				438
				439	void ecFreeCurveNistECp(EcCurve *curve)
				440	{
				441	if (curve == NULL)
				442	return;
				443
				444	bnEnd(curve->p);
				445	bnEnd(curve->n);
				446	bnEnd(curve->SEED);
				447	bnEnd(curve->c);
				448	bnEnd(curve->b);
				449	bnEnd(curve->Gx);
				450	bnEnd(curve->Gy);
				451
				452	bnEnd(curve->S1);
				453	bnEnd(curve->U1);
				454	bnEnd(curve->H);
				455	bnEnd(curve->R);
				456	bnEnd(curve->t0);
				457	bnEnd(curve->t1);
				458	bnEnd(curve->t2);
				459	bnEnd(curve->t3);
				460	}
				461
				462	/*
				463	* EC point helper functions
				464	*/
				465
				466	void ecInitPoint(EcPoint *P)
				467	{
				468	INIT_EC_POINT(P);
				469	}
				470
				471	void ecFreePoint(EcPoint *P)
				472	{
				473	FREE_EC_POINT(P);
				474	}
				475
				476	void ecSetBasePoint(EcCurve C, EcPoint P)
				477	{
				478	SET_EC_BASE_POINT(C, P);
				479	}
				480
				481	void ecFreeCurvesCurve(EcCurve *curve)
				482	{
				483	ecFreeCurveNistECp(curve);
				484	}
				485
				486	/============================================================================/
				487	/* Elliptic Curve arithmetic */
				488	/============================================================================/
				489
				490	int ecGetAffine(const EcCurve curve, EcPoint R, const EcPoint *P)
				491	{
				492	return curve->affineOp(curve, R, P);
				493	}
				494
				495	static int ecGetAffineNist(const EcCurve curve, EcPoint R, const EcPoint *P)
				496	{
				497	int ret = 0;
				498
				499	struct BigNum z_1, z_2;
				500
				501	bnBegin(&z_1);
				502	bnBegin(&z_2);
				503
				504	/* affine x = X / Z^2 */
				505	bnInv (&z_1, P->z, curve->p); /* z_1 = Z^(-1) */
				506	bnMulMod_(&z_2, &z_1, &z_1, curve->p, curve); /* z_2 = Z^(-2) */
				507	bnMulMod_(R->x, P->x, &z_2, curve->p, curve);
				508
				509	/* affine y = Y / Z^3 */
				510	bnMulMod_(&z_2, &z_2, &z_1, curve->p, curve); /* z_2 = Z^(-3) */
				511	bnMulMod_(R->y, P->y, &z_2, curve->p, curve);
				512
				513	bnSetQ(R->z, 1);
				514
				515	bnEnd(&z_1);
				516	bnEnd(&z_2);
				517	return ret;
				518	}
				519
				520	static int ecGetAffineEd(const EcCurve curve, EcPoint R, const EcPoint *P)
				521	{
				522	int ret = 0;
				523
				524	struct BigNum z_1;
				525
				526	bnBegin(&z_1);
				527
				528	/* affine x = X / Z */
				529	bnInv (&z_1, P->z, curve->p); /* z_1 = Z^(-1) */
				530	bnMulMod_(R->x, P->x, &z_1, curve->p, curve);
				531
				532	/* affine y = Y / Z */
				533	bnMulMod_(R->y, P->y, &z_1, curve->p, curve);
				534
				535	bnSetQ(R->z, 1);
				536
				537	bnEnd(&z_1);
				538	return ret;
				539
				540	}
				541
				542	/*
				543	* If the arguments do not point to the same EcPoint then copy P to result.
				544	* Curve25519 has no specific GetAffine function, it's all inside curve25519-donna
				545	*/
				546	static int ecGetAffine25519(const EcCurve curve, EcPoint R, const EcPoint *P)
				547	{
				548	if (R != P) {
				549	bnCopy(R->x, P->x);
				550	bnCopy(R->y, P->y);
				551	bnCopy(R->z, P->z);
				552	}
				553	return 0;
				554	}
				555
				556	int ecDoublePoint(const EcCurve curve, EcPoint R, const EcPoint *P)
				557	{
				558	return curve->doubleOp(curve, R, P);
				559	}
				560
				561	static int ecDoublePointNist(const EcCurve curve, EcPoint R, const EcPoint *P)
				562	{
				563	int ret = 0;
				564
				565	EcPoint tP;
				566	const EcPoint *ptP = 0;
				567
				568	if (!bnCmp(P->y, mpiZero) \|\| !bnCmp(P->z, mpiZero)) {
				569	bnSetQ(R->x, 1);
				570	bnSetQ(R->y, 1);
				571	bnSetQ(R->z, 0);
				572	return 0;
				573	}
				574
				575	/* Check for overlapping arguments, copy if necessary and set pointer */
				576	if (P == R) {
				577	INIT_EC_POINT(&tP);
				578	ptP = &tP;
				579	bnCopy(tP.x, P->x);
				580	bnCopy(tP.y, P->y);
				581	bnCopy(tP.z, P->z);
				582	}
				583	else
				584	ptP = P;
				585
				586	/* S = 4XY^2, save Y^2 in t1 for later use */
				587	bnMulMod_(curve->t1, ptP->y, ptP->y, curve->p, curve); /* t1 = Y^2 */
				588	bnMulMod_(curve->t0, ptP->x, mpiFour, curve->p, curve); /* t0 = 4 * X */
				589	bnMulMod_(curve->S1, curve->t0, curve->t1, curve->p, curve); /* S1 = t0 * t1 */
				590
				591	/* M = 3(X + Z^2)(X - Z^2), use scratch variable U1 to store M value */
				592	bnMulMod_(curve->t2, ptP->z, ptP->z, curve->p, curve); /* t2 = Z^2 */
				593	bnCopy(curve->t0, ptP->x);
				594	bnAddMod_(curve->t0, curve->t2, curve->p); /* t0 = X + t2 */
				595	bnMulMod_(curve->t3, curve->t0, mpiThree, curve->p, curve); /* t3 = 3 * t0 */
				596	bnCopy(curve->t0, ptP->x);
				597	bnSubMod_(curve->t0, curve->t2, curve->p); /* t0 = X - t2 */
				598	bnMulMod_(curve->U1, curve->t3, curve->t0, curve->p, curve); /* M = t3 * t0 */
				599
				600	/* X' = M^2 - 2S /
				601	bnMulMod_(curve->t2, curve->U1, curve->U1, curve->p, curve); /* t2 = M^2 */
				602	bnMulMod_(curve->t0, curve->S1, mpiTwo, curve->p, curve); /* t0 = S * 2 */
				603	bnCopy(R->x, curve->t2);
				604	bnSubMod_(R->x, curve->t0, curve->p); /* X' = t2 - t0 */
				605
				606	/* Y' = M(S - X') - 8Y^4 */
				607	bnMulMod_(curve->t3, curve->t1, curve->t1, curve->p, curve); /* t3 = Y^4 (t1 saved above) */
				608	bnMulMod_(curve->t2, curve->t3, mpiEight, curve->p, curve); /* t2 = t3 * 8 */
				609	bnCopy(curve->t3, curve->S1);
				610	bnSubMod_(curve->t3, R->x, curve->p); /* t3 = S - X' */
				611	bnMulMod_(curve->t0, curve->U1, curve->t3, curve->p, curve); /* t0 = M * t3 */
				612	bnCopy(R->y, curve->t0);
				613	bnSubMod_(R->y, curve->t2, curve->p); /* Y' = t0 - t2 */
				614
				615	/* Z' = 2YZ */
				616	bnMulMod_(curve->t0, ptP->y, mpiTwo, curve->p, curve); /* t0 = 2 * Y */
				617	bnMulMod_(R->z, curve->t0, ptP->z, curve->p, curve); /* Z' = to * Z */
				618
				619	if (P == R)
				620	FREE_EC_POINT(&tP);
				621
				622	return ret;
				623	}
				624
				625	static int ecDoublePointEd(const EcCurve curve, EcPoint R, const EcPoint *P)
				626	{
				627	EcPoint tP;
				628	const EcPoint *ptP = 0;
				629
				630	/* Check for overlapping arguments, copy if necessary and set pointer */
				631	if (P == R) {
				632	INIT_EC_POINT(&tP);
				633	ptP = &tP;
				634	bnCopy(tP.x, P->x);
				635	bnCopy(tP.y, P->y);
				636	bnCopy(tP.z, P->z);
				637	}
				638	else
				639	ptP = P;
				640
				641	/* Compute B, C, D, H, E */
				642	bnCopy(curve->t1, ptP->x);
				643	bnAddMod_(curve->t1, ptP->y, curve->p);
				644	bnSquareMod_(curve->t0, curve->t1, curve->p, curve); /* t0 -> B */
				645
				646	bnSquareMod_(R->x, ptP->x, curve->p, curve); /* Rx -> C */
				647
				648	bnSquareMod_(R->y, ptP->y, curve->p, curve); /* Ry -> D */
				649
				650	bnSquareMod_(R->z, ptP->z, curve->p, curve); /* Rz -> H */
				651	bnAddMod_(R->z, R->z, curve->p); /* Rz -> 2H */
				652
				653	bnCopy(curve->t1, R->x);
				654	bnAddMod_(curve->t1, R->y, curve->p); /* t1 -> E */
				655
				656	/* Compute Ry */
				657	bnCopy(curve->t2, R->x);
				658	bnSubMod_(curve->t2, R->y, curve->p); /* C - D */
				659	bnMulMod_(R->y, curve->t1, curve->t2, curve->p, curve); /* E * t3; Ry */
				660
				661	/* Compute Rx */
				662	bnSubMod_(curve->t0, curve->t1, curve->p); /* B - E; sub result */
				663	bnCopy(curve->t2, curve->t1);
				664	bnSubMod_(curve->t2, R->z, curve->p); /* t2 -> J; (E - 2H) */
				665	bnMulMod_(R->x, curve->t2, curve->t0, curve->p, curve); /* J * t0 */
				666
				667	/* Compute Rz */
				668	bnMulMod_(R->z, curve->t2, curve->t1, curve->p, curve); /* J * E */
				669
				670	if (P == R)
				671	FREE_EC_POINT(&tP);
				672
				673	return 0;
				674	}
				675
				676	/*
				677	* Curve25519 has no specific Double Point function, all inside curve25519-donna
				678	*/
				679	static int ecDoublePoint25519(const EcCurve curve, EcPoint R, const EcPoint *P)
				680	{
				681	return -2;
				682	}
				683
				684	/* Add two elliptic curve points. Any of them may be the same object. */
				685	int ecAddPoint(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q)
				686	{
				687	return curve->addOp(curve, R, P, Q);
				688	}
				689
				690	static int ecAddPointNist(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q)
				691	{
				692	int ret = 0;
				693
				694	EcPoint tP, tQ;
				695	const EcPoint *ptP = 0;
				696	const EcPoint *ptQ = 0;
				697
				698
				699	/* Fast check if application called add(R, P, P) */
				700	if (!bnCmp(P->x, Q->x) && !bnCmp(P->y, Q->y) && !bnCmp(P->z, Q->z)) {
				701	return ecDoublePoint(curve, R, P);
				702	}
				703
				704	/* if P is (@,@), R = Q */
				705	if (!bnCmp(P->z, mpiZero)) {
				706	bnCopy(R->x, Q->x);
				707	bnCopy(R->y, Q->y);
				708	bnCopy(R->z, Q->z);
				709	return 0;
				710	}
				711
				712	/* if Q is (@,@), R = P */
				713	if (!bnCmp(Q->z, mpiZero)) {
				714	bnCopy(R->x, P->x);
				715	bnCopy(R->y, P->y);
				716	bnCopy(R->z, P->z);
				717	return 0;
				718	}
				719
				720	/* Check for overlapping arguments, copy if necessary and set pointers */
				721	if (P == R) {
				722	INIT_EC_POINT(&tP);
				723	ptP = &tP;
				724	bnCopy(tP.x, P->x);
				725	bnCopy(tP.y, P->y);
				726	bnCopy(tP.z, P->z);
				727	}
				728	else
				729	ptP = P;
				730
				731	if (Q == R) {
				732	INIT_EC_POINT(&tQ);
				733	ptQ = &tQ;
				734	bnCopy(tQ.x, Q->x);
				735	bnCopy(tQ.y, Q->y);
				736	bnCopy(tQ.z, Q->z);
				737	}
				738	else
				739	ptQ = Q;
				740
				741	/* U1 = X1Z2^2, where X1: P->x, Z2: Q->z /
				742	bnMulMod_(curve->t1, ptQ->z, ptQ->z, curve->p, curve); /* t1 = Z2^2 */
				743	bnMulMod_(curve->U1, ptP->x, curve->t1, curve->p, curve); /* U1 = X1 * z_2 */
				744
				745	/* S1 = Y1Z2^3, where Y1: P->y /
				746	bnMulMod_(curve->t1, curve->t1, ptQ->z, curve->p, curve); /* t1 = Z2^3 */
				747	bnMulMod_(curve->S1, ptP->y, curve->t1, curve->p, curve); /* S1 = Y1 * z_2 */
				748
				749	/* U2 = X2Z1^2, where X2: Q->x, Z1: P->z /
				750	bnMulMod_(curve->t1, ptP->z, ptP->z, curve->p, curve); /* t1 = Z1^2 */
				751	bnMulMod_(curve->H, ptQ->x, curve->t1, curve->p, curve); /* H = X2 * t1 (store U2 in H) */
				752
				753	/* H = U2 - U1 */
				754	bnSubMod_(curve->H, curve->U1, curve->p);
				755
				756	/* S2 = Y2Z1^3, where Y2: Q->y /
				757	bnMulMod_(curve->t1, curve->t1, ptP->z, curve->p, curve); /* t1 = Z1^3 */
				758	bnMulMod_(curve->R, ptQ->y, curve->t1, curve->p, curve); /* R = Y2 * t1 (store S2 in R) */
				759
				760	/* R = S2 - S1 */
				761	bnSubMod_(curve->R, curve->S1, curve->p);
				762
				763	/* if (U1 == U2), i.e H is zero */
				764	if (!bnCmp(curve->H, mpiZero)) {
				765
				766	/* if (S1 != S2), i.e. R is _not_ zero: return infinity*/
				767	if (bnCmp(curve->R, mpiZero)) {
				768	bnSetQ(R->x, 1);
				769	bnSetQ(R->y, 1);
				770	bnSetQ(R->z, 0);
				771	return 0;
				772	}
				773	return ecDoublePoint(curve, R, P);
				774	}
				775	/* X3 = R^2 - H^3 - 2U1H^2, where X3: R->x */
				776	bnMulMod_(curve->t0, curve->H, curve->H, curve->p, curve); /* t0 = H^2 */
				777	bnMulMod_(curve->t1, curve->U1, curve->t0, curve->p, curve); /* t1 = U1 * t0, (hold t1) */
				778	bnMulMod_(curve->t0, curve->t0, curve->H, curve->p, curve); /* t0 = H^3, (hold t0) */
				779	bnMulMod_(curve->t2, curve->R, curve->R, curve->p, curve); /* t2 = R^2 */
				780	bnCopy(curve->t3, curve->t2);
				781	bnSubMod_(curve->t3, curve->t0, curve->p); /* t3 = t2 - t0, (-H^3)*/
				782	bnMulMod_(curve->t2, mpiTwo, curve->t1, curve->p, curve); /* t2 = 2 * t1 */
				783	bnCopy(R->x, curve->t3);
				784	bnSubMod_(R->x, curve->t2, curve->p); /* X3 = t3 - t2 */
				785
				786	/* Y3 = R(U1H^2 - X3) - S1H^3, where Y3: R->y /
				787	bnSubMod_(curve->t1, R->x, curve->p); /* t1 = t1 - X3, overwrites t1 now */
				788	bnMulMod_(curve->t2, curve->R, curve->t1, curve->p, curve); /* t2 = R * z_2 */
				789	bnMulMod_(curve->S1, curve->S1, curve->t0, curve->p, curve); /* S1 = S1 * t0, (t0 has H^3) */
				790	bnCopy(R->y, curve->t2);
				791	bnSubMod_(R->y, curve->S1, curve->p); /* Y3 = t2 - S1 */
				792
				793	/* Z3 = HZ1Z2, where Z1: P->z, Z2: Q->z, Z3: R->z */
				794	bnMulMod_(curve->t2, curve->H, P->z, curve->p, curve); /* t2 = H * Z1 */
				795	bnMulMod_(R->z, curve->t2, Q->z, curve->p, curve); /* Z3 = t2 * Z2 */
				796
				797	if (P == R)
				798	FREE_EC_POINT(&tP);
				799	if (Q == R)
				800	FREE_EC_POINT(&tQ);
				801	return ret;
				802	}
				803
				804	/*
				805	* Refer to the document: Faster addition and doubling on elliptic curves; Daniel J. Bernstein and Tanja Lange
				806	* section 4.
				807	*
				808	* This function is a variant of the 'addition'. The function returns the result in an own curve point
				809	* and does not overwrite its input parameters.
				810	*/
				811	static int ecAddPointEd(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q)
				812	{
				813	EcPoint tP, tQ;
				814	const EcPoint *ptP = 0;
				815	const EcPoint *ptQ = 0;
				816
				817	/* if P is (@,@), R = Q */
				818	if (!bnCmp(P->z, mpiZero)) {
				819	bnCopy(R->x, Q->x);
				820	bnCopy(R->y, Q->y);
				821	bnCopy(R->z, Q->z);
				822	return 0;
				823	}
				824
				825	/* if Q is (@,@), R = P */
				826	if (!bnCmp(Q->z, mpiZero)) {
				827	bnCopy(R->x, P->x);
				828	bnCopy(R->y, P->y);
				829	bnCopy(R->z, P->z);
				830	return 0;
				831	}
				832
				833	/* Check for overlapping arguments, copy if necessary and set pointers */
				834	if (P == R) {
				835	INIT_EC_POINT(&tP);
				836	ptP = &tP;
				837	bnCopy(tP.x, P->x);
				838	bnCopy(tP.y, P->y);
				839	bnCopy(tP.z, P->z);
				840	}
				841	else
				842	ptP = P;
				843
				844	if (Q == R) {
				845	INIT_EC_POINT(&tQ);
				846	ptQ = &tQ;
				847	bnCopy(tQ.x, Q->x);
				848	bnCopy(tQ.y, Q->y);
				849	bnCopy(tQ.z, Q->z);
				850	}
				851	else
				852	ptQ = Q;
				853
				854	/* Compute A, C, D first */
				855	bnMulMod_(R->z, ptP->z, ptQ->z, curve->p, curve); /* Rz -> A; (Z1 * z2); Rz becomes R3 */
				856	bnMulMod_(R->x, ptP->x, ptQ->x, curve->p, curve); /* Rx -> C; (X1 * X2); Rx becomes R1 */
				857	bnMulMod_(R->y, ptP->y, ptQ->y, curve->p, curve); /* Ry -> D; (Y1 * Y2); Ry becomes R2 */
				858
				859	/* Compute large parts of X3 equation, sub result in t0 */
				860	bnCopy(curve->t0, ptP->x);
				861	bnAddMod_(curve->t0, ptP->y, curve->p); /* t0 -> X1 + Y1 */
				862	bnCopy(curve->t1, ptQ->x);
				863	bnAddMod_(curve->t1, ptQ->y, curve->p); /* t1 -> X2 + Y2 */
				864	bnMulMod_(curve->t2, curve->t0, curve->t1, curve->p, curve); /* t2 = t0 * t1 */
				865	bnSubMod_(curve->t2, R->x, curve->p); /* t2 - C */
				866	bnSubMod_(curve->t2, R->y, curve->p); /* t2 - D */
				867	bnMulMod_(curve->t0, curve->t2, R->z, curve->p, curve); /* t0 -> R7; (t2 * A); sub result */
				868
				869	/* Compute E */
				870	bnMulMod_(curve->t2, R->x, R->y, curve->p, curve); /* t2 = C * D */
				871	bnMulMod_(curve->t1, curve->t2, curve->a, curve->p, curve); /* t1 -> E; t1 new R8 */
				872
				873	/* Compute part of Y3 equation, sub result in t2 */
				874	bnSubMod_(R->y, R->x, curve->p); /* Ry = D - C; sub result */
				875	bnMulMod_(curve->t2, R->y, R->z, curve->p, curve); /* t2 = Ry * A; sub result */
				876
				877	/* Compute B */
				878	bnSquareMod_(R->z, R->z, curve->p, curve); /* Rz -> B; (A^2) */
				879
				880	/* Compute F */
				881	bnCopy(curve->t3, R->z);
				882	bnSubMod_(curve->t3, curve->t1, curve->p); /* t3 -> F; (B - E) */
				883
				884	/* Compute G */
				885	bnAddMod_(R->z, curve->t1, curve->p); /* Rz -> G; (B + E) */
				886
				887	/* Compute, X, Y, Z results */
				888	bnMulMod_(R->x, curve->t3, curve->t0, curve->p, curve); /* Rx = F * t0 */
				889	bnMulMod_(R->y, curve->t2, R->z, curve->p, curve); /* Ry = t2 * G */
				890	bnMulMod_(R->z, curve->t3, R->z, curve->p, curve); /* Rz = F * G */
				891
				892	if (P == R)
				893	FREE_EC_POINT(&tP);
				894	if (Q == R)
				895	FREE_EC_POINT(&tQ);
				896
				897	return 0;
				898	}
				899
				900	/*
				901	* Curve25519 has no specific Add Point function, all inside curve25519-donna
				902	*/
				903	static int ecAddPoint25519(const EcCurve curve, EcPoint R, const EcPoint P, const EcPoint Q)
				904	{
				905	return -2;
				906	}
				907
				908	int ecMulPointScalar(const EcCurve curve, EcPoint R, const EcPoint P, const BigNum scalar)
				909	{
				910	return curve->mulScalar(curve, R, P, scalar);
				911	}
				912
				913	static int ecMulPointScalarNormal(const EcCurve curve, EcPoint R, const EcPoint P, const BigNum scalar)
				914	{
				915	int ret = 0;
				916	int i;
				917	int bits = bnBits(scalar);
				918	EcPoint n;
				919
				920	INIT_EC_POINT(&n);
				921	bnCopy(n.x, P->x);
				922	bnCopy(n.y, P->y);
				923	bnCopy(n.z, P->z);
				924
				925	bnSetQ(R->x, 0);
				926	bnSetQ(R->y, 0);
				927	bnSetQ(R->z, 0);
				928
				929	for (i = 0; i < bits; i++) {
				930	if (bnReadBit(scalar, i))
				931	ecAddPoint(curve, R, R, &n);
				932
				933	/* ecAddPoint(curve, &n, &n, &n); */
				934	ecDoublePoint(curve, &n, &n);
				935	}
				936	FREE_EC_POINT(&n);
				937	return ret;
				938	}
				939
				940	/*
				941	* This function uses BigNumber only as containers to transport the 32 byte data.
				942	* This makes it compliant to the other functions and thus higher-level API does not change.
				943	*
				944	* curve25519_donna function uses data in little endian format.
				945	*/
				946	static int ecMulPointScalar25519(const EcCurve curve, EcPoint R, const EcPoint P, const BigNum scalar)
				947	{
				948	uint8_t basepoint[32], secret[32], result[32];
				949
				950	bnExtractLittleBytes(P->x, basepoint, 0, 32); /* 25519 function requires the X coordinate only (compressed) */
				951	bnExtractLittleBytes(scalar, secret, 0, 32);
				952	curve25519_donna(result, secret, basepoint);
				953	bnInsertLittleBytes(R->x, result, 0, 32);
				954	return 0;
				955	}
				956
				957	#ifdef WEAKRANDOM
				958	#include <fcntl.h>
				959
				960	/*
				961	* A standard random number generator that uses the portable random() system function.
				962	*
				963	* This should be enhanced to use a better random generator
				964	*/
				965	static int _random(unsigned char *output, size_t len)
				966	{
				967	size_t num = 0;
				968
				969	int rnd = open("/dev/urandom", O_RDONLY);
				970	if (rnd >= 0) {
				971	num = read(rnd, output, len);
				972	close(rnd);
				973	}
				974	else
				975	return num;
				976
				977	return( 0 );
				978	}
				979	#else
				980	#include <cryptcommon/ZrtpRandom.h>
				981	static int _random(unsigned char *output, size_t len)
				982	{
				983	return zrtp_getRandomData(output, len);
				984	}
				985	#endif
				986
				987	int ecGenerateRandomNumber(const EcCurve curve, BigNum d)
				988	{
				989	return curve->randomOp(curve, d);
				990	}
				991
				992	static int ecGenerateRandomNumberNist(const EcCurve curve, BigNum d)
				993	{
				994	BigNum c, nMinusOne;
				995
				996	size_t randomBytes = ((bnBits(curve->n) + 64) + 7) / 8;
				997
				998	uint8_t *ran = malloc(randomBytes);
				999
				1000	bnBegin(&c);
				1001	bnBegin(&nMinusOne);
				1002
				1003	bnCopy(&nMinusOne, curve->n);
				1004	bnSubMod_(&nMinusOne, mpiOne, curve->p);
				1005
				1006	bnSetQ(d, 0);
				1007
				1008	while (!bnCmpQ(d, 0)) {
				1009	/* use _random function */
				1010	_random(ran, randomBytes);
				1011	bnInsertBigBytes(&c, ran, 0, randomBytes);
				1012	bnMod(d, &c, &nMinusOne);
				1013	bnAddMod_(d, mpiOne, curve->p);
				1014	}
				1015
				1016	bnEnd(&c);
				1017	bnEnd(&nMinusOne);
				1018	free(ran);
				1019
				1020	return 0;
				1021	}
				1022
				1023	static int ecGenerateRandomNumber3617(const EcCurve curve, BigNum d)
				1024	{
				1025	unsigned char random[52];
				1026	_random(random, 52);
				1027
				1028	/* prepare the secret random data: clear bottom 3 bits. Clearing top 2 bits
				1029	* makes is a 414 bit value
				1030	*/
				1031	random[51] &= ~0x7;
				1032	random[0] &= 0x3f;
				1033	/* convert the random data into big numbers */
				1034	bnInsertBigBytes(d, random, 0, 52);
				1035	return 0;
				1036	}
				1037
				1038	static int ecGenerateRandomNumber25519(const EcCurve curve, BigNum d)
				1039	{
				1040	unsigned char random[32];
				1041	_random(random, 32);
				1042
				1043	/* No specific preparation. The curve25519_donna functions prepares the data.
				1044	*
				1045	* convert the random data into big numbers. the bigNumber is a container only.
				1046	* we don not use the big number for any arithmetic
				1047	*/
				1048	bnInsertLittleBytes(d, random, 0, 32);
				1049	return 0;
				1050
				1051	}
				1052
				1053	int ecCheckPubKey(const EcCurve curve, const EcPoint pub)
				1054	{
				1055	return curve->checkPubOp(curve, pub);
				1056	}
				1057
				1058	static int ecCheckPubKeyNist(const NistECpCurve curve, const EcPoint pub)
				1059	{
				1060	/* Represent point at infinity by (0, 0), make sure it's not that */
				1061	if (bnCmpQ(pub->x, 0) == 0 && bnCmpQ(pub->y, 0) == 0) {
				1062	return 0;
				1063	}
				1064	/* Check that coordinates are within range */
				1065	if (bnCmpQ(pub->x, 0) < 0 \|\| bnCmp(pub->x, curve->p) >= 0) {
				1066	return 0;
				1067	}
				1068	if (bnCmpQ(pub->y, 0) < 0 \|\| bnCmp(pub->y, curve->p) >= 0) {
				1069	return 0;
				1070	}
				1071	/* Check that point satisfies EC equation y^2 = x^3 - 3x + b, mod P */
				1072	bnSquareMod_(curve->t1, pub->y, curve->p, curve);
				1073	bnSquareMod_(curve->t2, pub->x, curve->p, curve);
				1074	bnSubQMod_(curve->t2, 3, curve->p);
				1075	bnMulMod_(curve->t2, curve->t2, pub->x, curve->p, curve);
				1076	bnAddMod_(curve->t2, curve->b, curve->p);
				1077	if (bnCmp (curve->t1, curve->t2) != 0) {
				1078	return 0;
				1079	}
				1080	return 1;
				1081
				1082	}
				1083
				1084	static int ecCheckPubKey3617(const EcCurve curve, const EcPoint pub)
				1085	{
				1086	/* Represent point at infinity by (0, 0), make sure it's not that */
				1087	if (bnCmpQ(pub->x, 0) == 0 && bnCmpQ(pub->y, 0) == 0) {
				1088	return 0;
				1089	}
				1090	/* Check that coordinates are within range */
				1091	if (bnCmpQ(pub->x, 0) < 0 \|\| bnCmp(pub->x, curve->p) >= 0) {
				1092	return 0;
				1093	}
				1094	if (bnCmpQ(pub->y, 0) < 0 \|\| bnCmp(pub->y, curve->p) >= 0) {
				1095	return 0;
				1096	}
				1097	/* Check that point satisfies EC equation x^2+y^2 = 1+3617x^2y^2, mod P */
				1098	bnSquareMod_(curve->t1, pub->y, curve->p, curve);
				1099	bnSquareMod_(curve->t2, pub->x, curve->p, curve);
				1100	bnCopy(curve->t3, curve->t1); /* Load t3 */
				1101	bnAddMod_(curve->t3, curve->t2, curve->p); /* t3 = t1 + t2, (x^2+y^2)*/
				1102
				1103	bnMulMod_(curve->t0, curve->a, curve->t1, curve->p, curve); /* t0 = a * t1, (3617 * x^2) */
				1104	bnMulMod_(curve->t0, curve->t0, curve->t2, curve->p, curve); /* t0 = t0 * t1, (3617 * x^2 * y^2) */
				1105	bnAddMod_(curve->t0, mpiOne, curve->p); /* t0 = t0 + 1, (3617 * x^2 * y^2 + 1) */
				1106
				1107	if (bnCmp (curve->t0, curve->t3) != 0) {
				1108	return 0;
				1109	}
				1110	return 1;
				1111	}
				1112
				1113	/**
				1114	* According to http://cr.yp.to/ecdh.html#validate no validation is required if used for Diffie-Hellman
				1115	* thus always return success.
				1116	*/
				1117	static int ecCheckPubKey25519(const EcCurve curve, const EcPoint pub)
				1118	{
				1119	return 1;
				1120	}
				1121
				1122	static int mod3617(BigNum r, const BigNum a, const BigNum *modulo)
				1123	{
				1124	unsigned char buffer[52] = {0};
				1125	int cmp;
				1126	BigNum tmp;
				1127
				1128	bnBegin(&tmp);
				1129	cmp = bnCmp(modulo, a);
				1130	if (cmp == 0) { /* a is equal modulo, set resul to zero */
				1131	bnSetQ(r, 0);
				1132	return 0;
				1133	}
				1134	if (cmp > 0) { /* modulo is greater than a - copy a to r and return it */
				1135	bnCopy(r, a);
				1136	return 0;
				1137	}
				1138	bnExtractLittleBytes(a, buffer, 0, 52);
				1139	buffer[51] &= 0x3f;
				1140
				1141	bnCopy(&tmp, a);
				1142	bnRShift(&tmp, 414);
				1143	bnCopy(r, &tmp);
				1144	bnLShift(r, 4);
				1145	bnAdd(r, &tmp);
				1146
				1147	bnInsertLittleBytes(&tmp, buffer, 0, 52);
				1148
				1149	bnAdd(r, &tmp);
				1150	while (bnCmp(r, modulo) >= 0) {
				1151	bnSub(r, modulo);
				1152	}
				1153	bnEnd(&tmp);
				1154	return 0;
				1155	}
				1156
				1157	/*
				1158	* Curve25519 has no specific modulo function, all inside curve25519-donna
				1159	*/
				1160	static int mod25519(BigNum r, const BigNum a, const BigNum *modulo)
				1161	{
				1162	return -2;
				1163	}
				1164
				1165	/*
				1166	* Beware: Here are the dragons.
				1167	*
				1168	* The modulo implementations for the NIST curves. For more detailled information see
				1169	* FIPS 186-3, chapter D.2 and other papers about Generailzed Mersenne numbers.
				1170	*
				1171	* I use byte operations to perfom the additions with carry. On a little endian machine
				1172	* this saves conversion from/to big endian format if I would use integers for example. Also
				1173	* using byte addition into a short carry accumulator works on every word size and avoids
				1174	* complex testing and handling of wordsizes and big/little endian stuff.
				1175	*
				1176	*/
				1177
				1178	/* new modulo for 192bit curve */
				1179	static int newMod192(BigNum r, const BigNum a, const BigNum *modulo)
				1180	{
				1181	unsigned char buffer[200] = {0};
				1182	unsigned char *pt;
				1183	unsigned char *ps1;
				1184	unsigned char *ps2;
				1185	unsigned char *ps3;
				1186	short ac;
				1187	int cmp;
				1188
				1189	/* Binary big number representation in PolarSSL is always big endian
				1190	*
				1191	* the least significant 64bit large word starts at byte offset 40,
				1192	* the least significant 32bit word starts at byte offset 44
				1193	* the least significant byte starts at byte offset 47
				1194	*
				1195	* S3 S2 S1 T
				1196	* /-----^------\
				1197	* A5 A4 A3 A2 A1 A0
				1198	* 64bit 0 1 2 3 4 5
				1199	* \|--+--\|--+--\|--+--\|--+--\|--+--\|--+--\|
				1200	* 32bit 0 1 2 3 4 5 6 7 8 9 10 11
				1201	*
				1202	* perform T + S1 + S2 + S3 mod p
				1203
				1204	* where T = (A2 \|\| A1 \|\| A0)
				1205	* + S1 = ( 0 \|\| A3 \|\| A3)
				1206	* + S2 = (A4 \|\| A4 \|\| 0)
				1207	* + S3 = (A5 \|\| A5 \|\| A5)
				1208	*
				1209	* TODO: error check if input variable is > modulo^2 (do normal mpi_mod_mpi),
				1210	*/
				1211
				1212	/* TODO: check if a is > modulo^2 */
				1213	cmp = bnCmp(modulo, a);
				1214	if (cmp == 0) { /* a is equal modulo, set resul to zero */
				1215	bnSetQ(r, 0);
				1216	return 0;
				1217	}
				1218	if (cmp > 0) { /* modulo is greater than a - copy a to r and return it */
				1219	bnCopy(r, a);
				1220	return 0;
				1221	}
				1222	bnExtractBigBytes(a, buffer, 0, bnBytes(modulo)*2);
				1223
				1224	/* 6 'A' words, each word is 8 byte. Compute offset to least significant byte of word X */
				1225	#define A(X) buffer + (((6-X)*8)-1)
				1226
				1227	ac = 0;
				1228
				1229	pt = A(0); /* pt points to least significant byte of A0 */
				1230
				1231	/* Add up first 8 byte word, no need to add ps2 */
				1232	ps1 = A(3); /* ps1 points to least significant byte of S1 (A3) */
				1233	ps3 = A(5); /* ps3 points to least significant byte of S3 (A5)*/
				1234
				1235	/* Each block processes one 32 bit word, big endian, using byte operations */
				1236	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1237	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1238	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1239	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1240
				1241	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1242	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1243	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1244	ac += pt + ps1--; ac += ps3--; pt-- = ac; ac >>= 8;
				1245
				1246	/* Add up second 8 byte word, all three S words are used here */
				1247	ps1 = A(3); ps2 = A(4); ps3 = A(5);
				1248
				1249	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1250	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1251	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1252	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1253
				1254	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1255	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1256	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1257	ac += pt + ps1--; ac += ps2--; ac += ps3--; *pt-- = ac; ac >>= 8;
				1258
				1259	/* Add up third 8 byte word, no need to add S1 word */
				1260	ps2 = A(4); ps3 = A(5);
				1261
				1262	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1263	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1264	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1265	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1266
				1267	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1268	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1269	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1270	ac += pt + ps2--; ac += ps3--; pt-- = ac; ac >>= 8;
				1271
				1272	/* In this function we cannot have a negative carry and at most a carry of 2
				1273	* thus just subtract the modulo until we are less than modulo
				1274	*/
				1275	bnSetQ(r, 0);
				1276
				1277	(A(3)) = ac; / Store the carry */
				1278	bnInsertBigBytes(r, A(3), 0, 25); /* 25: 3 * 8 byte words + 1 carry byte */
				1279	while (bnCmp(r, modulo) >= 0) {
				1280	bnSub(r, modulo);
				1281	}
				1282	return 0;
				1283	}
				1284	#undef A
				1285
				1286	/* new modulo for 256bit curve */
				1287	static int newMod256(BigNum r, const BigNum a, const BigNum *modulo)
				1288	{
				1289	unsigned char buffer[200] = {0};
				1290	unsigned char *pt;
				1291	unsigned char *ps1;
				1292	unsigned char *ps2;
				1293	unsigned char *ps3;
				1294	unsigned char *ps4;
				1295
				1296	unsigned char *pd1;
				1297	unsigned char *pd2;
				1298	unsigned char *pd3;
				1299	unsigned char *pd4;
				1300	short ac;
				1301	int cmp;
				1302
				1303	/* Binary big number representation in PolarSSL is always big endian
				1304	*
				1305	* the least significant byte starts at byte offset 63
				1306	*
				1307	* T
				1308	* /-----------------^------------------\
				1309	* A15 A14 A13 A12 A11 A10 A9 A8 A7 A6 A5 A4 A3 A2 A1 A0
				1310	* \|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|
				1311	* offset 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64
				1312	*
				1313	* T = ( A7 \|\| A6 \|\| A5 \|\| A4 \|\| A3 \|\| A2 \|\| A1 \|\| A0 )
				1314	*
				1315	* S1 = ( A15 \|\| A14 \|\| A13 \|\| A12 \|\| A11 \|\| 00 \|\| 00 \|\| 00 )
				1316	* S2 = ( 00 \|\| A15 \|\| A14 \|\| A13 \|\| A12 \|\| 00 \|\| 00 \|\| 00 )
				1317	* S3 = ( A15 \|\| A14 \|\| 00 \|\| 00 \|\| 00 \|\| A10 \|\| A9 \|\| A8 )
				1318	* S4 = ( A8 \|\| A13 \|\| A15 \|\| A14 \|\| A13 \|\| A11 \|\| A10 \|\| A9 )
				1319	* D1 = ( A10 \|\| A8 \|\| 00 \|\| 00 \|\| 00 \|\| A13 \|\| A12 \|\| A11 )
				1320	* D2 = ( A11 \|\| A9 \|\| 00 \|\| 00 \|\| A15 \|\| A14 \|\| A13 \|\| A12 )
				1321	* D3 = ( A12 \|\| 00 \|\| A10 \|\| A9 \|\| A8 \|\| A15 \|\| A14 \|\| A13 )
				1322	* D4 = ( A13 \|\| 00 \|\| A11 \|\| A10 \|\| A9 \|\| 00 \|\| A15 \|\| A14 )
				1323	*
				1324	* perform B = T + 2S1 + 2S2 + S3 + S4 - D1 - D2 - D3 - D4 mod p
				1325	*
				1326	* TODO: error check if input variable is > modulo^2 (do normal mpi_mod_mpi),
				1327	*/
				1328
				1329	cmp = bnCmp(modulo, a);
				1330	if (cmp == 0) { /* a is equal modulo, set resul to zero */
				1331	bnSetQ(r, 0);
				1332	return 0;
				1333	}
				1334	if (cmp > 0) { /* modulo is greater than a - copya to r and return it */
				1335	bnCopy(r, a);
				1336	return 0;
				1337	}
				1338	bnExtractBigBytes(a, buffer, 0, bnBytes(modulo)*2);
				1339
				1340	/* 16 'A' words, each word is 4 byte. Compute offset to least significant byte of word X */
				1341	#define A(X) buffer + (((16-X)*4)-1)
				1342
				1343	ac = 0;
				1344
				1345	pt = A(0); /* pt points to least significant byte of A0 */
				1346
				1347	/* Set up to add up data that goes into A0 (right-most column abover); S1, S2 not used */
				1348	ps3 = A(8); /* ps3 points to least significant byte of S3 */
				1349	ps4 = A(9); /* ps4 points to least significant byte of S4 */
				1350	pd1 = A(11); /* pd1 points to least significant byte of D1 */
				1351	pd2 = A(12); /* pd2 points to least significant byte of D2 */
				1352	pd3 = A(13); /* pd3 points to least significant byte of D3 */
				1353	pd4 = A(14); /* pd4 points to least significant byte of D4 */
				1354
				1355	/* Each block processes one 32 bit word, big endian, using byte operations */
				1356	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1357	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1358	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1359	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1360
				1361	/* Set up to add up data that goes into A1; S1, S2 not used */
				1362	ps3 = A(9); ps4 = A(10); pd1 = A(12); pd2 = A(13); pd3 = A(14); pd4 = A(15);
				1363	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1364	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1365	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1366	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1367
				1368	/* Set up to add up data that goes into A2; S1, S2, D4 not used */
				1369	ps3 = A(10); ps4 = A(11); pd1 = A(13); pd2 = A(14); pd3 = A(15);
				1370	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1371	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1372	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1373	ac += pt + ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1374
				1375	/* Set up to add up data that goes into A3; S3, D1 not used */
				1376	ps1 = A(11); ps2 = A(12); ps4 = A(13); pd2 = A(15); pd3 = A(8); pd4 = A(9);
				1377	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1378	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1379	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1380	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1381
				1382	/* Set up to add up data that goes into A4; S3, D1, D2 not used */
				1383	ps1 = A(12); ps2 = A(13); ps4 = A(14); pd3 = A(9); pd4 = A(10);
				1384	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1385	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1386	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1387	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1388
				1389	/* Set up to add up data that goes into A5; S3, D1, D2 not used */
				1390	ps1 = A(13); ps2 = A(14); ps4 = A(15); pd3 = A(10); pd4 = A(11);
				1391	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1392	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1393	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1394	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps4--; ac -= pd3--; ac -= pd4--; *pt-- = ac; ac >>= 8;
				1395
				1396	/* Set up to add up data that goes into A6; D3, D4 not used */
				1397	ps1 = A(14); ps2 = A(15); ps3 = A(14); ps4 = A(13); pd1 = A(8); pd2 = A(9);
				1398	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1399	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1400	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1401	ac += pt + ps1;ac += ps1--; ac += ps2;ac += ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1402
				1403	/* Set up to add up data that goes into A7; S2 not used */
				1404	ps1 = A(15); ps3 = A(15); ps4 = A(8); pd1 = A(10); pd2 = A(11); pd3 = A(12); pd4 = A(13);
				1405	ac += pt + ps1;ac += ps1--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1406	ac += pt + ps1;ac += ps1--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1407	ac += pt + ps1;ac += ps1--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1408	ac += pt + ps1;ac += ps1--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; ac -= pd3--; ac -= pd4--; pt-- = ac; ac >>= 8;
				1409
				1410	bnSetQ(r, 0);
				1411	if (ac > 0) {
				1412	(A(8)) = ac; / Store the carry */
				1413	bnInsertBigBytes(r, A(8), 0, 33); /* 33: 8 * 4 byte words + 1 carry byte */
				1414	}
				1415	/* Negative carry requires that we add the modulo (carry * -1) times to make
				1416	* the result positive. Then get the result mod(256).
				1417	*/
				1418	else if (ac < 0) {
				1419	int msb, maxMsb;
				1420
				1421	*(A(8)) = 0;
				1422	bnInsertBigBytes(r, A(8), 0, 33); /* 33: 8 * 4 byte words + 1 carry byte */
				1423	ac *= -1;
				1424	while (ac--) {
				1425	bnAdd(r, modulo);
				1426	}
				1427	maxMsb = bnBits(modulo);
				1428	msb = bnBits(r) - maxMsb;
				1429	/* clear all bits above bit length of modulo. This length is 256 here, thus
				1430	* we effectiviely doing a mod(256)
				1431	*/
				1432	if (msb > 0) {
				1433	BigNum tmp;
				1434	bnBegin(&tmp);
				1435	bnSetQ (&tmp, 1);
				1436	bnLShift (&tmp, maxMsb);
				1437	bnMod(r, r, &tmp);
				1438	bnEnd(&tmp);
				1439	}
				1440	}
				1441	else {
				1442	*(A(8)) = 0;
				1443	bnInsertBigBytes(r, A(8), 0, 33); /* 33: 8 * 4 byte words + 1 carry byte */
				1444	}
				1445	while (bnCmp(r, modulo) >= 0) {
				1446	bnSub(r, modulo);
				1447	}
				1448	return 0;
				1449	}
				1450	#undef A
				1451
				1452
				1453	/* new modulo for 384bit curve */
				1454	static int newMod384(BigNum r, const BigNum a, const BigNum *modulo)
				1455	{
				1456	unsigned char buffer[200] = {0};
				1457	unsigned char *pt;
				1458	unsigned char *ps1;
				1459	unsigned char *ps2;
				1460	unsigned char *ps3;
				1461	unsigned char *ps4;
				1462	unsigned char *ps5;
				1463	unsigned char *ps6;
				1464
				1465	unsigned char *pd1;
				1466	unsigned char *pd2;
				1467	unsigned char *pd3;
				1468	short ac;
				1469	int cmp;
				1470
				1471	/*
				1472	*
				1473	* the least significant byte starts at byte offset 97
				1474	*
				1475	* T
				1476	* /---------------------------^----------------------------\
				1477	* A23 ......... A15 A14 A13 A12 A11 A10 A9 A8 A7 A6 A5 A4 A3 A2 A1 A0
				1478	* \|----+ ...... \|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|----+----\|
				1479	*
				1480	* T = (A11 \|\| A10 \|\| A9 \|\| A8 \|\| A7 \|\| A6 \|\| A5 \|\| A4 \|\| A3 \|\| A2 \|\| A1 \|\| A0)
				1481
				1482	* S1 = ( 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| A23 \|\| A22 \|\| A21 \|\| 00 \|\| 00 \|\| 00 \|\| 00)
				1483	* S2 = (A23 \|\| A22 \|\| A21 \|\| A20 \|\| A19 \|\| A18 \|\| A17 \|\| A16 \|\| A15 \|\| A14 \|\| A13 \|\| A12)
				1484	* S3 = (A20 \|\| A19 \|\| A18 \|\| A17 \|\| A16 \|\| A15 \|\| A14 \|\| A13 \|\| A12 \|\| A23 \|\| A22 \|\| A21)
				1485	* S4 = (A19 \|\| A18 \|\| A17 \|\| A16 \|\| A15 \|\| A14 \|\| A13 \|\| A12 \|\| A20 \|\| 00 \|\| A23 \|\| 00)
				1486	* S5 = ( 00 \|\| 00 \|\| 00 \|\| 00 \|\| A23 \|\| A22 \|\| A21 \|\| A20 \|\| 00 \|\| 00 \|\| 00 \|\| 00)
				1487	* S6 = ( 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| A23 \|\| A22 \|\| A21 \|\| 00 \|\| 00 \|\| A20)
				1488	* D1 = (A22 \|\| A21 \|\| A20 \|\| A19 \|\| A18 \|\| A17 \|\| A16 \|\| A15 \|\| A14 \|\| A13 \|\| A12 \|\| A23)
				1489	* D2 = ( 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| A23 \|\| A22 \|\| A21 \|\| A20 \|\| 00)
				1490	* D3 = ( 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| 00 \|\| A23 \|\| A23 \|\| 00 \|\| 00 \|\| 00)
				1491	*
				1492	* perform B = T + 2S1 + S2 + S3 + S4 + S5 + S6 – D1 – D2 – D3 mod p
				1493	*
				1494	* TODO: error check if input variable is > modulo^2 (do normal mpi_mod_mpi),
				1495	* optimize if input is already < modulo (just copy over in this case).
				1496	*/
				1497
				1498	cmp = bnCmp(modulo, a);
				1499	if (cmp == 0) { /* a is equal modulo, set resul to zero */
				1500	bnSetQ(r, 0);
				1501	return 0;
				1502	}
				1503	if (cmp > 0) { /* modulo is greater than a - copy a to r and return it */
				1504	bnCopy(r, a);
				1505	return 0;
				1506	}
				1507
				1508	bnExtractBigBytes(a, buffer, 0, bnBytes(modulo)*2);
				1509
				1510	/* 24 'A' words, each word is 4 byte. Compute offset to least significant byte of word X */
				1511	#define A(X) buffer + (((24-X)*4)-1)
				1512
				1513	ac = 0;
				1514
				1515	pt = A(0); /* pt points to least significant byte of A0 */
				1516
				1517	/* Set up to add data that goes into A0; S1, S4, S5, D2, D3 not used */
				1518	ps2 = A(12); ps3 = A(21); ps6 = A(20); pd1 = A(23);
				1519
				1520	/* Each block processes one 32 bit word, big endian, using byte operations */
				1521	ac += pt + ps2--; ac += ps3--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1522	ac += pt + ps2--; ac += ps3--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1523	ac += pt + ps2--; ac += ps3--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1524	ac += pt + ps2--; ac += ps3--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1525
				1526	/* Set up to add data that goes into A1; S1, S5, S6, D3 not used */
				1527	ps2 = A(13); ps3 = A(22); ps4 = A(23); pd1= A(12); pd2 = A(20);
				1528	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; *pt-- = ac; ac >>= 8;
				1529	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; *pt-- = ac; ac >>= 8;
				1530	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; *pt-- = ac; ac >>= 8;
				1531	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; ac -= pd2--; *pt-- = ac; ac >>= 8;
				1532
				1533	/* Set up to add data that goes into A2; S1, S4, S5, S6, D3 not used */
				1534	ps2 = A(14); ps3 = A(23); pd1 = A(13); pd2 = A(21);
				1535	ac += pt + ps2--; ac += ps3--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1536	ac += pt + ps2--; ac += ps3--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1537	ac += pt + ps2--; ac += ps3--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1538	ac += pt + ps2--; ac += ps3--; ac -= pd1--; ac -= pd2--; pt-- = ac; ac >>= 8;
				1539
				1540	/* Set up to add data that goes into A3; S1, S5, S6 not used */
				1541	ps2 = A(15); ps3 = A(12); ps4 = A(20); ps6 = A(21); pd1 = A(14); pd2 = A(22); pd3 = A(23);
				1542	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1543	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1544	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1545	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; *pt-- = ac; ac >>= 8;
				1546
				1547	/* Set up to add data that goes into A4 */
				1548	ps1 = A(21); ps2 = A(16); ps3 = A(13); ps4 = A(12); ps5 = A(20); ps6 = A(22); pd1 = A(15); pd2 = A(23), pd3 = A(23);
				1549	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; pt-- = ac; ac >>= 8;
				1550	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; pt-- = ac; ac >>= 8;
				1551	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; pt-- = ac; ac >>= 8;
				1552	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; ac -= pd2--; ac -= pd3--; pt-- = ac; ac >>= 8;
				1553
				1554	/* Set up to add data that goes into A5; D2, D3 not used */
				1555	ps1 = A(22); ps2 = A(17); ps3 = A(14); ps4 = A(13); ps5 = A(21); ps6 = A(23); pd1 = A(16);
				1556	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1557	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1558	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1559	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac += ps6--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1560
				1561	/* Set up to add data that goes into A6; S6, D2, D3 not used */
				1562	ps1 = A(23); ps2 = A(18); ps3 = A(15); ps4 = A(14); ps5 = A(22); pd1 = A(17);
				1563	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1564	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1565	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1566	ac += pt + ps1;ac += ps1--; ac += ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1567
				1568	/* Set up to add data that goes into A7; S1, S6, D2, D3 not used */
				1569	ps2 = A(19); ps3 = A(16); ps4 = A(15); ps5 = A(23); pd1 = A(18);
				1570	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1571	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1572	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1573	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac += ps5--; ac -= pd1--; *pt-- = ac; ac >>= 8;
				1574
				1575	/* Set up to add data that goes into A8; S1, S5, S6, D2, D3 not used */
				1576	ps2 = A(20); ps3 = A(17); ps4 = A(16); pd1 = A(19);
				1577	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1578	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1579	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1580	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1581
				1582	/* Set up to add data that goes into A9; S1, S5, S6, D2, D3 not used */
				1583	ps2 = A(21); ps3 = A(18); ps4 = A(17); pd1 = A(20);
				1584	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1585	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1586	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1587	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1588
				1589	/* Set up to add data that goes into A10; S1, S5, S6, D2, D3 not used */
				1590	ps2 = A(22); ps3 = A(19); ps4 = A(18); pd1 = A(21);
				1591	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1592	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1593	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1594	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1595
				1596	/* Set up to add data that goes into A10; S1, S5, S6, D2, D3 not used */
				1597	ps2 = A(23); ps3 = A(20); ps4 = A(19); pd1 = A(22);
				1598	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1599	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1600	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1601	ac += pt + ps2--; ac += ps3--; ac += ps4--; ac -= pd1--; pt-- = ac; ac >>= 8;
				1602
				1603	bnSetQ(r, 0);
				1604	if (ac > 0) {
				1605	(A(12)) = ac; / Store the carry */
				1606	bnInsertBigBytes(r, A(12), 0, 49); /* 49: 12 * 4 byte words + 1 carry byte */
				1607	}
				1608	/* Negative carry requires that we add the modulo (carry * -1) times to make
				1609	* the result positive. Then get the result mod(256).
				1610	*/
				1611	else if (ac < 0) {
				1612	int msb, maxMsb;
				1613
				1614	*(A(12)) = 0;
				1615	bnInsertBigBytes(r, A(12), 0, 49); /* 49: 12 * 4 byte words + 1 carry byte */
				1616	ac *= -1;
				1617	while (ac--) {
				1618	bnAdd(r, modulo);
				1619	}
				1620	maxMsb = bnBits(modulo);
				1621	msb = bnBits(r) - maxMsb;
				1622	/* clear all bits above bit length of modulo. This length is 384 here, thus
				1623	* we effectiviely doing a mod(384)
				1624	*/
				1625	if (msb > 0) {
				1626	BigNum tmp;
				1627	bnBegin(&tmp);
				1628	bnSetQ (&tmp, 1);
				1629	bnLShift (&tmp, maxMsb);
				1630	bnMod(r, r, &tmp);
				1631	bnEnd(&tmp);
				1632	}
				1633	}
				1634	else {
				1635	*(A(12)) = 0;
				1636	bnInsertBigBytes(r, A(12), 0, 49); /* 49: 12 * 4 byte words + 1 carry byte */
				1637	}
				1638	while (bnCmp(r, modulo) >= 0) {
				1639	bnSub(r, modulo);
				1640	}
				1641	return 0;
				1642	}
				1643	#undef A
				1644
				1645
				1646	/* new modulo for 521bit curve, much easier because the prime for 521 is a real Mersenne prime */
				1647	static int newMod521(BigNum r, const BigNum a, const BigNum *modulo)
				1648	{
				1649	unsigned char buf1[200] = {0};
				1650	unsigned char buf2[200] = {0};
				1651	unsigned char *p1;
				1652	unsigned char *p2;
				1653	size_t modSize;
				1654	short ac = 0;
				1655	unsigned int i;
				1656	int cmp;
				1657
				1658	/* TODO: check if a is > modulo^2 */
				1659	#if 0
				1660	if (a->s < 0) /* is it a negative value? */
				1661	return bnMod(r, a, modulo);
				1662	#endif
				1663	cmp = bnCmp(modulo, a);
				1664	if (cmp == 0) { /* a is equal modulo, set resul to zero */
				1665	bnSetQ(r, 0);
				1666	return 0;
				1667	}
				1668	bnCopy(r, a);
				1669	if (cmp > 0) { /* modulo is greater than a - return the prepared r */
				1670	return 0;
				1671	}
				1672	modSize = bnBytes(modulo);
				1673
				1674	bnExtractBigBytes(a, buf1, 0, modSize2); / a must be less modulo^2 */
				1675	buf1[modSize] &= 1; /* clear all bits except least significat */
				1676
				1677	bnRShift(r, 521);
				1678	bnExtractBigBytes(r, buf2, 0, modSize*2);
				1679	buf2[modSize] &= 1;
				1680
				1681	p1 = &buf2[131]; /* p1 is pointer to A0 */
				1682	p2 = &buf1[131]; /* p2 is pointer to A1 */
				1683
				1684	for (i = 0; i < modSize; i++) {
				1685	ac += p1 + p2--; *p1-- = ac; ac >>= 8;
				1686	}
				1687	bnSetQ(r, 0);
				1688	bnInsertBigBytes(r, p1+1, 0, modSize);
				1689
				1690	while (bnCmp(r, modulo) >= 0) {
				1691	bnSub(r, modulo);
				1692	}
				1693	return 0;
				1694	}
				1695