Blame - jni/libzrtp/sources/bnlib/sieve.c - jami-client-android

blob: 7362ff52108f1e987c83c1d0ede7b87a2fcd45f7 [file] [log] [blame]

Alexandre Lision	7fd5d3d	2013-12-04 13:06:40 -0500	[diff] [blame]	1	/*
				2	* sieve.c - Trial division for prime finding.
				3	*
				4	* Copyright (c) 1995 Colin Plumb. All rights reserved.
				5	* For licensing and other legal details, see the file legal.c.
				6	*
				7	* Finding primes:
				8	* - Sieve 1 to find the small primes for
				9	* - Sieve 2 to find the candidate large primes, then
				10	* - Pseudo-primality test.
				11	*
				12	* An important question is how much trial division by small primes
				13	* should we do? The answer is a LOT. Even a heavily optimized
				14	* Fermat test to the base 2 (the simplest pseudoprimality test)
				15	* is much more expensive than a division.
				16	*
				17	* For an prime of n k-bit words, a Fermat test to the base 2 requires n*k
				18	* modular squarings, each of which involves n*(n+1)/2 signle-word multiplies
				19	* in the squaring and n*(n+1) multiplies in the modular reduction, plus
				20	* some overhead to get into and out of Montgomery form. This is a total
				21	* of 3/2 * k * n^2 * (n+1). Equivalently, if n*k = b bits, it's
				22	* 3/2 * (b/k+1) * b^2 / k.
				23	*
				24	* A modulo operation requires n single-word divides. Let's assume that
				25	* a divide is 4 times the cost of a multiply. That's 4*n multiplies.
				26	* However, you only have to do the division once for your entire
				27	* search. It can be amortized over 10-15 primes. So it's
				28	* really more like n/3 multiplies. This is b/3k.
				29	*
				30	* Now, let's suppose you have a candidate prime t. Your options
				31	* are to a) do trial division by a prime p, then do a Fermat test,
				32	* or to do the Fermat test directly. Doing the trial division
				33	* costs b/3k multiplies, but a certain fraction of the time (1/p), it
				34	* saves you 3/2 b^3 / k^2 multiplies. Thus, it's worth it doing the
				35	* division as long as b/3k < 3/2 * (b/k+1) * b^2 / k / p.
				36	* I.e. p < 9/2 * (b/k + 1) * b = 9/2 * (b^2/k + b).
				37	* E.g. for k=16 and b=256, p < 9/2 * 17 * 256 = 19584.
				38	* Solving for k=16 and k=32 at a few interesting value of b:
				39	*
				40	* k=16, b=256: p < 19584 k=32, b=256: p < 10368
				41	* k=16, b=384: p < 43200 k=32, b=384; p < 22464
				42	* k=16, b=512: p < 76032 k=32, b=512: p < 39168
				43	* k=16, b=640: p < 118080 k=32, b=640: p < 60480
				44	*
				45	* H'm... before using the highly-optimized Fermat test, I got much larger
				46	* numbers (64K to 256K), and designed the sieve for that. Maybe it needs
				47	* to be reduced. It is true that the desirable sieve size increases
				48	* rapidly with increasing prime size, and it's the larger primes that are
				49	* worrisome in any case. I'll leave it as is (64K) for now while I
				50	* think about it.
				51	*
				52	* A bit of tweaking the division (we can compute a reciprocal and do
				53	* multiplies instead, turning 4n into 4 + 2n) would increase all the
				54	* numbers by a factor of 2 or so.
				55	*
				56	*
				57	* Bit k in a sieve corresponds to the number a + k*b.
				58	* For a given a and b, the sieve's job is to find the values of
				59	* k for which a + k*b == 0 (mod p). Multiplying by b^-1 and
				60	* isolating k, you get k == -a*b^-1 (mod p). So the values of
				61	* k which should be worked on are k = (-ab^-1 mod p) + i p,
				62	* for i = 0, 1, 2,...
				63	*
				64	* Note how this is still easy to use with very large b, if you need it.
				65	* It just requires computing (b mod p) and then finding the multiplicative
				66	* inverse of that.
				67	*
				68	*
				69	* How large a space to search to ensure that one will hit a prime?
				70	* The average density is known, but the primes behave oddly, and sometimes
				71	* there are large gaps. It is conjectured by shanks that the first gap
				72	* of size "delta" will occur at approximately exp(sqrt(delta)), so a delta
				73	* of 65536 is conjectured to be to contain a prime up to e^256.
				74	* Remembering the handy 2<->e conversion ratios:
				75	* ln(2) = 0.693147 log2(e) = 1.442695
				76	* This covers up to 369 bits. Damn, not enough! Still, it'll have to do.
				77	*
				78	* Cramer's conjecture (he proved it for "most" cases) is that in the limit,
				79	* as p goes to infinity, the largest gap after a prime p tends to (ln(p))^2.
				80	* So, for a 1024-bit p, the interval to the next prime is expected to be
				81	* about 709.78^2, or 503791. We'd need to enlarge our space by a factor of
				82	* 8 to be sure. It isn't worth the hassle.
				83	*
				84	* Note that a span of this size is expected to contain 92 primes even
				85	* in the vicinity of 2^1024 (it's 369 at 256 bits and 492 at 192 bits).
				86	* So the probability of failure is pretty low.
				87	*/
				88	#ifndef HAVE_CONFIG_H
				89	#define HAVE_CONFIG_H 0
				90	#endif
				91	#if HAVE_CONFIG_H
				92	#include <bnconfig.h>
				93	#endif
				94
				95	/*
				96	* Some compilers complain about #if FOO if FOO isn't defined,
				97	* so do the ANSI-mandated thing explicitly...
				98	*/
				99	#ifndef NO_ASSERT_H
				100	#define NO_ASSERT_H 0
				101	#endif
				102	#ifndef NO_LIMITS_H
				103	#define NO_LIMITS_H 0
				104	#endif
				105	#ifndef NO_STRING_H
				106	#define NO_STRING_H 0
				107	#endif
				108	#ifndef HAVE_STRINGS_H
				109	#define HAVE_STRINGS_H 0
				110	#endif
				111	#ifndef NEED_MEMORY_H
				112	#define NEED_MEMORY_H 0
				113	#endif
				114
				115	#if !NO_ASSERT_H
				116	#include <assert.h>
				117	#else
				118	#define assert(x) (void)0
				119	#endif
				120
				121	#if !NO_LIMITS_H
				122	#include <limits.h> /* For UINT_MAX */
				123	#endif /* If not avail, default value of 0 is safe */
				124
				125	#if !NO_STRING_H
				126	#include <string.h> /* for memset() */
				127	#elif HAVE_STRINGS_H
				128	#include <strings.h>
				129	#endif
				130	#if NEED_MEMORY_H
				131	#include <memory.h>
				132	#endif
				133
				134	#include "bn.h"
				135	#include "sieve.h"
				136	#ifdef MSDOS
				137	#include "lbnmem.h"
				138	#endif
				139
				140	#include "kludge.h"
				141
				142	/*
				143	* Each array stores potential primes as 1 bits in little-endian bytes.
				144	* Bit k in an array represents a + k*b, for some parameters a and b
				145	* of the sieve. Currently, b is hardcoded to 2.
				146	*
				147	* Various factors of 16 arise because these are all byte sizes, and
				148	* skipping even numbers, 16 numbers fit into a byte's worth of bitmap.
				149	*/
				150
				151	/*
				152	* The first number in the small prime sieve. This could be raised to
				153	* 3 if you want to squeeze bytes out aggressively for a smaller SMALL
				154	* table, and doing so would let one more prime into the end of the array,
				155	* but there is no sense making it larger if you're generating small
				156	* primes up to the limit if 2^16, since it doesn't save any memory and
				157	* would require extra code to ignore 65537 in the last byte, which is
				158	* over the 16-bit limit.
				159	*/
				160	#define SMALLSTART 1
				161
				162	/*
				163	* Size of sieve used to find large primes, in bytes. For compatibility
				164	* with 16-bit-int systems, the largest prime that can appear in it,
				165	* SMALL * 16 + SMALLSTART - 2, must be < 65536. Since 65537 is a prime,
				166	* this is the absolute maximum table size.
				167	*/
				168	#define SMALL (65536/16)
				169
				170	/*
				171	* Compute the multiplicative inverse of x, modulo mod, using the extended
				172	* Euclidean algorithm. The classical EEA returns two results, traditionally
				173	* named s and t, but only one (t) is needed or computed here.
				174	* It is unrolled twice to avoid some variable-swapping, and because negating
				175	* t every other round makes all the number positive and less than the
				176	* modulus, which makes fixed-length arithmetic easier.
				177	*
				178	* If gcd(x, mod) != 1, then this will return 0.
				179	*/
				180	static unsigned
				181	sieveModInvert(unsigned x, unsigned mod)
				182	{
				183	unsigned y;
				184	unsigned t0, t1;
				185	unsigned q;
				186
				187	if (x <= 1)
				188	return x; /* 0 and 1 are self-inverse */
				189	/*
				190	* The first round is simplified based on the
				191	* initial conditions t0 = 1 and t1 = 0.
				192	*/
				193	t1 = mod / x;
				194	y = mod % x;
				195	if (y <= 1)
				196	return y ? mod - t1 : 0;
				197	t0 = 1;
				198
				199	do {
				200	q = x / y;
				201	x = x % y;
				202	t0 += q * t1;
				203	if (x <= 1)
				204	return x ? t0 : 0;
				205	q = y / x;
				206	y = y % x;
				207	t1 += q * t0;
				208	} while (y > 1);
				209	return y ? mod - t1 : 0;
				210	}
				211
				212
				213	/*
				214	* Perform a single sieving operation on an array. Clear bits "start",
				215	* "start+step", "start+2*step", etc. from the array, up to the size
				216	* limit (in BYTES) "size". All of the arguments must fit into 16 bits
				217	* for portability.
				218	*
				219	* This is the core of the sieving operation. In addition to being
				220	* called from the sieving functions, it is useful to call directly if,
				221	* say, you want to exclude primes congruent to 1 mod 3, or whatever.
				222	* (Although in that case, it would be better to change the sieving to
				223	* use a step size of 6 and start == 5 (mod 6).)
				224	*
				225	* Originally, this was inlined in the code below (with various checks
				226	* turned off where they could be inferred from the environment), but it
				227	* turns out that all the sieving is so fast that it makes a negligible
				228	* speed difference and smaller, cleaner code was preferred.
				229	*
				230	* Rather than increment a bit index through the array and clear
				231	* the corresponding bit, this code takes advantage of the fact that
				232	* every eighth increment must use the same bit position in a byte.
				233	* I.e. start + kstep == start + (k+8)step (mod 8). Thus, a bitmask
				234	* can be computed only eight times and used for all multiples. Thus, the
				235	* outer loop is over (k mod 8) while the inner loop is over (k div 8).
				236	*
				237	* The only further trickiness is that this code is designed to accept
				238	* start, step, and size up to 65535 on 16-bit machines. On such a
				239	* machine, the computation "start+step" can overflow, so we need to
				240	* insert an extra check for that situation.
				241	*/
				242	void
				243	sieveSingle(unsigned char *array, unsigned size, unsigned start, unsigned step)
				244	{
				245	unsigned bit;
				246	unsigned char mask;
				247	unsigned i;
				248
				249	#if UINT_MAX < 0x1ffff
				250	/* Unsigned is small; add checks for wrap */
				251	for (bit = 0; bit < 8; bit++) {
				252	i = start/8;
				253	if (i >= size)
				254	break;
				255	mask = ~(1 << (start & 7));
				256	do {
				257	array[i] &= mask;
				258	i += step;
				259	} while (i >= step && i < size);
				260	start += step;
				261	if (start < step) /* Overflow test */
				262	break;
				263	}
				264	#else
				265	/* Unsigned has the range - no overflow possible */
				266	for (bit = 0; bit < 8; bit++) {
				267	i = start/8;
				268	if (i >= size)
				269	break;
				270	mask = ~(1 << (start & 7));
				271	do {
				272	array[i] &= mask;
				273	i += step;
				274	} while (i < size);
				275	start += step;
				276	}
				277	#endif
				278	}
				279
				280	/*
				281	* Returns the index of the next bit set in the given array. The search
				282	* begins after the specified bit, so if you care about bit 0, you need
				283	* to check it explicitly yourself. This returns 0 if no bits are found.
				284	*
				285	* Note that the size is in bytes, and that it takes and returns BIT
				286	* positions. If the array represents odd numbers only, as usual, the
				287	* returned values must be doubled to turn them into offsets from the
				288	* initial number.
				289	*/
				290	unsigned
				291	sieveSearch(unsigned char const *array, unsigned size, unsigned start)
				292	{
				293	unsigned i; /* Loop index */
				294	unsigned char t; /* Temp */
				295
				296	if (!++start)
				297	return 0;
				298	i = start/8;
				299	if (i >= size)
				300	return 0; /* Done! */
				301
				302	/* Deal with odd-bit beginnings => search the first byte */
				303	if (start & 7) {
				304	t = array[i++] >> (start & 7);
				305	if (t) {
				306	if (!(t & 15)) {
				307	t >>= 4;
				308	start += 4;
				309	}
				310	if (!(t & 3)) {
				311	t >>= 2;
				312	start += 2;
				313	}
				314	if (!(t & 1))
				315	start += 1;
				316	return start;
				317	} else if (i == size) {
				318	return 0; /* Done */
				319	}
				320	}
				321
				322	/* Now the main search loop */
				323
				324	do {
				325	if ((t = array[i]) != 0) {
				326	start = 8*i;
				327	if (!(t & 15)) {
				328	t >>= 4;
				329	start += 4;
				330	}
				331	if (!(t & 3)) {
				332	t >>= 2;
				333	start += 2;
				334	}
				335	if (!(t & 1))
				336	start += 1;
				337	return start;
				338	}
				339	} while (++i < size);
				340
				341	/* Failed */
				342	return 0;
				343	}
				344
				345	/*
				346	* Build a table of small primes for sieving larger primes with. This
				347	* could be cached between calls to sieveBuild, but it's so fast that
				348	* it's really not worth it. This code takes a few milliseconds to run.
				349	*/
				350	static void
				351	sieveSmall(unsigned char *array, unsigned size)
				352	{
				353	unsigned i; /* Loop index */
				354	unsigned p; /* The current prime */
				355
				356	/* Initialize to all 1s */
				357	memset(array, 0xFF, size);
				358
				359	#if SMALLSTART == 1
				360	/* Mark 1 as NOT prime */
				361	array[0] = 0xfe;
				362	i = 1; /* Index of first prime */
				363	#else
				364	i = 0; /* Index of first prime */
				365	#endif
				366
				367	/*
				368	* Okay, now sieve via the primes up to 256, obtained from the
				369	* table itself. We know the maximum possible table size is
				370	* 65536, and sieveSingle() can cope with out-of-range inputs
				371	* safely, and the time required is trivial, so it isn't adaptive
				372	* based on the array size.
				373	*
				374	* Convert each bit position into a prime, compute a starting
				375	* sieve position (the square of the prime), and remove multiples
				376	* from the table, using sieveSingle(). I used to have that
				377	* code in line here, but the speed difference was so small it
				378	* wasn't worth it. If a compiler really wants to waste memory,
				379	* it can inline it.
				380	*/
				381	do {
				382	p = 2 * i + SMALLSTART;
				383	if (p > 256)
				384	break;
				385	/* Start at square of p */
				386	sieveSingle(array, size, (p*p-SMALLSTART)/2, p);
				387
				388	/* And find the next prime */
				389	i = sieveSearch(array, 16, i);
				390	} while (i);
				391	}
				392
				393
				394	/*
				395	* This is the primary sieving function. It fills in the array with
				396	* a sieve (multiples of small primes removed) beginning at bn and
				397	* proceeding in steps of "step".
				398	*
				399	* It generates a small array to get the primes to sieve by. It's
				400	* generated on the fly - sieveSmall is fast enough to make that
				401	* perfectly acceptable.
				402	*
				403	* The caller should take the array, walk it with sieveSearch, and
				404	* apply a stronger primality test to the numbers that are returned.
				405	*
				406	* If the "dbl" flag non-zero (at least 1), this also sieves 2*bn+1, in
				407	* steps of 2step. If dbl is 2 or more, this also sieve 4bn+3,
				408	* in steps of 4*step, and so on for arbitrarily high values of "dbl".
				409	* This is convenient for finding primes such that (p-1)/2 is also prime.
				410	* This is particularly efficient because sieveSingle is controlled by the
				411	* parameter s = -n/step (mod p). (In fact, we find t = -1/step (mod p)
				412	* and multiply that by n (mod p).) If you have -n/step (mod p), then
				413	* finding -(2n+1)/(2step) (mod p), which is -n/step - 1/(2*step) (mod p),
				414	* reduces to finding -1/(2*step) (mod p), or t/2 (mod p), and adding that
				415	* to s = -n/step (mod p). Dividing by 2 modulo an odd p is easy -
				416	* if even, divide directly. Otherwise, add p (which produces an even
				417	* sum), and divide by 2. Very simple. And this produces s' and t'
				418	* for step' = 2step. It can be repeated for step'' = 4step and so on.
				419	*
				420	* Note that some of the math is complicated by the fact that 2*p might
				421	* not fit into an unsigned, so rather than if (odd(x)) x = (x+p)/2,
				422	* we do if (odd(x)) x = x/2 + p/2 + 1;
				423	*
				424	* TODO: Do the double-sieving by sieving the larger number, and then
				425	* just subtract one from the remainder to get the other parameter.
				426	* (bn-1)/2 is divisible by an odd p iff bn-1 is divisible, which is
				427	* true iff bn == 1 mod p. This requires using a step size of 4.
				428	*/
				429	int
				430	sieveBuild(unsigned char array, unsigned size, struct BigNum const bn,
				431	unsigned step, unsigned dbl)
				432	{
				433	unsigned i, j; /* Loop index */
				434	unsigned p; /* Current small prime */
				435	unsigned s; /* Where to start operations in the big sieve */
				436	unsigned t; /* Step modulo p, the current prime */
				437	#ifdef MSDOS /* Use dynamic allocation rather than on the stack */
				438	unsigned char *small;
				439	#else
				440	unsigned char small[SMALL];
				441	#endif
				442
				443	assert(array);
				444
				445	#ifdef MSDOS
				446	small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */
				447	if (!small)
				448	return -1; /* Failed */
				449	#endif
				450
				451	/*
				452	* An odd step is a special case, since we must sieve by 2,
				453	* which isn't in the small prime array and has a few other
				454	* special properties. These are:
				455	* - Since the numbers are stored in binary, we don't need to
				456	* use bnModQ to find the remainder.
				457	* - If step is odd, then t = step % 2 is 1, which allows
				458	* the elimination of a lot of math. Inverting and negating
				459	* t don't change it, and multiplying s by 1 is a no-op,
				460	* so t isn't actually mentioned.
				461	* - Since this is the first sieving, instead of calling
				462	* sieveSingle, we can just use memset to fill the array
				463	* with 0x55 or 0xAA. Since a 1 bit means possible prime
				464	* (i.e. NOT divisible by 2), and the least significant bit
				465	* is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT
				466	* prime), while if bn % 2 == 1, use 0x55.
				467	* (If step is even, bn must be odd, so fill the array with 0xFF.)
				468	* - Any doublings need not be considered, since 2*bn+1 is odd, and
				469	* 2*step is even, so none of these numbers are divisible by 2.
				470	*/
				471	if (step & 1) {
				472	s = bnLSWord(bn) & 1;
				473	memset(array, 0xAA >> s, size);
				474	} else {
				475	/* Initialize the array to all 1's */
				476	memset(array, 255, size);
				477	assert(bnLSWord(bn) & 1);
				478	}
				479
				480	/*
				481	* This could be cached between calls to sieveBuild, but
				482	* it's really not worth it; sieveSmall is very fast.
				483	* sieveSmall returns a sieve of odd primes.
				484	*/
				485	sieveSmall(small, SMALL);
				486
				487	/*
				488	* Okay, now sieve via the primes up to ssize*16+SMALLSTART-1,
				489	* obtained from the small table.
				490	*/
				491	i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0);
				492	do {
				493	p = 2 * i + SMALLSTART;
				494
				495	/*
				496	* Modulo is usually very expensive, but step is usually
				497	* small, so this conditional is worth it.
				498	*/
				499	t = (step < p) ? step : step % p;
				500	if (!t) {
				501	/*
				502	* Instead of assert failing, returning all zero
				503	* bits is the "correct" thing to do, but I think
				504	* that the caller should take care of that
				505	* themselves before starting.
				506	*/
				507	assert(bnModQ(bn, p) != 0);
				508	continue;
				509	}
				510	/*
				511	* Get inverse of step mod p. 0 < t < p, and p is prime,
				512	* so it has an inverse and sieveModInvert can't return 0.
				513	*/
				514	t = sieveModInvert(t, p);
				515	assert(t);
				516	/* Negate t, so now t == -1/step (mod p) */
				517	t = p - t;
				518
				519	/* Now get the bignum modulo the prime. */
				520	s = bnModQ(bn, p);
				521
				522	/* Multiply by t, the negative inverse of step size */
				523	#if UINT_MAX/0xffff < 0xffff
				524	s = (unsigned)(((unsigned long)s * t) % p);
				525	#else
				526	s = (s * t) % p;
				527	#endif
				528
				529	/* s is now the starting bit position, so sieve */
				530	sieveSingle(array, size, s, p);
				531
				532	/* Now do the double sieves as desired. */
				533	for (j = 0; j < dbl; j++) {
				534	/* Halve t modulo p */
				535	#if UINT_MAX < 0x1ffff
				536	t = (t & 1) ? p/2 + t/2 + 1 : t/2;
				537	/* Add t to s, modulo p with overflow checks. */
				538	s += t;
				539	if (s >= p \|\| s < t)
				540	s -= p;
				541	#else
				542	if (t & 1)
				543	t += p;
				544	t /= 2;
				545	/* Add t to s, modulo p */
				546	s += t;
				547	if (s >= p)
				548	s -= p;
				549	#endif
				550	sieveSingle(array, size, s, p);
				551	}
				552
				553	/* And find the next prime */
				554	} while ((i = sieveSearch(small, SMALL, i)) != 0);
				555
				556	#ifdef MSDOS
				557	lbnMemFree(small, SMALL);
				558	#endif
				559	return 0; /* Success */
				560	}
				561
				562	/*
				563	* Similar to the above, but use "step" (which must be even) as a step
				564	* size rather than a fixed value of 2. If "step" has any small divisors
				565	* other than 2, this will blow up.
				566	*
				567	* Returns -1 on out of memory (MSDOS only, actually), and -2
				568	* if step is found to be non-prime.
				569	*/
				570	int
				571	sieveBuildBig(unsigned char array, unsigned size, struct BigNum const bn,
				572	struct BigNum const *step, unsigned dbl)
				573	{
				574	unsigned i, j; /* Loop index */
				575	unsigned p; /* Current small prime */
				576	unsigned s; /* Where to start operations in the big sieve */
				577	unsigned t; /* step modulo p, the current prime */
				578	#ifdef MSDOS /* Use dynamic allocation rather than on the stack */
				579	unsigned char *small;
				580	#else
				581	unsigned char small[SMALL];
				582	#endif
				583
				584	assert(array);
				585
				586	#ifdef MSDOS
				587	small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */
				588	if (!small)
				589	return -1; /* Failed */
				590	#endif
				591	/*
				592	* An odd step is a special case, since we must sieve by 2,
				593	* which isn't in the small prime array and has a few other
				594	* special properties. These are:
				595	* - Since the numbers are stored in binary, we don't need to
				596	* use bnModQ to find the remainder.
				597	* - If step is odd, then t = step % 2 is 1, which allows
				598	* the elimination of a lot of math. Inverting and negating
				599	* t don't change it, and multiplying s by 1 is a no-op,
				600	* so t isn't actually mentioned.
				601	* - Since this is the first sieving, instead of calling
				602	* sieveSingle, we can just use memset to fill the array
				603	* with 0x55 or 0xAA. Since a 1 bit means possible prime
				604	* (i.e. NOT divisible by 2), and the least significant bit
				605	* is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT
				606	* prime), while if bn % 2 == 1, use 0x55.
				607	* (If step is even, bn must be odd, so fill the array with 0xFF.)
				608	* - Any doublings need not be considered, since 2*bn+1 is odd, and
				609	* 2*step is even, so none of these numbers are divisible by 2.
				610	*/
				611	if (bnLSWord(step) & 1) {
				612	s = bnLSWord(bn) & 1;
				613	memset(array, 0xAA >> s, size);
				614	} else {
				615	/* Initialize the array to all 1's */
				616	memset(array, 255, size);
				617	assert(bnLSWord(bn) & 1);
				618	}
				619
				620	/*
				621	* This could be cached between calls to sieveBuild, but
				622	* it's really not worth it; sieveSmall is very fast.
				623	* sieveSmall returns a sieve of the odd primes.
				624	*/
				625	sieveSmall(small, SMALL);
				626
				627	/*
				628	* Okay, now sieve via the primes up to ssize*16+SMALLSTART-1,
				629	* obtained from the small table.
				630	*/
				631	i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0);
				632	do {
				633	p = 2 * i + SMALLSTART;
				634
				635	t = bnModQ(step, p);
				636	if (!t) {
				637	assert(bnModQ(bn, p) != 0);
				638	continue;
				639	}
				640	/* Get negative inverse of step */
				641	t = sieveModInvert(bnModQ(step, p), p);
				642	assert(t);
				643	t = p-t;
				644
				645	/* Okay, we have a prime - get the remainder */
				646	s = bnModQ(bn, p);
				647
				648	/* Now multiply s by the negative inverse of step (mod p) */
				649	#if UINT_MAX/0xffff < 0xffff
				650	s = (unsigned)(((unsigned long)s * t) % p);
				651	#else
				652	s = (s * t) % p;
				653	#endif
				654	/* We now have the starting bit pos */
				655	sieveSingle(array, size, s, p);
				656
				657	/* Now do the double sieves as desired. */
				658	for (j = 0; j < dbl; j++) {
				659	/* Halve t modulo p */
				660	#if UINT_MAX < 0x1ffff
				661	t = (t & 1) ? p/2 + t/2 + 1 : t/2;
				662	/* Add t to s, modulo p with overflow checks. */
				663	s += t;
				664	if (s >= p \|\| s < t)
				665	s -= p;
				666	#else
				667	if (t & 1)
				668	t += p;
				669	t /= 2;
				670	/* Add t to s, modulo p */
				671	s += t;
				672	if (s >= p)
				673	s -= p;
				674	#endif
				675	sieveSingle(array, size, s, p);
				676	}
				677
				678	/* And find the next prime */
				679	} while ((i = sieveSearch(small, SMALL, i)) != 0);
				680
				681	#ifdef MSDOS
				682	lbnMemFree(small, SMALL);
				683	#endif
				684	return 0; /* Success */
				685	}