Alexandre Lision | 7fd5d3d | 2013-12-04 13:06:40 -0500 | [diff] [blame] | 1 | /* |
| 2 | * sieve.c - Trial division for prime finding. |
| 3 | * |
| 4 | * Copyright (c) 1995 Colin Plumb. All rights reserved. |
| 5 | * For licensing and other legal details, see the file legal.c. |
| 6 | * |
| 7 | * Finding primes: |
| 8 | * - Sieve 1 to find the small primes for |
| 9 | * - Sieve 2 to find the candidate large primes, then |
| 10 | * - Pseudo-primality test. |
| 11 | * |
| 12 | * An important question is how much trial division by small primes |
| 13 | * should we do? The answer is a LOT. Even a heavily optimized |
| 14 | * Fermat test to the base 2 (the simplest pseudoprimality test) |
| 15 | * is much more expensive than a division. |
| 16 | * |
| 17 | * For an prime of n k-bit words, a Fermat test to the base 2 requires n*k |
| 18 | * modular squarings, each of which involves n*(n+1)/2 signle-word multiplies |
| 19 | * in the squaring and n*(n+1) multiplies in the modular reduction, plus |
| 20 | * some overhead to get into and out of Montgomery form. This is a total |
| 21 | * of 3/2 * k * n^2 * (n+1). Equivalently, if n*k = b bits, it's |
| 22 | * 3/2 * (b/k+1) * b^2 / k. |
| 23 | * |
| 24 | * A modulo operation requires n single-word divides. Let's assume that |
| 25 | * a divide is 4 times the cost of a multiply. That's 4*n multiplies. |
| 26 | * However, you only have to do the division once for your entire |
| 27 | * search. It can be amortized over 10-15 primes. So it's |
| 28 | * really more like n/3 multiplies. This is b/3k. |
| 29 | * |
| 30 | * Now, let's suppose you have a candidate prime t. Your options |
| 31 | * are to a) do trial division by a prime p, then do a Fermat test, |
| 32 | * or to do the Fermat test directly. Doing the trial division |
| 33 | * costs b/3k multiplies, but a certain fraction of the time (1/p), it |
| 34 | * saves you 3/2 b^3 / k^2 multiplies. Thus, it's worth it doing the |
| 35 | * division as long as b/3k < 3/2 * (b/k+1) * b^2 / k / p. |
| 36 | * I.e. p < 9/2 * (b/k + 1) * b = 9/2 * (b^2/k + b). |
| 37 | * E.g. for k=16 and b=256, p < 9/2 * 17 * 256 = 19584. |
| 38 | * Solving for k=16 and k=32 at a few interesting value of b: |
| 39 | * |
| 40 | * k=16, b=256: p < 19584 k=32, b=256: p < 10368 |
| 41 | * k=16, b=384: p < 43200 k=32, b=384; p < 22464 |
| 42 | * k=16, b=512: p < 76032 k=32, b=512: p < 39168 |
| 43 | * k=16, b=640: p < 118080 k=32, b=640: p < 60480 |
| 44 | * |
| 45 | * H'm... before using the highly-optimized Fermat test, I got much larger |
| 46 | * numbers (64K to 256K), and designed the sieve for that. Maybe it needs |
| 47 | * to be reduced. It *is* true that the desirable sieve size increases |
| 48 | * rapidly with increasing prime size, and it's the larger primes that are |
| 49 | * worrisome in any case. I'll leave it as is (64K) for now while I |
| 50 | * think about it. |
| 51 | * |
| 52 | * A bit of tweaking the division (we can compute a reciprocal and do |
| 53 | * multiplies instead, turning 4*n into 4 + 2*n) would increase all the |
| 54 | * numbers by a factor of 2 or so. |
| 55 | * |
| 56 | * |
| 57 | * Bit k in a sieve corresponds to the number a + k*b. |
| 58 | * For a given a and b, the sieve's job is to find the values of |
| 59 | * k for which a + k*b == 0 (mod p). Multiplying by b^-1 and |
| 60 | * isolating k, you get k == -a*b^-1 (mod p). So the values of |
| 61 | * k which should be worked on are k = (-a*b^-1 mod p) + i * p, |
| 62 | * for i = 0, 1, 2,... |
| 63 | * |
| 64 | * Note how this is still easy to use with very large b, if you need it. |
| 65 | * It just requires computing (b mod p) and then finding the multiplicative |
| 66 | * inverse of that. |
| 67 | * |
| 68 | * |
| 69 | * How large a space to search to ensure that one will hit a prime? |
| 70 | * The average density is known, but the primes behave oddly, and sometimes |
| 71 | * there are large gaps. It is conjectured by shanks that the first gap |
| 72 | * of size "delta" will occur at approximately exp(sqrt(delta)), so a delta |
| 73 | * of 65536 is conjectured to be to contain a prime up to e^256. |
| 74 | * Remembering the handy 2<->e conversion ratios: |
| 75 | * ln(2) = 0.693147 log2(e) = 1.442695 |
| 76 | * This covers up to 369 bits. Damn, not enough! Still, it'll have to do. |
| 77 | * |
| 78 | * Cramer's conjecture (he proved it for "most" cases) is that in the limit, |
| 79 | * as p goes to infinity, the largest gap after a prime p tends to (ln(p))^2. |
| 80 | * So, for a 1024-bit p, the interval to the next prime is expected to be |
| 81 | * about 709.78^2, or 503791. We'd need to enlarge our space by a factor of |
| 82 | * 8 to be sure. It isn't worth the hassle. |
| 83 | * |
| 84 | * Note that a span of this size is expected to contain 92 primes even |
| 85 | * in the vicinity of 2^1024 (it's 369 at 256 bits and 492 at 192 bits). |
| 86 | * So the probability of failure is pretty low. |
| 87 | */ |
| 88 | #ifndef HAVE_CONFIG_H |
| 89 | #define HAVE_CONFIG_H 0 |
| 90 | #endif |
| 91 | #if HAVE_CONFIG_H |
| 92 | #include <bnconfig.h> |
| 93 | #endif |
| 94 | |
| 95 | /* |
| 96 | * Some compilers complain about #if FOO if FOO isn't defined, |
| 97 | * so do the ANSI-mandated thing explicitly... |
| 98 | */ |
| 99 | #ifndef NO_ASSERT_H |
| 100 | #define NO_ASSERT_H 0 |
| 101 | #endif |
| 102 | #ifndef NO_LIMITS_H |
| 103 | #define NO_LIMITS_H 0 |
| 104 | #endif |
| 105 | #ifndef NO_STRING_H |
| 106 | #define NO_STRING_H 0 |
| 107 | #endif |
| 108 | #ifndef HAVE_STRINGS_H |
| 109 | #define HAVE_STRINGS_H 0 |
| 110 | #endif |
| 111 | #ifndef NEED_MEMORY_H |
| 112 | #define NEED_MEMORY_H 0 |
| 113 | #endif |
| 114 | |
| 115 | #if !NO_ASSERT_H |
| 116 | #include <assert.h> |
| 117 | #else |
| 118 | #define assert(x) (void)0 |
| 119 | #endif |
| 120 | |
| 121 | #if !NO_LIMITS_H |
| 122 | #include <limits.h> /* For UINT_MAX */ |
| 123 | #endif /* If not avail, default value of 0 is safe */ |
| 124 | |
| 125 | #if !NO_STRING_H |
| 126 | #include <string.h> /* for memset() */ |
| 127 | #elif HAVE_STRINGS_H |
| 128 | #include <strings.h> |
| 129 | #endif |
| 130 | #if NEED_MEMORY_H |
| 131 | #include <memory.h> |
| 132 | #endif |
| 133 | |
| 134 | #include "bn.h" |
| 135 | #include "sieve.h" |
| 136 | #ifdef MSDOS |
| 137 | #include "lbnmem.h" |
| 138 | #endif |
| 139 | |
| 140 | #include "kludge.h" |
| 141 | |
| 142 | /* |
| 143 | * Each array stores potential primes as 1 bits in little-endian bytes. |
| 144 | * Bit k in an array represents a + k*b, for some parameters a and b |
| 145 | * of the sieve. Currently, b is hardcoded to 2. |
| 146 | * |
| 147 | * Various factors of 16 arise because these are all *byte* sizes, and |
| 148 | * skipping even numbers, 16 numbers fit into a byte's worth of bitmap. |
| 149 | */ |
| 150 | |
| 151 | /* |
| 152 | * The first number in the small prime sieve. This could be raised to |
| 153 | * 3 if you want to squeeze bytes out aggressively for a smaller SMALL |
| 154 | * table, and doing so would let one more prime into the end of the array, |
| 155 | * but there is no sense making it larger if you're generating small |
| 156 | * primes up to the limit if 2^16, since it doesn't save any memory and |
| 157 | * would require extra code to ignore 65537 in the last byte, which is |
| 158 | * over the 16-bit limit. |
| 159 | */ |
| 160 | #define SMALLSTART 1 |
| 161 | |
| 162 | /* |
| 163 | * Size of sieve used to find large primes, in bytes. For compatibility |
| 164 | * with 16-bit-int systems, the largest prime that can appear in it, |
| 165 | * SMALL * 16 + SMALLSTART - 2, must be < 65536. Since 65537 is a prime, |
| 166 | * this is the absolute maximum table size. |
| 167 | */ |
| 168 | #define SMALL (65536/16) |
| 169 | |
| 170 | /* |
| 171 | * Compute the multiplicative inverse of x, modulo mod, using the extended |
| 172 | * Euclidean algorithm. The classical EEA returns two results, traditionally |
| 173 | * named s and t, but only one (t) is needed or computed here. |
| 174 | * It is unrolled twice to avoid some variable-swapping, and because negating |
| 175 | * t every other round makes all the number positive and less than the |
| 176 | * modulus, which makes fixed-length arithmetic easier. |
| 177 | * |
| 178 | * If gcd(x, mod) != 1, then this will return 0. |
| 179 | */ |
| 180 | static unsigned |
| 181 | sieveModInvert(unsigned x, unsigned mod) |
| 182 | { |
| 183 | unsigned y; |
| 184 | unsigned t0, t1; |
| 185 | unsigned q; |
| 186 | |
| 187 | if (x <= 1) |
| 188 | return x; /* 0 and 1 are self-inverse */ |
| 189 | /* |
| 190 | * The first round is simplified based on the |
| 191 | * initial conditions t0 = 1 and t1 = 0. |
| 192 | */ |
| 193 | t1 = mod / x; |
| 194 | y = mod % x; |
| 195 | if (y <= 1) |
| 196 | return y ? mod - t1 : 0; |
| 197 | t0 = 1; |
| 198 | |
| 199 | do { |
| 200 | q = x / y; |
| 201 | x = x % y; |
| 202 | t0 += q * t1; |
| 203 | if (x <= 1) |
| 204 | return x ? t0 : 0; |
| 205 | q = y / x; |
| 206 | y = y % x; |
| 207 | t1 += q * t0; |
| 208 | } while (y > 1); |
| 209 | return y ? mod - t1 : 0; |
| 210 | } |
| 211 | |
| 212 | |
| 213 | /* |
| 214 | * Perform a single sieving operation on an array. Clear bits "start", |
| 215 | * "start+step", "start+2*step", etc. from the array, up to the size |
| 216 | * limit (in BYTES) "size". All of the arguments must fit into 16 bits |
| 217 | * for portability. |
| 218 | * |
| 219 | * This is the core of the sieving operation. In addition to being |
| 220 | * called from the sieving functions, it is useful to call directly if, |
| 221 | * say, you want to exclude primes congruent to 1 mod 3, or whatever. |
| 222 | * (Although in that case, it would be better to change the sieving to |
| 223 | * use a step size of 6 and start == 5 (mod 6).) |
| 224 | * |
| 225 | * Originally, this was inlined in the code below (with various checks |
| 226 | * turned off where they could be inferred from the environment), but it |
| 227 | * turns out that all the sieving is so fast that it makes a negligible |
| 228 | * speed difference and smaller, cleaner code was preferred. |
| 229 | * |
| 230 | * Rather than increment a bit index through the array and clear |
| 231 | * the corresponding bit, this code takes advantage of the fact that |
| 232 | * every eighth increment must use the same bit position in a byte. |
| 233 | * I.e. start + k*step == start + (k+8)*step (mod 8). Thus, a bitmask |
| 234 | * can be computed only eight times and used for all multiples. Thus, the |
| 235 | * outer loop is over (k mod 8) while the inner loop is over (k div 8). |
| 236 | * |
| 237 | * The only further trickiness is that this code is designed to accept |
| 238 | * start, step, and size up to 65535 on 16-bit machines. On such a |
| 239 | * machine, the computation "start+step" can overflow, so we need to |
| 240 | * insert an extra check for that situation. |
| 241 | */ |
| 242 | void |
| 243 | sieveSingle(unsigned char *array, unsigned size, unsigned start, unsigned step) |
| 244 | { |
| 245 | unsigned bit; |
| 246 | unsigned char mask; |
| 247 | unsigned i; |
| 248 | |
| 249 | #if UINT_MAX < 0x1ffff |
| 250 | /* Unsigned is small; add checks for wrap */ |
| 251 | for (bit = 0; bit < 8; bit++) { |
| 252 | i = start/8; |
| 253 | if (i >= size) |
| 254 | break; |
| 255 | mask = ~(1 << (start & 7)); |
| 256 | do { |
| 257 | array[i] &= mask; |
| 258 | i += step; |
| 259 | } while (i >= step && i < size); |
| 260 | start += step; |
| 261 | if (start < step) /* Overflow test */ |
| 262 | break; |
| 263 | } |
| 264 | #else |
| 265 | /* Unsigned has the range - no overflow possible */ |
| 266 | for (bit = 0; bit < 8; bit++) { |
| 267 | i = start/8; |
| 268 | if (i >= size) |
| 269 | break; |
| 270 | mask = ~(1 << (start & 7)); |
| 271 | do { |
| 272 | array[i] &= mask; |
| 273 | i += step; |
| 274 | } while (i < size); |
| 275 | start += step; |
| 276 | } |
| 277 | #endif |
| 278 | } |
| 279 | |
| 280 | /* |
| 281 | * Returns the index of the next bit set in the given array. The search |
| 282 | * begins after the specified bit, so if you care about bit 0, you need |
| 283 | * to check it explicitly yourself. This returns 0 if no bits are found. |
| 284 | * |
| 285 | * Note that the size is in bytes, and that it takes and returns BIT |
| 286 | * positions. If the array represents odd numbers only, as usual, the |
| 287 | * returned values must be doubled to turn them into offsets from the |
| 288 | * initial number. |
| 289 | */ |
| 290 | unsigned |
| 291 | sieveSearch(unsigned char const *array, unsigned size, unsigned start) |
| 292 | { |
| 293 | unsigned i; /* Loop index */ |
| 294 | unsigned char t; /* Temp */ |
| 295 | |
| 296 | if (!++start) |
| 297 | return 0; |
| 298 | i = start/8; |
| 299 | if (i >= size) |
| 300 | return 0; /* Done! */ |
| 301 | |
| 302 | /* Deal with odd-bit beginnings => search the first byte */ |
| 303 | if (start & 7) { |
| 304 | t = array[i++] >> (start & 7); |
| 305 | if (t) { |
| 306 | if (!(t & 15)) { |
| 307 | t >>= 4; |
| 308 | start += 4; |
| 309 | } |
| 310 | if (!(t & 3)) { |
| 311 | t >>= 2; |
| 312 | start += 2; |
| 313 | } |
| 314 | if (!(t & 1)) |
| 315 | start += 1; |
| 316 | return start; |
| 317 | } else if (i == size) { |
| 318 | return 0; /* Done */ |
| 319 | } |
| 320 | } |
| 321 | |
| 322 | /* Now the main search loop */ |
| 323 | |
| 324 | do { |
| 325 | if ((t = array[i]) != 0) { |
| 326 | start = 8*i; |
| 327 | if (!(t & 15)) { |
| 328 | t >>= 4; |
| 329 | start += 4; |
| 330 | } |
| 331 | if (!(t & 3)) { |
| 332 | t >>= 2; |
| 333 | start += 2; |
| 334 | } |
| 335 | if (!(t & 1)) |
| 336 | start += 1; |
| 337 | return start; |
| 338 | } |
| 339 | } while (++i < size); |
| 340 | |
| 341 | /* Failed */ |
| 342 | return 0; |
| 343 | } |
| 344 | |
| 345 | /* |
| 346 | * Build a table of small primes for sieving larger primes with. This |
| 347 | * could be cached between calls to sieveBuild, but it's so fast that |
| 348 | * it's really not worth it. This code takes a few milliseconds to run. |
| 349 | */ |
| 350 | static void |
| 351 | sieveSmall(unsigned char *array, unsigned size) |
| 352 | { |
| 353 | unsigned i; /* Loop index */ |
| 354 | unsigned p; /* The current prime */ |
| 355 | |
| 356 | /* Initialize to all 1s */ |
| 357 | memset(array, 0xFF, size); |
| 358 | |
| 359 | #if SMALLSTART == 1 |
| 360 | /* Mark 1 as NOT prime */ |
| 361 | array[0] = 0xfe; |
| 362 | i = 1; /* Index of first prime */ |
| 363 | #else |
| 364 | i = 0; /* Index of first prime */ |
| 365 | #endif |
| 366 | |
| 367 | /* |
| 368 | * Okay, now sieve via the primes up to 256, obtained from the |
| 369 | * table itself. We know the maximum possible table size is |
| 370 | * 65536, and sieveSingle() can cope with out-of-range inputs |
| 371 | * safely, and the time required is trivial, so it isn't adaptive |
| 372 | * based on the array size. |
| 373 | * |
| 374 | * Convert each bit position into a prime, compute a starting |
| 375 | * sieve position (the square of the prime), and remove multiples |
| 376 | * from the table, using sieveSingle(). I used to have that |
| 377 | * code in line here, but the speed difference was so small it |
| 378 | * wasn't worth it. If a compiler really wants to waste memory, |
| 379 | * it can inline it. |
| 380 | */ |
| 381 | do { |
| 382 | p = 2 * i + SMALLSTART; |
| 383 | if (p > 256) |
| 384 | break; |
| 385 | /* Start at square of p */ |
| 386 | sieveSingle(array, size, (p*p-SMALLSTART)/2, p); |
| 387 | |
| 388 | /* And find the next prime */ |
| 389 | i = sieveSearch(array, 16, i); |
| 390 | } while (i); |
| 391 | } |
| 392 | |
| 393 | |
| 394 | /* |
| 395 | * This is the primary sieving function. It fills in the array with |
| 396 | * a sieve (multiples of small primes removed) beginning at bn and |
| 397 | * proceeding in steps of "step". |
| 398 | * |
| 399 | * It generates a small array to get the primes to sieve by. It's |
| 400 | * generated on the fly - sieveSmall is fast enough to make that |
| 401 | * perfectly acceptable. |
| 402 | * |
| 403 | * The caller should take the array, walk it with sieveSearch, and |
| 404 | * apply a stronger primality test to the numbers that are returned. |
| 405 | * |
| 406 | * If the "dbl" flag non-zero (at least 1), this also sieves 2*bn+1, in |
| 407 | * steps of 2*step. If dbl is 2 or more, this also sieve 4*bn+3, |
| 408 | * in steps of 4*step, and so on for arbitrarily high values of "dbl". |
| 409 | * This is convenient for finding primes such that (p-1)/2 is also prime. |
| 410 | * This is particularly efficient because sieveSingle is controlled by the |
| 411 | * parameter s = -n/step (mod p). (In fact, we find t = -1/step (mod p) |
| 412 | * and multiply that by n (mod p).) If you have -n/step (mod p), then |
| 413 | * finding -(2*n+1)/(2*step) (mod p), which is -n/step - 1/(2*step) (mod p), |
| 414 | * reduces to finding -1/(2*step) (mod p), or t/2 (mod p), and adding that |
| 415 | * to s = -n/step (mod p). Dividing by 2 modulo an odd p is easy - |
| 416 | * if even, divide directly. Otherwise, add p (which produces an even |
| 417 | * sum), and divide by 2. Very simple. And this produces s' and t' |
| 418 | * for step' = 2*step. It can be repeated for step'' = 4*step and so on. |
| 419 | * |
| 420 | * Note that some of the math is complicated by the fact that 2*p might |
| 421 | * not fit into an unsigned, so rather than if (odd(x)) x = (x+p)/2, |
| 422 | * we do if (odd(x)) x = x/2 + p/2 + 1; |
| 423 | * |
| 424 | * TODO: Do the double-sieving by sieving the larger number, and then |
| 425 | * just subtract one from the remainder to get the other parameter. |
| 426 | * (bn-1)/2 is divisible by an odd p iff bn-1 is divisible, which is |
| 427 | * true iff bn == 1 mod p. This requires using a step size of 4. |
| 428 | */ |
| 429 | int |
| 430 | sieveBuild(unsigned char *array, unsigned size, struct BigNum const *bn, |
| 431 | unsigned step, unsigned dbl) |
| 432 | { |
| 433 | unsigned i, j; /* Loop index */ |
| 434 | unsigned p; /* Current small prime */ |
| 435 | unsigned s; /* Where to start operations in the big sieve */ |
| 436 | unsigned t; /* Step modulo p, the current prime */ |
| 437 | #ifdef MSDOS /* Use dynamic allocation rather than on the stack */ |
| 438 | unsigned char *small; |
| 439 | #else |
| 440 | unsigned char small[SMALL]; |
| 441 | #endif |
| 442 | |
| 443 | assert(array); |
| 444 | |
| 445 | #ifdef MSDOS |
| 446 | small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */ |
| 447 | if (!small) |
| 448 | return -1; /* Failed */ |
| 449 | #endif |
| 450 | |
| 451 | /* |
| 452 | * An odd step is a special case, since we must sieve by 2, |
| 453 | * which isn't in the small prime array and has a few other |
| 454 | * special properties. These are: |
| 455 | * - Since the numbers are stored in binary, we don't need to |
| 456 | * use bnModQ to find the remainder. |
| 457 | * - If step is odd, then t = step % 2 is 1, which allows |
| 458 | * the elimination of a lot of math. Inverting and negating |
| 459 | * t don't change it, and multiplying s by 1 is a no-op, |
| 460 | * so t isn't actually mentioned. |
| 461 | * - Since this is the first sieving, instead of calling |
| 462 | * sieveSingle, we can just use memset to fill the array |
| 463 | * with 0x55 or 0xAA. Since a 1 bit means possible prime |
| 464 | * (i.e. NOT divisible by 2), and the least significant bit |
| 465 | * is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT |
| 466 | * prime), while if bn % 2 == 1, use 0x55. |
| 467 | * (If step is even, bn must be odd, so fill the array with 0xFF.) |
| 468 | * - Any doublings need not be considered, since 2*bn+1 is odd, and |
| 469 | * 2*step is even, so none of these numbers are divisible by 2. |
| 470 | */ |
| 471 | if (step & 1) { |
| 472 | s = bnLSWord(bn) & 1; |
| 473 | memset(array, 0xAA >> s, size); |
| 474 | } else { |
| 475 | /* Initialize the array to all 1's */ |
| 476 | memset(array, 255, size); |
| 477 | assert(bnLSWord(bn) & 1); |
| 478 | } |
| 479 | |
| 480 | /* |
| 481 | * This could be cached between calls to sieveBuild, but |
| 482 | * it's really not worth it; sieveSmall is *very* fast. |
| 483 | * sieveSmall returns a sieve of odd primes. |
| 484 | */ |
| 485 | sieveSmall(small, SMALL); |
| 486 | |
| 487 | /* |
| 488 | * Okay, now sieve via the primes up to ssize*16+SMALLSTART-1, |
| 489 | * obtained from the small table. |
| 490 | */ |
| 491 | i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0); |
| 492 | do { |
| 493 | p = 2 * i + SMALLSTART; |
| 494 | |
| 495 | /* |
| 496 | * Modulo is usually very expensive, but step is usually |
| 497 | * small, so this conditional is worth it. |
| 498 | */ |
| 499 | t = (step < p) ? step : step % p; |
| 500 | if (!t) { |
| 501 | /* |
| 502 | * Instead of assert failing, returning all zero |
| 503 | * bits is the "correct" thing to do, but I think |
| 504 | * that the caller should take care of that |
| 505 | * themselves before starting. |
| 506 | */ |
| 507 | assert(bnModQ(bn, p) != 0); |
| 508 | continue; |
| 509 | } |
| 510 | /* |
| 511 | * Get inverse of step mod p. 0 < t < p, and p is prime, |
| 512 | * so it has an inverse and sieveModInvert can't return 0. |
| 513 | */ |
| 514 | t = sieveModInvert(t, p); |
| 515 | assert(t); |
| 516 | /* Negate t, so now t == -1/step (mod p) */ |
| 517 | t = p - t; |
| 518 | |
| 519 | /* Now get the bignum modulo the prime. */ |
| 520 | s = bnModQ(bn, p); |
| 521 | |
| 522 | /* Multiply by t, the negative inverse of step size */ |
| 523 | #if UINT_MAX/0xffff < 0xffff |
| 524 | s = (unsigned)(((unsigned long)s * t) % p); |
| 525 | #else |
| 526 | s = (s * t) % p; |
| 527 | #endif |
| 528 | |
| 529 | /* s is now the starting bit position, so sieve */ |
| 530 | sieveSingle(array, size, s, p); |
| 531 | |
| 532 | /* Now do the double sieves as desired. */ |
| 533 | for (j = 0; j < dbl; j++) { |
| 534 | /* Halve t modulo p */ |
| 535 | #if UINT_MAX < 0x1ffff |
| 536 | t = (t & 1) ? p/2 + t/2 + 1 : t/2; |
| 537 | /* Add t to s, modulo p with overflow checks. */ |
| 538 | s += t; |
| 539 | if (s >= p || s < t) |
| 540 | s -= p; |
| 541 | #else |
| 542 | if (t & 1) |
| 543 | t += p; |
| 544 | t /= 2; |
| 545 | /* Add t to s, modulo p */ |
| 546 | s += t; |
| 547 | if (s >= p) |
| 548 | s -= p; |
| 549 | #endif |
| 550 | sieveSingle(array, size, s, p); |
| 551 | } |
| 552 | |
| 553 | /* And find the next prime */ |
| 554 | } while ((i = sieveSearch(small, SMALL, i)) != 0); |
| 555 | |
| 556 | #ifdef MSDOS |
| 557 | lbnMemFree(small, SMALL); |
| 558 | #endif |
| 559 | return 0; /* Success */ |
| 560 | } |
| 561 | |
| 562 | /* |
| 563 | * Similar to the above, but use "step" (which must be even) as a step |
| 564 | * size rather than a fixed value of 2. If "step" has any small divisors |
| 565 | * other than 2, this will blow up. |
| 566 | * |
| 567 | * Returns -1 on out of memory (MSDOS only, actually), and -2 |
| 568 | * if step is found to be non-prime. |
| 569 | */ |
| 570 | int |
| 571 | sieveBuildBig(unsigned char *array, unsigned size, struct BigNum const *bn, |
| 572 | struct BigNum const *step, unsigned dbl) |
| 573 | { |
| 574 | unsigned i, j; /* Loop index */ |
| 575 | unsigned p; /* Current small prime */ |
| 576 | unsigned s; /* Where to start operations in the big sieve */ |
| 577 | unsigned t; /* step modulo p, the current prime */ |
| 578 | #ifdef MSDOS /* Use dynamic allocation rather than on the stack */ |
| 579 | unsigned char *small; |
| 580 | #else |
| 581 | unsigned char small[SMALL]; |
| 582 | #endif |
| 583 | |
| 584 | assert(array); |
| 585 | |
| 586 | #ifdef MSDOS |
| 587 | small = lbnMemAlloc(SMALL); /* Which allocator? Not secure. */ |
| 588 | if (!small) |
| 589 | return -1; /* Failed */ |
| 590 | #endif |
| 591 | /* |
| 592 | * An odd step is a special case, since we must sieve by 2, |
| 593 | * which isn't in the small prime array and has a few other |
| 594 | * special properties. These are: |
| 595 | * - Since the numbers are stored in binary, we don't need to |
| 596 | * use bnModQ to find the remainder. |
| 597 | * - If step is odd, then t = step % 2 is 1, which allows |
| 598 | * the elimination of a lot of math. Inverting and negating |
| 599 | * t don't change it, and multiplying s by 1 is a no-op, |
| 600 | * so t isn't actually mentioned. |
| 601 | * - Since this is the first sieving, instead of calling |
| 602 | * sieveSingle, we can just use memset to fill the array |
| 603 | * with 0x55 or 0xAA. Since a 1 bit means possible prime |
| 604 | * (i.e. NOT divisible by 2), and the least significant bit |
| 605 | * is first, if bn % 2 == 0, we use 0xAA (bit 0 = bn is NOT |
| 606 | * prime), while if bn % 2 == 1, use 0x55. |
| 607 | * (If step is even, bn must be odd, so fill the array with 0xFF.) |
| 608 | * - Any doublings need not be considered, since 2*bn+1 is odd, and |
| 609 | * 2*step is even, so none of these numbers are divisible by 2. |
| 610 | */ |
| 611 | if (bnLSWord(step) & 1) { |
| 612 | s = bnLSWord(bn) & 1; |
| 613 | memset(array, 0xAA >> s, size); |
| 614 | } else { |
| 615 | /* Initialize the array to all 1's */ |
| 616 | memset(array, 255, size); |
| 617 | assert(bnLSWord(bn) & 1); |
| 618 | } |
| 619 | |
| 620 | /* |
| 621 | * This could be cached between calls to sieveBuild, but |
| 622 | * it's really not worth it; sieveSmall is *very* fast. |
| 623 | * sieveSmall returns a sieve of the odd primes. |
| 624 | */ |
| 625 | sieveSmall(small, SMALL); |
| 626 | |
| 627 | /* |
| 628 | * Okay, now sieve via the primes up to ssize*16+SMALLSTART-1, |
| 629 | * obtained from the small table. |
| 630 | */ |
| 631 | i = (small[0] & 1) ? 0 : sieveSearch(small, SMALL, 0); |
| 632 | do { |
| 633 | p = 2 * i + SMALLSTART; |
| 634 | |
| 635 | t = bnModQ(step, p); |
| 636 | if (!t) { |
| 637 | assert(bnModQ(bn, p) != 0); |
| 638 | continue; |
| 639 | } |
| 640 | /* Get negative inverse of step */ |
| 641 | t = sieveModInvert(bnModQ(step, p), p); |
| 642 | assert(t); |
| 643 | t = p-t; |
| 644 | |
| 645 | /* Okay, we have a prime - get the remainder */ |
| 646 | s = bnModQ(bn, p); |
| 647 | |
| 648 | /* Now multiply s by the negative inverse of step (mod p) */ |
| 649 | #if UINT_MAX/0xffff < 0xffff |
| 650 | s = (unsigned)(((unsigned long)s * t) % p); |
| 651 | #else |
| 652 | s = (s * t) % p; |
| 653 | #endif |
| 654 | /* We now have the starting bit pos */ |
| 655 | sieveSingle(array, size, s, p); |
| 656 | |
| 657 | /* Now do the double sieves as desired. */ |
| 658 | for (j = 0; j < dbl; j++) { |
| 659 | /* Halve t modulo p */ |
| 660 | #if UINT_MAX < 0x1ffff |
| 661 | t = (t & 1) ? p/2 + t/2 + 1 : t/2; |
| 662 | /* Add t to s, modulo p with overflow checks. */ |
| 663 | s += t; |
| 664 | if (s >= p || s < t) |
| 665 | s -= p; |
| 666 | #else |
| 667 | if (t & 1) |
| 668 | t += p; |
| 669 | t /= 2; |
| 670 | /* Add t to s, modulo p */ |
| 671 | s += t; |
| 672 | if (s >= p) |
| 673 | s -= p; |
| 674 | #endif |
| 675 | sieveSingle(array, size, s, p); |
| 676 | } |
| 677 | |
| 678 | /* And find the next prime */ |
| 679 | } while ((i = sieveSearch(small, SMALL, i)) != 0); |
| 680 | |
| 681 | #ifdef MSDOS |
| 682 | lbnMemFree(small, SMALL); |
| 683 | #endif |
| 684 | return 0; /* Success */ |
| 685 | } |