Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // zrbj_sx_sieve64_v1.cpp - Sieve of Eratosthenes reference implementation
- // 2014-11-09
- //
- // This implements a segmented odds-only Sieve of Eratosthenes for sieving up to 2^64-1,
- // for a maximum window size of 32 bits (corresponding to bitmaps with 2^31 bits). All
- // index-related (32-bit) variables use the uintxx_t typedef so that potential bugs
- // related to index math can be experimentally excluded by changing that typedef to
- // uint64_t. The digests for the test ranges and the actual primes have been verified
- // against the console version of the primesieve.org program, except for those that are
- // beyond its limits.
- //
- // The actual sieve is implemented in sieve32(), the rest is just overhead for testing.
- #undef NDEBUG
- //#include "zrbj/hdr_pre.h"
- #include <cassert>
- #include <climits>
- #include <cmath>
- #include <cstdint>
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
- #include <ctime>
- #include <algorithm>
- #include <vector>
- //#include "zrbj/hdr_post.h"
- // Dealing only with odd numbers and packed (odd-only) bitmaps makes a lot of things easier.
- // The same goes for specifying all ranges - like sieve windows - via bit indices and not via
- // the corresponding numbers.
- //
- // For one thing, the small factors bitmap needs only 31 bits, which buys a bit of head room
- // for bitmap index variables even if those are uint32_t. It also reduces drastically the
- // number of special cases that need to be handled, pretty much everywhere, and this makes
- // the code much more robust. Last but not least, this makes it possible to specify all ranges
- // as half-open or base + count, without dirty tricks like decreeing that 0 be treated as 2^32.
- // Closed ranges (i.e. 'max_bit' instead of 'end_bit') can be made to work but they require
- // more care and cause a lot of +1/-1 noise.
- //
- // The code here is designed for an index type with 32 bits. When bit indexes can go beyond that,
- // the corresponding number type (uint64_t) is used instead.
- //
- // Changing this typedef to uint64_t eliminates all potential bugs related to 32-bit index math
- // but it makes the code slower (slightly so if compiled as 64-bit code, signficantly for 32-bit
- // code). Compiling as 32-bit code with gcc, the difference is 8.9 s instead of 4.7 s for the
- // sieve init, 9.7 s instead of 5 s for sieving a million primes just below 2^64.
- typedef uint_fast32_t uintxx_t;
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // Bit manipulation functions are sometimes faster using the machine word size, sometimes using
- // unsigned char. Using templated functions makes it possible to mix and match. It also makes
- // it easy to experiment with intrinsics, by the simple expedient of defining specialisations
- // for certain types and implementing them via intrinsics.
- template<typename word_t>
- void set_bit (word_t *p, uintxx_t index)
- {
- enum { BITS_PER_WORD = sizeof(word_t) * CHAR_BIT };
- // we can trust the compiler to use masking and shifting instead of division; we cannot do that
- // ourselves without having the log2 which cannot easily be computed as a constexpr
- p[index / BITS_PER_WORD] |= word_t(1) << (index % BITS_PER_WORD);
- }
- //-------------------------------------------------------------------------------------------------
- // Returning the result as unsigned char makes this compatible with the intrinsics for equivalent
- // CPU instructions; forcing the intrinsics to bool instead would cause potential inefficiencies.
- template<typename word_t>
- unsigned char bit (word_t const *p, uintxx_t index)
- {
- enum { BITS_PER_WORD = sizeof(word_t) * CHAR_BIT };
- return (unsigned char)((p[index / BITS_PER_WORD] >> (index % BITS_PER_WORD)) & 1u);
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // As far as prime counts and checksums are concerned, bit 0 is often treated as a stand-in for the
- // non-representable number 2 (which, being even, is the odd man out in such a bitmap). This makes
- // some things simpler; for example, it makes the number of primes in a bitmap equal to the number
- // of 0 bits.
- //
- // Bit 0 needs special consideration in any case since 1 it is neither prime nor composite.
- //
- // Prime counts, sums and products (primorials) are often available from sources like oeis.org.
- // They are independent of position and order, which makes it easy to combine the results of
- // separately processed blocks. The checksum is order-dependent - intendedly so - and it further
- // enhances the error detection capability of the digest.
- template<typename word_t>
- struct digest_pod_t
- {
- word_t count;
- word_t sum;
- word_t product;
- word_t checksum;
- bool operator != (digest_pod_t const &other) const
- {
- return !(*this == other);
- }
- bool operator == (digest_pod_t const &other) const
- {
- return checksum == other.checksum
- && product == other.product
- && sum == other.sum
- && count == other.count;
- }
- void add_prime (word_t n)
- {
- count += 1;
- sum += n;
- product *= n;
- checksum += n * count;
- }
- template<typename const_forward_iterator_t>
- void add_primes (const_forward_iterator_t begin, const_forward_iterator_t end)
- {
- for (const_forward_iterator_t p = begin; p != end; ++p)
- {
- add_prime(*p);
- }
- }
- void add_odd_composites_bitmap (
- void const *bitmap,
- uintxx_t min_bit,
- uintxx_t end_bit, // first bit past the range (max_bit + 1)
- word_t bit_offset = 0 ) // virtual offset of bit 0 of the bitmap (first number >> 1)
- {
- unsigned char const *odd_composites = static_cast<unsigned char const *>(bitmap);
- if (min_bit < end_bit)
- {
- if (min_bit == 0 && bit_offset == 0)
- {
- if (bit(odd_composites, 0) == 0) add_prime(2);
- ++min_bit;
- }
- word_t base = (bit_offset << 1) + 1;
- for (uintxx_t k = min_bit; k < end_bit; ++k)
- {
- if (bit(odd_composites, k)) continue;
- add_prime(word_t(base + (k << 1)));
- }
- }
- }
- };
- //-------------------------------------------------------------------------------------------------
- // mixed-size comparison
- bool operator == (digest_pod_t<uint32_t> const &d32, digest_pod_t<uint64_t> const &d64)
- {
- return d32.checksum == uint32_t(d64.checksum)
- && d32.product == uint32_t(d64.product)
- && d32.sum == uint32_t(d64.sum)
- && d32.count == uint32_t(d64.count);
- }
- bool operator == (digest_pod_t<uint64_t> const &d64, digest_pod_t<uint32_t> const &d32)
- {
- return d32 == d64;
- }
- bool operator != (digest_pod_t<uint32_t> const &d32, digest_pod_t<uint64_t> const &d64)
- {
- return !(d32 == d64);
- }
- bool operator != (digest_pod_t<uint64_t> const &d64, digest_pod_t<uint32_t> const &d32)
- {
- return !(d64 == d32);
- }
- //-------------------------------------------------------------------------------------------------
- // convenience
- template<typename value_t>
- void print (
- char const *prefix,
- digest_pod_t<value_t> const &digest,
- unsigned count_digits = 9 )
- {
- enum { H = 2 * sizeof(value_t) };
- std::printf("%s{ %*llu, %0*llX, %0*llX, %0*llX }",
- prefix ? prefix : "",
- count_digits, uint64_t(digest.count),
- H, uint64_t(digest.sum),
- H, uint64_t(digest.product),
- H, uint64_t(digest.checksum) );
- }
- template<typename word_t>
- struct digest_t: digest_pod_t<word_t>
- {
- digest_t ()
- {
- this->count = this->sum = this->checksum = 0;
- this->product = 1;
- }
- digest_t (word_t c, word_t s, word_t p, word_t x)
- {
- this->count = c; this->sum = s; this->product = p; this->checksum = x;
- }
- };
- typedef digest_t<uint32_t> digest32_t;
- typedef digest_t<uint64_t> digest64_t;
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // all ranges have been verified against the primesieve.org console executable, except for those
- // which exceed its cap (18446744030759878665 == UINT64_MAX - 10 * UINT32_MAX).
- struct test_range_t
- {
- uint64_t lo, hi;
- digest_pod_t<uint64_t> digest;
- };
- #define _( lo, hi, first_prime, last_prime, cnt, sum, prd, chk ) \
- { lo ## u, hi ## u, { cnt ## u, 0x ## sum ## u, 0x ## prd ## u, 0x ## chk ## u } }
- test_range_t const Ranges_1M[] =
- {
- _( 1, 10000000 /* 10000000 */, 2, 9999991 /* 9999990 */, 664579, 000002E9D50C6334, E284CDD9939E68AA, 13F0C0CBB8773897 ),
- _( 81673073, 100000000 /* 18326928 */, 81673073, 99999989 /* 18326917 */, 1000000, 00005299CB5A5478, 7236ED226E8BA3D5, 8B636C3E423B6BAA ),
- _( 979292933, 1000000000 /* 20707068 */, 979292933, 999999937 /* 20707005 */, 1000000, 000384138D549EA0, 879FC41968FAD3E9, EAFCC52696740814 ),
- _( 9976982339, 10000000000 /* 23017662 */, 9976982339, 9999999967 /* 23017629 */, 1000000, 00237C7AA33A931A, FEEAC78D65A10667, D7C1D531E4400712 ),
- _( 99974683489, 100000000000 /* 25316512 */, 99974683489, 99999999977 /* 25316489 */, 1000000, 016339F4D1418D36, A1C1A63E3E302FEF, 478434FCE8AB133E ),
- _( 999972400027, 1000000000000 /* 27599974 */, 999972400027, 999999999989 /* 27599963 */, 1000000, 0DE0AA269BDFD8BA, CEE122E76B72227F, D50470D359BE4DFE ),
- _( 9999970054519, 10000000000000 /* 29945482 */, 9999970054519, 9999999999971 /* 29945453 */, 1000000, 8AC715685B09F258, 8593DE5292613F85, 8B32A42E25B99C64 ),
- _( 99999967808393, 100000000000000 /* 32191608 */, 99999967808393, 99999999999973 /* 32191581 */, 1000000, 6BC74F88FD0C5A6C, 1AB5F68D9123D491, D9CDC114AA6B0AAE ),
- _( 999999965485057, 1000000000000000 /* 34514944 */, 999999965485057, 999999999999989 /* 34514933 */, 1000000, 35C99E13AAF0731E, FC1D792962ADB95B, 1AF84035F20F6A04 ),
- _( 9999999963142769, 10000000000000000 /* 36857232 */, 9999999963142769, 9999999999999937 /* 36857169 */, 1000000, 19E0B8F8BCF0E2DE, 98216464A377198F, D6CB044A5230ADB8 ),
- _( 99999999960819169, 100000000000000000 /* 39180832 */, 99999999960819169, 99999999999999997 /* 39180829 */, 1000000, 02C7CF7BB7826270, 3B6966E20059EEA9, 5D85F14A7AF08E06 ),
- _( 999999999958516801, 1000000000000000000 /* 41483200 */, 999999999958516801, 999999999999999989 /* 41483189 */, 1000000, 1BCEBA0D5ECD2FEE, 5946DFC574AB3CF3, D118C1B8D724181C ),
- _( 9999999999956286497, 10000000000000000000 /* 43713504 */, 9999999999956286497, 9999999999999999961 /* 43713465 */, 1000000, 1613ED6695FB8F08, C1237D83F03C887D, 862E500F9255057E ),
- _( 18446744030715545269, 18446744030759878665 /* 44333397 */, 18446744030715545269, 18446744030759878627 /* 44333359 */, 1000000, FF675557CDDE0298, 7EA5838E55BEEEC5, 71F3BDD5137CBDEC ),
- _( 18446744073665204447, 18446744073709551615 /* 44347169 */, 18446744073665204447, 18446744073709551557 /* 44347111 */, 1000000, FFFFEBD3966C841C, 5E37B0500AB5B8BD, 995B0CEF6CA1E4A8 )
- };
- test_range_t const Ranges_10M[] =
- {
- _( 1, 100000000 /* 100000000 */, 2, 99999989 /* 99999988 */, 5761455, 0000FDF0985FB44C, 6D921F88B2D434D2, C0B16F72901E85FF ),
- _( 793877173, 1000000000 /* 206122828 */, 793877173, 999999937 /* 206122765 */, 10000000, 001FDBE1C333ECF8, 651A0A9574621079, C45243BCC5A786A2 ),
- _( 9769857367, 10000000000 /* 230142634 */, 9769857367, 9999999967 /* 230142601 */, 10000000, 015F2EBED9399456, 9E5053E10173B0AB, 13AC8B7227297280 ),
- _( 99746761237, 100000000000 /* 253238764 */, 99746761237, 99999999977 /* 253238741 */, 10000000, 0DDC371E71307554, AABB2057BB24D2C9, C6B49F02CC79C6F4 ),
- _( 999723733787, 1000000000000 /* 276266214 */, 999723733787, 999999999989 /* 276266203 */, 10000000, 8AC23ACFC174A930, 66BAD7F180005B21, 23284B3010659CD0 ),
- _( 9999700629011, 10000000000000 /* 299370990 */, 9999700629011, 9999999999971 /* 299370961 */, 10000000, 6BC20C8E38DE8F36, ED985713434EEDFB, 7B1FD08D2CF8ECD4 ),
- _( 99999677617663, 100000000000000 /* 322382338 */, 99999677617663, 99999999999973 /* 322382311 */, 10000000, 35C3F3CFA654F69A, 72861CA76E6E313F, F8CCDA86C924E3C6 ),
- _( 999999654617569, 1000000000000000 /* 345382432 */, 999999654617569, 999999999999989 /* 345382421 */, 10000000, 19DAA726AC5A2024, 2E27E7F123B0F7B1, 38455B855C8EECAE ),
- _( 9999999631636541, 10000000000000000 /* 368363460 */, 9999999631636541, 9999999999999937 /* 368363397 */, 10000000, 02C1563C4B6432C4, C36AB4E6E6B37F65, D29BB9FED3D6DA14 ),
- _( 99999999608465399, 100000000000000000 /* 391534602 */, 99999999608465399, 99999999999999997 /* 391534599 */, 10000000, 1BC7D81E877E2454, 292A3536FD400481, 9395B877E4E6EEC2 ),
- _( 999999999585415333, 1000000000000000000 /* 414584668 */, 999999999585415333, 999999999999999989 /* 414584657 */, 10000000, 160CA3C90CF00EF0, 7E1A4A66DE1CA111, ADB5D2CEF74C6B04 ),
- _( 9999999999562573471, 10000000000000000000 /* 437426530 */, 9999999999562573471, 9999999999999999961 /* 437426491 */, 10000000, DCC0478D1CC9FA52, 2F64319F30800833, D59808CFCB4ADAEA ),
- _( 18446744030316425227, 18446744030759878665 /* 443453439 */, 18446744030316425227, 18446744030759878627 /* 443453401 */, 10000000, FA023EF2BBA7CDA2, 4C5D19CF0CE2099F, 250841775ECFADF0 ),
- _( 18446744073265777349, 18446744073709551615 /* 443774267 */, 18446744073265777349, 18446744073709551557 /* 443774209 */, 10000000, FFF81E195834C5F0, 4EA8082765E419D5, 0FEE0BBB15025848 )
- };
- #undef _
- digest_pod_t<uint64_t> Digest_4G =
- {
- 203280221, 0x05E8362E0C903F86u, 0x9B20CCAA5B253F12u, 0xE364FF90C46F979Cu
- };
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // Computing the maximum factor - i.e. the integer square root - via uint32_t(sqrt(double(n))) is
- // not safe, for various reasons. One of them is that the result for UINT64_MAX would be 0 instead
- // of UINT32_MAX. See http://codereview.stackexchange.com/questions/69069
- //
- // Simpler versions work well enough with VC++ and gcc but the version shown here is the only one
- // that is guaranteed to return the correct result regardless of rounding mode, fp strictness and
- // compiler optimisation settings. Since the function is not time-critical I did not clean it up,
- // to show the strange antisymmetry. The absolute minimum necessary here is checking whether the
- // float result is greater than UINT32_MAX before casting it to integral type; in that case the
- // worst that can happen is that the result is one or two above the perfect result, leading to at
- // most one unnecessary iteration of the outer sieve loop. That can be avoided in almost all cases
- // by decrementing the result once if its square is greater than n; in a tiny fraction of cases a
- // decrement of two would be necessary, but that is extremely rare, less than one in a billion.
- template<typename word_t>
- word_t square (word_t n)
- {
- return word_t(n * n);
- }
- uintxx_t max_factor32 (uint64_t n)
- {
- uint64_t r = uint64_t(std::sqrt(double(n)));
- while (r < UINT32_MAX && square(r) < n)
- ++r;
- while (r > UINT32_MAX || square(r) > n)
- --r;
- return uintxx_t(r);
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- unsigned char odd_composites32[UINT32_MAX / (2 * CHAR_BIT) + 1]; // the small factor sieve
- uintxx_t sieved_bits = 0; // how far it's been initialised
- void extend_factor_sieve_to_cover (uintxx_t max_factor_bit); // bit, not number!
- void sieve32 (unsigned char *target_segment, uint64_t bit_offset, uintxx_t bit_count)
- {
- assert( bit_count > 0 && bit_count <= UINT32_MAX / 2 + 1 );
- uintxx_t max_bit = bit_count - 1;
- uint64_t max_num = 2 * (bit_offset + max_bit) + 1;
- uintxx_t max_factor_bit = (max_factor32(max_num) - 1) / 2;
- if (target_segment != odd_composites32)
- {
- extend_factor_sieve_to_cover(max_factor_bit);
- }
- std::memset(target_segment, 0, std::size_t((max_bit + CHAR_BIT) / CHAR_BIT));
- for (uintxx_t i = 3u >> 1; i <= max_factor_bit; ++i)
- {
- if (bit(odd_composites32, i)) continue;
- uintxx_t n = (i << 1) + 1; // the actual prime represented by bit i (< 2^32)
- uintxx_t stride = n; // == (n * 2) / 2
- uint64_t start = (uint64_t(n) * n) >> 1;
- uintxx_t k;
- if (start >= bit_offset)
- {
- k = uintxx_t(start - bit_offset);
- }
- else // start < offset
- {
- k = stride - (bit_offset - start - 1) % stride - 1;
- }
- while (k <= max_bit)
- {
- set_bit(target_segment, k);
- if ((k += stride) < stride) // k can wrap since strides go up to almost 2^32
- {
- break;
- }
- }
- }
- }
- //-------------------------------------------------------------------------------------------------
- // Using sieve32() for sieving the small factors as well here; that takes about three times as long
- // as a specialised small sieve with all the bells and whistles but it is also a couple hundred
- // lines of code shorter. See zrbj_sx_sieve32_v4.cpp for the full Monty. Sieving in small segments
- // is a necessity since doing it in one go would slow things down by an order of magnitude.
- void extend_factor_sieve_to_cover (uintxx_t max_factor_bit)
- {
- uintxx_t const SIEVE_BITS = sizeof(odd_composites32) * CHAR_BIT;
- assert( max_factor_bit < SIEVE_BITS );
- if (max_factor_bit >= sieved_bits)
- {
- assert( sieved_bits % CHAR_BIT == 0 );
- uintxx_t bits_to_sieve = max_factor_bit - sieved_bits + 1;
- uintxx_t segment_size = 1u << 18; // == 32K * CHAR_BIT == L1 cache size
- uintxx_t partial_bits = bits_to_sieve % segment_size;
- uintxx_t segments = bits_to_sieve / segment_size + (partial_bits != 0);
- if ((SIEVE_BITS - sieved_bits) % segment_size == 0)
- {
- partial_bits = 0; // always sieve full segments as long as there is enough space
- }
- for ( ; segments--; )
- {
- uintxx_t bits_this_round = segments == 0 && partial_bits ? partial_bits : segment_size;
- sieve32(odd_composites32 + sieved_bits / CHAR_BIT, sieved_bits, bits_this_round);
- sieved_bits += bits_this_round;
- }
- }
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // utility functions for the tests
- namespace util
- {
- uint32_t random_seed32 = 42; // must be nonzero
- uint32_t random_uint32 () // xorshift32*
- {
- uint32_t x = random_seed32;
- x ^= x << 13; x ^= x >> 17; x ^= x << 5;
- random_seed32 = x;
- return x * 0xEA4E58EDu;
- }
- uint32_t random_uint32 (uint32_t modulus)
- {
- if (modulus > 1)
- {
- for ( ; ; )
- {
- uint32_t raw_bits = random_uint32();
- uint32_t result = raw_bits % modulus;
- uint32_t check = uint32_t(raw_bits - result + modulus);
- if (check >= raw_bits || check == 0)
- {
- return result;
- }
- }
- }
- return 0;
- }
- double ms (std::clock_t clock_delta)
- {
- return 1000 * double(clock_delta) / CLOCKS_PER_SEC;
- }
- void terminate_with_failure (char const *message = 0)
- {
- std::printf("%s", message ? message : "\n\n*** FAIL ***\n");
- std::exit(3);
- }
- } // namepace util
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- // various tests...
- void time_factor_sieve ()
- {
- std::printf("\ninitialising the small factor sieve... ");
- sieved_bits = 0;
- std::clock_t t0 = std::clock();
- extend_factor_sieve_to_cover(UINT32_MAX / 2 - 1);
- double seconds = double(std::clock() - t0) / CLOCKS_PER_SEC;
- std::printf("%.3f s (%.1f M/s)", seconds, sieved_bits * std::ldexp(2.0, -20) / seconds);
- std::printf("\nverifying... ");
- digest64_t digest;
- t0 = std::clock();
- digest.add_odd_composites_bitmap(odd_composites32, 0, sieved_bits - 1, 0);
- seconds = double(std::clock() - t0) / CLOCKS_PER_SEC;
- std::printf("%.3f s (%.1f M/s)", seconds, sieved_bits * std::ldexp(2.0, -20) / seconds);
- if (digest != Digest_4G)
- {
- std::printf("\n\ndigest != Digest_4G\n");
- print("\ntst ", digest);
- print("\nref ", Digest_4G);
- util::terminate_with_failure();
- }
- }
- //-------------------------------------------------------------------------------------------------
- void test_factor_sieve ()
- {
- std::printf("\ntesting incremental updates of the small factor sieve...");
- uint32_t ref_bytes = uint32_t(sieved_bits) / CHAR_BIT;
- std::vector<unsigned char> ref(odd_composites32, odd_composites32 + ref_bytes);
- uint32_t compared = 0;
- uint32_t limit = sizeof(odd_composites32) * CHAR_BIT;
- sieved_bits = 0;
- while (sieved_bits < limit)
- {
- uint32_t growth_limit = 1u << 23;
- if (util::random_uint32(100) < 33) growth_limit <<= 4;
- if (util::random_uint32(100) < 33) growth_limit >>= 4;
- growth_limit = std::min(growth_limit, limit - uint32_t(sieved_bits));
- uint32_t extend_by = util::random_uint32(growth_limit);
- std::printf("\n %8X + %7X... ", uint32_t(sieved_bits), extend_by);
- std::clock_t t0 = std::clock();
- extend_factor_sieve_to_cover(sieved_bits + extend_by);
- std::printf("%6.1f ms", util::ms(std::clock() - t0));
- uint32_t full_bytes = std::min(ref_bytes, uint32_t(sieved_bits) / CHAR_BIT);
- if (uint32_t compare_now = full_bytes - compared)
- {
- if (std::memcmp(odd_composites32, &ref[0], compare_now))
- {
- util::terminate_with_failure();
- }
- compared += compare_now;
- }
- }
- std::printf("\n-> sieved_bits = %8X", uint32_t(sieved_bits));
- if (compared < sizeof(odd_composites32))
- {
- std::printf("\nNOTE: only %u of %u sieve bytes have been compared",
- compared, uint32_t(sizeof(odd_composites32)) );
- }
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- void sieve_and_print_range (
- uint64_t start, // number, not bit index
- uint32_t count,
- digest_pod_t<uint64_t> const *ref = 0 )
- {
- unsigned odd_start = unsigned(start & 1);
- if (count < 2 - odd_start) return;
- std::printf("\n%20llu .. %20llu", start, start + count - 1);
- uint32_t max_bit = (count - (2 - odd_start)) >> 1;
- std::vector<unsigned char> buffer((max_bit + CHAR_BIT) / CHAR_BIT);
- unsigned char *bitmap = &buffer[0];
- std::clock_t t0 = std::clock();
- if (count > 1 - odd_start)
- {
- sieve32(bitmap, start >> 1, max_bit + 1);
- }
- double ms = util::ms(std::clock() - t0);
- std::printf(" %6.0f ms %6.1f M/s", ms, count / ms * 1000 * std::ldexp(2, -20));
- digest64_t digest;
- if (count > 1 - odd_start && start + count - 1 >= 2)
- {
- if (start == 2) digest.add_prime(2); // digest does not add 2 by itself in this case
- digest.add_odd_composites_bitmap(bitmap, 0, max_bit + 1, start >> 1);
- }
- std::printf(" %9u ", uint32_t(digest.count));
- if (ref != 0) // bloody gcc complains if inner conditional is not wrapped in braces
- {
- if (digest == *ref)
- std::printf(" OK");
- else
- util::terminate_with_failure();
- }
- }
- //-------------------------------------------------------------------------------------------------
- void sieve_and_print_range (test_range_t const &range)
- {
- assert( range.lo <= range.hi );
- assert( range.hi - range.lo + 1 <= UINT32_MAX );
- sieve_and_print_range(range.lo, uint32_t(range.hi - range.lo + 1), &range.digest);
- }
- //-------------------------------------------------------------------------------------------------
- void sieve_and_print_ranges (test_range_t const *ranges, std::size_t count)
- {
- for (test_range_t const *p = ranges, *e = p + count; p < e; ++p)
- {
- sieve_and_print_range(*p);
- }
- }
- //-------------------------------------------------------------------------------------------------
- template<std::size_t N>
- void sieve_and_print_ranges (test_range_t const (&ranges)[N])
- {
- sieve_and_print_ranges(ranges, N);
- }
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- int main ()
- {
- std::printf("\nsizeof(uintxx_t) == %u", unsigned(sizeof(uintxx_t)));
- std::printf("\nsizeof(void*) == %u\n", unsigned(sizeof(void*)));
- time_factor_sieve();
- test_factor_sieve();
- std::printf("\nsieving some ranges (1M)...");
- sieve_and_print_ranges(Ranges_1M);
- std::printf("\nsieving some bigger ranges (10M)...");
- sieve_and_print_ranges(Ranges_10M);
- std::printf("\nsieving with maximum window size...");
- sieve_and_print_range(0, UINT32_MAX, &Digest_4G);
- sieve_and_print_range(1, UINT32_MAX, &Digest_4G);
- sieve_and_print_range(2, UINT32_MAX, &Digest_4G);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement