Advertisement
DarthGizka

Sieve of Eratosthenes - segmented for sieving up to 2^64-1

Nov 10th, 2014
942
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 25.71 KB | None | 0 0
  1. // zrbj_sx_sieve64_v1.cpp - Sieve of Eratosthenes reference implementation
  2. // 2014-11-09
  3. //
  4. // This implements a segmented odds-only Sieve of Eratosthenes for sieving up to 2^64-1,
  5. // for a maximum window size of 32 bits (corresponding to bitmaps with 2^31 bits). All
  6. // index-related (32-bit) variables use the uintxx_t typedef so that potential bugs
  7. // related to index math can be experimentally excluded by changing that typedef to
  8. // uint64_t. The digests for the test ranges and the actual primes have been verified
  9. // against the console version of the primesieve.org program, except for those that are
  10. // beyond its limits.
  11. //
  12. // The actual sieve is implemented in sieve32(), the rest is just overhead for testing.
  13.  
  14. #undef NDEBUG
  15.  
  16. //#include "zrbj/hdr_pre.h"
  17. #include <cassert>
  18. #include <climits>
  19. #include <cmath>
  20. #include <cstdint>
  21. #include <cstdio>
  22. #include <cstdlib>
  23. #include <cstring>
  24. #include <ctime>
  25. #include <algorithm>
  26. #include <vector>
  27. //#include "zrbj/hdr_post.h"
  28.  
  29. // Dealing only with odd numbers and packed (odd-only) bitmaps makes a lot of things easier.
  30. // The same goes for specifying all ranges - like sieve windows - via bit indices and not via
  31. // the corresponding numbers.
  32. //
  33. // For one thing, the small factors bitmap needs only 31 bits, which buys a bit of head room
  34. // for bitmap index variables even if those are uint32_t. It also reduces drastically the
  35. // number of special cases that need to be handled, pretty much everywhere, and this makes
  36. // the code much more robust. Last but not least, this makes it possible to specify all ranges
  37. // as half-open or base + count, without dirty tricks like decreeing that 0 be treated as 2^32.
  38. // Closed ranges (i.e. 'max_bit' instead of 'end_bit') can be made to work but they require
  39. // more care and cause a lot of +1/-1 noise.
  40. //
  41. // The code here is designed for an index type with 32 bits. When bit indexes can go beyond that,
  42. // the corresponding number type (uint64_t) is used instead.
  43. //
  44. // Changing this typedef to uint64_t eliminates all potential bugs related to 32-bit index math
  45. // but it makes the code slower (slightly so if compiled as 64-bit code, signficantly for 32-bit
  46. // code). Compiling as 32-bit code with gcc, the difference is 8.9 s instead of 4.7 s for the
  47. // sieve init, 9.7 s instead of 5 s for sieving a million primes just below 2^64.
  48.  
  49. typedef uint_fast32_t uintxx_t;
  50.  
  51. ///////////////////////////////////////////////////////////////////////////////////////////////////
  52. // Bit manipulation functions are sometimes faster using the machine word size, sometimes using
  53. // unsigned char. Using templated functions makes it possible to mix and match. It also makes
  54. // it easy to experiment with intrinsics, by the simple expedient of defining specialisations
  55. // for certain types and implementing them via intrinsics.
  56.  
  57. template<typename word_t>
  58. void set_bit (word_t *p, uintxx_t index)
  59. {
  60.    enum {  BITS_PER_WORD = sizeof(word_t) * CHAR_BIT  };
  61.  
  62.    // we can trust the compiler to use masking and shifting instead of division; we cannot do that
  63.    // ourselves without having the log2 which cannot easily be computed as a constexpr
  64.    
  65.    p[index / BITS_PER_WORD] |= word_t(1) << (index % BITS_PER_WORD);
  66. }
  67.  
  68. //-------------------------------------------------------------------------------------------------
  69. // Returning the result as unsigned char makes this compatible with the intrinsics for equivalent
  70. // CPU instructions; forcing the intrinsics to bool instead would cause potential inefficiencies.
  71.  
  72. template<typename word_t>
  73. unsigned char bit (word_t const *p, uintxx_t index)
  74. {
  75.    enum {  BITS_PER_WORD = sizeof(word_t) * CHAR_BIT  };
  76.  
  77.    return (unsigned char)((p[index / BITS_PER_WORD] >> (index % BITS_PER_WORD)) & 1u);
  78. }
  79.  
  80. ///////////////////////////////////////////////////////////////////////////////////////////////////
  81. // As far as prime counts and checksums are concerned, bit 0 is often treated as a stand-in for the
  82. // non-representable number 2 (which, being even, is the odd man out in such a bitmap). This makes
  83. // some things simpler; for example, it makes the number of primes in a bitmap equal to the number
  84. // of 0 bits.
  85. //
  86. // Bit 0 needs special consideration in any case since 1 it is neither prime nor composite.
  87. //
  88. // Prime counts, sums and products (primorials) are often available from sources like oeis.org.
  89. // They are independent of position and order, which makes it easy to combine the results of
  90. // separately processed blocks. The checksum is order-dependent - intendedly so - and it further
  91. // enhances the error detection capability of the digest.
  92.  
  93. template<typename word_t>
  94. struct digest_pod_t
  95. {
  96.    word_t count;
  97.    word_t sum;
  98.    word_t product;
  99.    word_t checksum;
  100.  
  101.    bool operator != (digest_pod_t const &other) const
  102.    {  
  103.       return !(*this == other);  
  104.    }
  105.  
  106.    bool operator == (digest_pod_t const &other) const
  107.    {  
  108.       return checksum == other.checksum
  109.           && product  == other.product
  110.           && sum      == other.sum
  111.           && count    == other.count;
  112.    }
  113.  
  114.    void add_prime (word_t n)
  115.    {
  116.       count    += 1;
  117.       sum      += n;
  118.       product  *= n;
  119.       checksum += n * count;
  120.    }
  121.  
  122.    template<typename const_forward_iterator_t>
  123.    void add_primes (const_forward_iterator_t begin, const_forward_iterator_t end)
  124.    {
  125.       for (const_forward_iterator_t p = begin; p != end; ++p)
  126.       {
  127.          add_prime(*p);
  128.       }
  129.    }
  130.  
  131.    void add_odd_composites_bitmap (
  132.       void const *bitmap,
  133.       uintxx_t    min_bit,
  134.       uintxx_t    end_bit,           // first bit past the range (max_bit + 1)
  135.       word_t      bit_offset = 0  )  // virtual offset of bit 0 of the bitmap (first number >> 1)
  136.    {
  137.       unsigned char const *odd_composites = static_cast<unsigned char const *>(bitmap);
  138.  
  139.       if (min_bit < end_bit)
  140.       {
  141.          if (min_bit == 0 && bit_offset == 0)
  142.          {
  143.             if (bit(odd_composites, 0) == 0)  add_prime(2);
  144.    
  145.             ++min_bit;
  146.          }
  147.  
  148.          word_t base = (bit_offset << 1) + 1;
  149.  
  150.          for (uintxx_t k = min_bit; k < end_bit; ++k)
  151.          {
  152.             if (bit(odd_composites, k))  continue;
  153.  
  154.             add_prime(word_t(base + (k << 1)));
  155.          }
  156.       }
  157.    }
  158. };
  159.  
  160. //-------------------------------------------------------------------------------------------------
  161. // mixed-size comparison
  162.  
  163. bool operator == (digest_pod_t<uint32_t> const &d32, digest_pod_t<uint64_t> const &d64)
  164. {
  165.    return d32.checksum == uint32_t(d64.checksum)
  166.        && d32.product  == uint32_t(d64.product)
  167.        && d32.sum      == uint32_t(d64.sum)
  168.        && d32.count    == uint32_t(d64.count);
  169. }
  170.  
  171. bool operator == (digest_pod_t<uint64_t> const &d64, digest_pod_t<uint32_t> const &d32)
  172. {
  173.    return d32 == d64;
  174. }
  175.  
  176. bool operator != (digest_pod_t<uint32_t> const &d32, digest_pod_t<uint64_t> const &d64)
  177. {
  178.    return !(d32 == d64);
  179. }
  180.  
  181. bool operator != (digest_pod_t<uint64_t> const &d64, digest_pod_t<uint32_t> const &d32)
  182. {
  183.    return !(d64 == d32);
  184. }
  185.  
  186. //-------------------------------------------------------------------------------------------------
  187. // convenience
  188.  
  189. template<typename value_t>
  190. void print (
  191.    char const *prefix,
  192.    digest_pod_t<value_t> const &digest,
  193.    unsigned count_digits = 9  )
  194. {
  195.    enum {  H = 2 * sizeof(value_t)  };
  196.    
  197.    std::printf("%s{ %*llu, %0*llX, %0*llX, %0*llX }",
  198.       prefix ? prefix : "",
  199.       count_digits, uint64_t(digest.count),
  200.       H, uint64_t(digest.sum),
  201.       H, uint64_t(digest.product),
  202.       H, uint64_t(digest.checksum)  );
  203. }
  204.  
  205. template<typename word_t>
  206. struct digest_t: digest_pod_t<word_t>
  207. {
  208.    digest_t ()
  209.    {
  210.       this->count = this->sum = this->checksum = 0;
  211.       this->product = 1;
  212.    }
  213.    
  214.    digest_t (word_t c, word_t s, word_t p, word_t x)
  215.    {
  216.       this->count = c;  this->sum = s;  this->product = p;  this->checksum = x;
  217.    }
  218. };
  219.  
  220. typedef digest_t<uint32_t> digest32_t;
  221. typedef digest_t<uint64_t> digest64_t;
  222.  
  223. ///////////////////////////////////////////////////////////////////////////////////////////////////
  224. // all ranges have been verified against the primesieve.org console executable, except for those
  225. // which exceed its cap (18446744030759878665 == UINT64_MAX - 10 * UINT32_MAX).
  226.  
  227. struct test_range_t
  228. {
  229.    uint64_t lo, hi;
  230.    digest_pod_t<uint64_t> digest;
  231. };
  232.  
  233. #define _( lo, hi, first_prime, last_prime, cnt, sum, prd, chk )  \
  234.    {  lo ## u, hi ## u, {  cnt ## u, 0x ## sum ## u, 0x ## prd ## u, 0x ## chk ## u  }  }
  235.  
  236. test_range_t const Ranges_1M[] =
  237. {
  238.    _(                    1,             10000000 /*   10000000 */,                    2,              9999991 /*   9999990 */,    664579, 000002E9D50C6334, E284CDD9939E68AA, 13F0C0CBB8773897 ),
  239.    _(             81673073,            100000000 /*   18326928 */,             81673073,             99999989 /*  18326917 */,   1000000, 00005299CB5A5478, 7236ED226E8BA3D5, 8B636C3E423B6BAA ),
  240.    _(            979292933,           1000000000 /*   20707068 */,            979292933,            999999937 /*  20707005 */,   1000000, 000384138D549EA0, 879FC41968FAD3E9, EAFCC52696740814 ),
  241.    _(           9976982339,          10000000000 /*   23017662 */,           9976982339,           9999999967 /*  23017629 */,   1000000, 00237C7AA33A931A, FEEAC78D65A10667, D7C1D531E4400712 ),
  242.    _(          99974683489,         100000000000 /*   25316512 */,          99974683489,          99999999977 /*  25316489 */,   1000000, 016339F4D1418D36, A1C1A63E3E302FEF, 478434FCE8AB133E ),
  243.    _(         999972400027,        1000000000000 /*   27599974 */,         999972400027,         999999999989 /*  27599963 */,   1000000, 0DE0AA269BDFD8BA, CEE122E76B72227F, D50470D359BE4DFE ),
  244.    _(        9999970054519,       10000000000000 /*   29945482 */,        9999970054519,        9999999999971 /*  29945453 */,   1000000, 8AC715685B09F258, 8593DE5292613F85, 8B32A42E25B99C64 ),
  245.    _(       99999967808393,      100000000000000 /*   32191608 */,       99999967808393,       99999999999973 /*  32191581 */,   1000000, 6BC74F88FD0C5A6C, 1AB5F68D9123D491, D9CDC114AA6B0AAE ),
  246.    _(      999999965485057,     1000000000000000 /*   34514944 */,      999999965485057,      999999999999989 /*  34514933 */,   1000000, 35C99E13AAF0731E, FC1D792962ADB95B, 1AF84035F20F6A04 ),
  247.    _(     9999999963142769,    10000000000000000 /*   36857232 */,     9999999963142769,     9999999999999937 /*  36857169 */,   1000000, 19E0B8F8BCF0E2DE, 98216464A377198F, D6CB044A5230ADB8 ),
  248.    _(    99999999960819169,   100000000000000000 /*   39180832 */,    99999999960819169,    99999999999999997 /*  39180829 */,   1000000, 02C7CF7BB7826270, 3B6966E20059EEA9, 5D85F14A7AF08E06 ),
  249.    _(   999999999958516801,  1000000000000000000 /*   41483200 */,   999999999958516801,   999999999999999989 /*  41483189 */,   1000000, 1BCEBA0D5ECD2FEE, 5946DFC574AB3CF3, D118C1B8D724181C ),
  250.    _(  9999999999956286497, 10000000000000000000 /*   43713504 */,  9999999999956286497,  9999999999999999961 /*  43713465 */,   1000000, 1613ED6695FB8F08, C1237D83F03C887D, 862E500F9255057E ),
  251.    _( 18446744030715545269, 18446744030759878665 /*   44333397 */, 18446744030715545269, 18446744030759878627 /*  44333359 */,   1000000, FF675557CDDE0298, 7EA5838E55BEEEC5, 71F3BDD5137CBDEC ),
  252.    _( 18446744073665204447, 18446744073709551615 /*   44347169 */, 18446744073665204447, 18446744073709551557 /*  44347111 */,   1000000, FFFFEBD3966C841C, 5E37B0500AB5B8BD, 995B0CEF6CA1E4A8 )
  253. };
  254.  
  255. test_range_t const Ranges_10M[] =
  256. {
  257.    _(                    1,            100000000 /*  100000000 */,                    2,             99999989 /*  99999988 */,   5761455, 0000FDF0985FB44C, 6D921F88B2D434D2, C0B16F72901E85FF ),
  258.    _(            793877173,           1000000000 /*  206122828 */,            793877173,            999999937 /* 206122765 */,  10000000, 001FDBE1C333ECF8, 651A0A9574621079, C45243BCC5A786A2 ),
  259.    _(           9769857367,          10000000000 /*  230142634 */,           9769857367,           9999999967 /* 230142601 */,  10000000, 015F2EBED9399456, 9E5053E10173B0AB, 13AC8B7227297280 ),
  260.    _(          99746761237,         100000000000 /*  253238764 */,          99746761237,          99999999977 /* 253238741 */,  10000000, 0DDC371E71307554, AABB2057BB24D2C9, C6B49F02CC79C6F4 ),
  261.    _(         999723733787,        1000000000000 /*  276266214 */,         999723733787,         999999999989 /* 276266203 */,  10000000, 8AC23ACFC174A930, 66BAD7F180005B21, 23284B3010659CD0 ),
  262.    _(        9999700629011,       10000000000000 /*  299370990 */,        9999700629011,        9999999999971 /* 299370961 */,  10000000, 6BC20C8E38DE8F36, ED985713434EEDFB, 7B1FD08D2CF8ECD4 ),
  263.    _(       99999677617663,      100000000000000 /*  322382338 */,       99999677617663,       99999999999973 /* 322382311 */,  10000000, 35C3F3CFA654F69A, 72861CA76E6E313F, F8CCDA86C924E3C6 ),
  264.    _(      999999654617569,     1000000000000000 /*  345382432 */,      999999654617569,      999999999999989 /* 345382421 */,  10000000, 19DAA726AC5A2024, 2E27E7F123B0F7B1, 38455B855C8EECAE ),
  265.    _(     9999999631636541,    10000000000000000 /*  368363460 */,     9999999631636541,     9999999999999937 /* 368363397 */,  10000000, 02C1563C4B6432C4, C36AB4E6E6B37F65, D29BB9FED3D6DA14 ),
  266.    _(    99999999608465399,   100000000000000000 /*  391534602 */,    99999999608465399,    99999999999999997 /* 391534599 */,  10000000, 1BC7D81E877E2454, 292A3536FD400481, 9395B877E4E6EEC2 ),
  267.    _(   999999999585415333,  1000000000000000000 /*  414584668 */,   999999999585415333,   999999999999999989 /* 414584657 */,  10000000, 160CA3C90CF00EF0, 7E1A4A66DE1CA111, ADB5D2CEF74C6B04 ),
  268.    _(  9999999999562573471, 10000000000000000000 /*  437426530 */,  9999999999562573471,  9999999999999999961 /* 437426491 */,  10000000, DCC0478D1CC9FA52, 2F64319F30800833, D59808CFCB4ADAEA ),
  269.    _( 18446744030316425227, 18446744030759878665 /*  443453439 */, 18446744030316425227, 18446744030759878627 /* 443453401 */,  10000000, FA023EF2BBA7CDA2, 4C5D19CF0CE2099F, 250841775ECFADF0 ),
  270.    _( 18446744073265777349, 18446744073709551615 /*  443774267 */, 18446744073265777349, 18446744073709551557 /* 443774209 */,  10000000, FFF81E195834C5F0, 4EA8082765E419D5, 0FEE0BBB15025848 )
  271. };
  272.  
  273. #undef _
  274.  
  275. digest_pod_t<uint64_t> Digest_4G =
  276. {  
  277.    203280221, 0x05E8362E0C903F86u, 0x9B20CCAA5B253F12u, 0xE364FF90C46F979Cu
  278. };
  279.  
  280. ///////////////////////////////////////////////////////////////////////////////////////////////////
  281. // Computing the maximum factor - i.e. the integer square root - via uint32_t(sqrt(double(n))) is
  282. // not safe, for various reasons. One of them is that the result for UINT64_MAX would be 0 instead
  283. // of UINT32_MAX. See http://codereview.stackexchange.com/questions/69069
  284. //
  285. // Simpler versions work well enough with VC++ and gcc but the version shown here is the only one
  286. // that is guaranteed to return the correct result regardless of rounding mode, fp strictness and
  287. // compiler optimisation settings. Since the function is not time-critical I did not clean it up,
  288. // to show the strange antisymmetry. The absolute minimum necessary here is checking whether the
  289. // float result is greater than UINT32_MAX before casting it to integral type; in that case the
  290. // worst that can happen is that the result is one or two above the perfect result, leading to at
  291. // most one unnecessary iteration of the outer sieve loop. That can be avoided in almost all cases
  292. // by decrementing the result once if its square is greater than n; in a tiny fraction of cases a
  293. // decrement of two would be necessary, but that is extremely rare, less than one in a billion.
  294.  
  295. template<typename word_t>
  296. word_t square (word_t n)  
  297. {  
  298.    return word_t(n * n);  
  299. }
  300.  
  301. uintxx_t max_factor32 (uint64_t n)
  302. {
  303.    uint64_t r = uint64_t(std::sqrt(double(n)));
  304.      
  305.    while (r < UINT32_MAX && square(r) < n)  
  306.       ++r;            
  307.    while (r > UINT32_MAX || square(r) > n)  
  308.       --r;
  309.      
  310.    return uintxx_t(r);
  311. }
  312.  
  313. ///////////////////////////////////////////////////////////////////////////////////////////////////
  314.  
  315. unsigned char odd_composites32[UINT32_MAX / (2 * CHAR_BIT) + 1];   // the small factor sieve
  316. uintxx_t sieved_bits = 0;                                          // how far it's been initialised
  317.  
  318. void extend_factor_sieve_to_cover (uintxx_t max_factor_bit);       // bit, not number!
  319.  
  320. void sieve32 (unsigned char *target_segment, uint64_t bit_offset, uintxx_t bit_count)
  321. {
  322.    assert( bit_count > 0 && bit_count <= UINT32_MAX / 2 + 1 );
  323.  
  324.    uintxx_t max_bit = bit_count - 1;
  325.    uint64_t max_num = 2 * (bit_offset + max_bit) + 1;
  326.    uintxx_t max_factor_bit = (max_factor32(max_num) - 1) / 2;
  327.  
  328.    if (target_segment != odd_composites32)
  329.    {
  330.       extend_factor_sieve_to_cover(max_factor_bit);
  331.    }
  332.  
  333.    std::memset(target_segment, 0, std::size_t((max_bit + CHAR_BIT) / CHAR_BIT));
  334.    
  335.    for (uintxx_t i = 3u >> 1; i <= max_factor_bit; ++i)
  336.    {
  337.       if (bit(odd_composites32, i))  continue;
  338.  
  339.       uintxx_t n = (i << 1) + 1;   // the actual prime represented by bit i (< 2^32)
  340.      
  341.       uintxx_t stride = n;         // == (n * 2) / 2
  342.       uint64_t start = (uint64_t(n) * n) >> 1;
  343.       uintxx_t k;
  344.  
  345.       if (start >= bit_offset)
  346.       {
  347.          k = uintxx_t(start - bit_offset);
  348.       }
  349.       else // start < offset
  350.       {
  351.          k = stride - (bit_offset - start - 1) % stride - 1;
  352.       }
  353.  
  354.       while (k <= max_bit)
  355.       {
  356.          set_bit(target_segment, k);
  357.          
  358.          if ((k += stride) < stride)  // k can wrap since strides go up to almost 2^32
  359.          {
  360.             break;
  361.          }
  362.       }
  363.    }
  364. }
  365.  
  366. //-------------------------------------------------------------------------------------------------
  367. // Using sieve32() for sieving the small factors as well here; that takes about three times as long
  368. // as a specialised small sieve with all the bells and whistles but it is also a couple hundred
  369. // lines of code shorter. See zrbj_sx_sieve32_v4.cpp for the full Monty. Sieving in small segments
  370. // is a necessity since doing it in one go would slow things down by an order of magnitude.
  371.  
  372. void extend_factor_sieve_to_cover (uintxx_t max_factor_bit)
  373. {
  374.    uintxx_t const SIEVE_BITS = sizeof(odd_composites32) * CHAR_BIT;
  375.  
  376.    assert( max_factor_bit < SIEVE_BITS );
  377.  
  378.    if (max_factor_bit >= sieved_bits)
  379.    {
  380.       assert( sieved_bits % CHAR_BIT == 0 );
  381.  
  382.       uintxx_t bits_to_sieve = max_factor_bit - sieved_bits + 1;
  383.       uintxx_t segment_size = 1u << 18;  // == 32K * CHAR_BIT == L1 cache size
  384.       uintxx_t partial_bits = bits_to_sieve % segment_size;
  385.       uintxx_t segments     = bits_to_sieve / segment_size + (partial_bits != 0);
  386.  
  387.       if ((SIEVE_BITS - sieved_bits) % segment_size == 0)
  388.       {
  389.          partial_bits = 0;   // always sieve full segments as long as there is enough space
  390.       }
  391.      
  392.       for ( ; segments--; )
  393.       {
  394.          uintxx_t bits_this_round = segments == 0 && partial_bits ? partial_bits : segment_size;
  395.  
  396.          sieve32(odd_composites32 + sieved_bits / CHAR_BIT, sieved_bits, bits_this_round);
  397.            
  398.          sieved_bits += bits_this_round;
  399.       }
  400.    }
  401. }
  402.  
  403. ///////////////////////////////////////////////////////////////////////////////////////////////////
  404. // utility functions for the tests
  405.  
  406. namespace util
  407. {
  408.    uint32_t random_seed32 = 42;  // must be nonzero
  409.  
  410.    uint32_t random_uint32 ()  // xorshift32*
  411.    {
  412.       uint32_t x = random_seed32;
  413.  
  414.       x ^= x << 13;  x ^= x >> 17;  x ^= x << 5;
  415.  
  416.       random_seed32 = x;
  417.  
  418.       return x * 0xEA4E58EDu;
  419.    }
  420.  
  421.    uint32_t random_uint32 (uint32_t modulus)
  422.    {
  423.       if (modulus > 1)
  424.       {
  425.          for ( ; ; )
  426.          {
  427.             uint32_t raw_bits = random_uint32();
  428.             uint32_t result = raw_bits % modulus;
  429.             uint32_t check = uint32_t(raw_bits - result + modulus);
  430.  
  431.             if (check >= raw_bits || check == 0)
  432.             {
  433.                return result;
  434.             }
  435.          }
  436.       }
  437.  
  438.       return 0;
  439.    }
  440.  
  441.    double ms (std::clock_t clock_delta)
  442.    {
  443.       return 1000 * double(clock_delta) / CLOCKS_PER_SEC;
  444.    }
  445.  
  446.    void terminate_with_failure (char const *message = 0)
  447.    {
  448.       std::printf("%s", message ? message : "\n\n*** FAIL ***\n");
  449.       std::exit(3);
  450.    }
  451.  
  452. } // namepace util
  453.  
  454. ///////////////////////////////////////////////////////////////////////////////////////////////////
  455. // various tests...
  456.  
  457. void time_factor_sieve ()
  458. {
  459.    std::printf("\ninitialising the small factor sieve... ");
  460.    
  461.    sieved_bits = 0;
  462.  
  463.    std::clock_t t0 = std::clock();
  464.    extend_factor_sieve_to_cover(UINT32_MAX / 2 - 1);
  465.    double seconds = double(std::clock() - t0) / CLOCKS_PER_SEC;
  466.  
  467.    std::printf("%.3f s (%.1f M/s)", seconds, sieved_bits * std::ldexp(2.0, -20) / seconds);
  468.  
  469.    std::printf("\nverifying... ");
  470.  
  471.    digest64_t digest;
  472.  
  473.    t0 = std::clock();
  474.    digest.add_odd_composites_bitmap(odd_composites32, 0, sieved_bits - 1, 0);
  475.    seconds = double(std::clock() - t0) / CLOCKS_PER_SEC;
  476.  
  477.    std::printf("%.3f s (%.1f M/s)", seconds, sieved_bits * std::ldexp(2.0, -20) / seconds);
  478.  
  479.    if (digest != Digest_4G)  
  480.    {
  481.       std::printf("\n\ndigest != Digest_4G\n");
  482.       print("\ntst ", digest);
  483.       print("\nref ", Digest_4G);
  484.      
  485.       util::terminate_with_failure();
  486.    }
  487. }
  488.  
  489. //-------------------------------------------------------------------------------------------------
  490.  
  491. void test_factor_sieve ()
  492. {
  493.    std::printf("\ntesting incremental updates of the small factor sieve...");
  494.  
  495.    uint32_t ref_bytes = uint32_t(sieved_bits) / CHAR_BIT;
  496.    std::vector<unsigned char> ref(odd_composites32, odd_composites32 + ref_bytes);
  497.    uint32_t compared = 0;
  498.    uint32_t limit = sizeof(odd_composites32) * CHAR_BIT;
  499.  
  500.    sieved_bits = 0;
  501.  
  502.    while (sieved_bits < limit)
  503.    {
  504.       uint32_t growth_limit = 1u << 23;
  505.  
  506.       if (util::random_uint32(100) < 33)  growth_limit <<= 4;
  507.       if (util::random_uint32(100) < 33)  growth_limit >>= 4;
  508.  
  509.       growth_limit = std::min(growth_limit, limit - uint32_t(sieved_bits));
  510.          
  511.       uint32_t extend_by = util::random_uint32(growth_limit);
  512.  
  513.       std::printf("\n   %8X + %7X... ", uint32_t(sieved_bits), extend_by);
  514.          
  515.       std::clock_t t0 = std::clock();
  516.      
  517.       extend_factor_sieve_to_cover(sieved_bits + extend_by);
  518.  
  519.       std::printf("%6.1f ms", util::ms(std::clock() - t0));
  520.  
  521.       uint32_t full_bytes = std::min(ref_bytes, uint32_t(sieved_bits) / CHAR_BIT);
  522.  
  523.       if (uint32_t compare_now = full_bytes - compared)
  524.       {
  525.          if (std::memcmp(odd_composites32, &ref[0], compare_now))
  526.          {
  527.             util::terminate_with_failure();
  528.          }
  529.  
  530.          compared += compare_now;
  531.       }
  532.    }
  533.  
  534.    std::printf("\n-> sieved_bits = %8X", uint32_t(sieved_bits));
  535.  
  536.    if (compared < sizeof(odd_composites32))
  537.    {
  538.       std::printf("\nNOTE: only %u of %u sieve bytes have been compared",
  539.          compared, uint32_t(sizeof(odd_composites32))  );
  540.    }
  541. }
  542.  
  543. ///////////////////////////////////////////////////////////////////////////////////////////////////
  544.  
  545. void sieve_and_print_range (
  546.    uint64_t start,          // number, not bit index
  547.    uint32_t count,
  548.    digest_pod_t<uint64_t> const *ref = 0  )  
  549. {
  550.    unsigned odd_start = unsigned(start & 1);
  551.  
  552.    if (count < 2 - odd_start)  return;
  553.  
  554.    std::printf("\n%20llu .. %20llu", start, start + count - 1);
  555.  
  556.    uint32_t max_bit = (count - (2 - odd_start)) >> 1;
  557.  
  558.    std::vector<unsigned char> buffer((max_bit + CHAR_BIT) / CHAR_BIT);
  559.    unsigned char *bitmap = &buffer[0];
  560.  
  561.    std::clock_t t0 = std::clock();
  562.      
  563.    if (count > 1 - odd_start)
  564.    {
  565.       sieve32(bitmap, start >> 1, max_bit + 1);
  566.    }              
  567.      
  568.    double ms = util::ms(std::clock() - t0);
  569.    std::printf(" %6.0f ms %6.1f M/s", ms, count / ms * 1000 * std::ldexp(2, -20));
  570.  
  571.    digest64_t digest;
  572.  
  573.    if (count > 1 - odd_start && start + count - 1 >= 2)
  574.    {
  575.       if (start == 2)  digest.add_prime(2);  // digest does not add 2 by itself in this case
  576.      
  577.       digest.add_odd_composites_bitmap(bitmap, 0, max_bit + 1, start >> 1);
  578.    }
  579.  
  580.    std::printf(" %9u ", uint32_t(digest.count));
  581.  
  582.    if (ref != 0)  // bloody gcc complains if inner conditional is not wrapped in braces
  583.    {
  584.       if (digest == *ref)
  585.          std::printf(" OK");
  586.       else
  587.          util::terminate_with_failure();
  588.    }
  589. }
  590.  
  591. //-------------------------------------------------------------------------------------------------
  592.  
  593. void sieve_and_print_range (test_range_t const &range)
  594. {
  595.    assert( range.lo <= range.hi );
  596.    assert( range.hi - range.lo + 1 <= UINT32_MAX );
  597.    
  598.    sieve_and_print_range(range.lo, uint32_t(range.hi - range.lo + 1), &range.digest);
  599. }
  600.  
  601. //-------------------------------------------------------------------------------------------------
  602.  
  603. void sieve_and_print_ranges (test_range_t const *ranges, std::size_t count)
  604. {
  605.    for (test_range_t const *p = ranges, *e = p + count; p < e; ++p)
  606.    {
  607.       sieve_and_print_range(*p);
  608.    }
  609. }
  610.  
  611. //-------------------------------------------------------------------------------------------------
  612.  
  613. template<std::size_t N>
  614. void sieve_and_print_ranges (test_range_t const (&ranges)[N])
  615. {
  616.    sieve_and_print_ranges(ranges, N);
  617. }
  618.  
  619. ///////////////////////////////////////////////////////////////////////////////////////////////////
  620.  
  621. int main ()
  622. {
  623.    std::printf("\nsizeof(uintxx_t) == %u", unsigned(sizeof(uintxx_t)));
  624.    std::printf("\nsizeof(void*)    == %u\n", unsigned(sizeof(void*)));
  625.    
  626.    time_factor_sieve();
  627.    test_factor_sieve();
  628.  
  629.    std::printf("\nsieving some ranges (1M)...");
  630.    sieve_and_print_ranges(Ranges_1M);
  631.  
  632.    std::printf("\nsieving some bigger ranges (10M)...");
  633.    sieve_and_print_ranges(Ranges_10M);
  634.  
  635.    std::printf("\nsieving with maximum window size...");
  636.    sieve_and_print_range(0, UINT32_MAX, &Digest_4G);
  637.    sieve_and_print_range(1, UINT32_MAX, &Digest_4G);
  638.    sieve_and_print_range(2, UINT32_MAX, &Digest_4G);
  639.  
  640.    return 0;
  641. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement