Advertisement
P_P

SoE segmented sqrt

P_P
Mar 29th, 2018
196
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 9.30 KB | None | 0 0
  1. //    https://codereview.stackexchange.com/q/189083
  2.  
  3. //    https://pastebin.com/JMdTxbeJ
  4.  
  5. /*                    old-ns-new               old-s-new
  6.       primes <   25     47    47    p < 2^31   2.0   1.8
  7.                  50     63    63        2^32   4.2   3.8
  8.                 100     78    78
  9.                 200    110   140         1e9   0.9   0.8
  10.                 400    187   219         2e9   1.8   1.7
  11.                 800    344   405         4e9   3.9   3.5
  12.                1600    639   780
  13.                3200   1295  1591         scroll down for
  14.                6400   2590  2995         faster version     */
  15.  
  16. #include "stdafx.h"
  17. #include <vector>
  18. #include <iostream>
  19. #include <math.h>
  20. #include <ctime>
  21. #include <Windows.h>
  22. using namespace std;
  23. typedef unsigned __int16 u16; vector<u16> p0;
  24. typedef unsigned __int32 u32; vector<u32> x; vector<u32> p;
  25.  
  26. void buildP0(u32 m) {                               // p's <= sqrt(m)
  27.     m = (u32)sqrt(m); p0.resize(6537);              // 13 <= p < 2^16: 6537 p's
  28.     u32 a = 13, b = 2, d = 5, i = 0;
  29.     for (double rt; a <= m; d = 5, a += b ^= 6) {
  30.         for (rt = sqrt(a); d <= rt; d += 2)
  31.             if (a % d == 0) break;
  32.         if (d > rt) p0[i++] = a;
  33.     } p0.resize(i);
  34. }
  35.  
  36. void buildX(u32 m) {  // mark odd composites
  37.     if (m < 5000) {
  38.         m += m & 1; m >>= 1;
  39.         u32 a = 1, b = 2, c = 3, d = 1 + (m >> 5);
  40.         x.resize(d); x[0] = 0x9b4b3491;
  41.         for (; a < d; a += 3) x[a] = 0x24924924;
  42.         for (; b < d; b += 3) x[b] = 0x49249249;
  43.         for (; c < d; c += 3) x[c] = 0x92492492;
  44.         for (a = 1; a < d; a += 5) x[a] |= 0x42108421;
  45.         for (a = 2; a < d; a += 5) x[a] |= 0x10842108;
  46.         for (a = 3; a < d; a += 5) x[a] |= 0x84210842;
  47.         for (a = 4; a < d; a += 5) x[a] |= 0x21084210;
  48.         for (a = 5; a < d; a += 5) x[a] |= 0x08421084;
  49.         for (a = 7, b = 24, c = 12; b < m; a += 2, b += c += 4)
  50.             if ((x[a >> 6] & 1 << (a >> 1)) == 0)
  51.                 for (d = b; d < m; d += a) x[d >> 5] |= 1 << d;
  52.         x[m >> 5] |= ~0u << m;
  53.     }
  54.     else {
  55.         buildP0(m); int i, j = p0.size();
  56.         m -= m / ~0u; m += m & 1; m >>= 1;
  57.         u32 r0, r1 = 0, a = 1, b = 1, c = 1, d = m >> 5;
  58.         x.resize(d + 1); x[0] = 0x9b4b3491;
  59.         for (;;) {
  60.             if (a > d) break; x[a++] = 0x24924924;
  61.             if (a > d) break; x[a++] = 0x49249249;
  62.             if (a > d) break; x[a++] = 0x92492492;
  63.         } for (;;) {
  64.             if (b > d) break; x[b++] |= 0x42108421;
  65.             if (b > d) break; x[b++] |= 0x10842108;
  66.             if (b > d) break; x[b++] |= 0x84210842;
  67.             if (b > d) break; x[b++] |= 0x21084210;
  68.             if (b > d) break; x[b++] |= 0x08421084;
  69.         } for (;;) {
  70.             if (c > d) break; x[c++] |= 0x08102040;
  71.             if (c > d) break; x[c++] |= 0x40810204;
  72.             if (c > d) break; x[c++] |= 0x04081020;
  73.             if (c > d) break; x[c++] |= 0x20408102;
  74.             if (c > d) break; x[c++] |= 0x02040810;
  75.             if (c > d) break; x[c++] |= 0x10204081;
  76.             if (c > d) break; x[c++] |= 0x81020408;
  77.         } while ((r0 = r1) < m) {                     // L1 64 KB / 2 = 0x40000
  78.             if ((r1 += 0x40000) > m) r1 = m;
  79.             for (i = 0, a = 11, b = 60; b < r1; a = p0[i++], b = a * a >> 1) {
  80.                 if (b < r0) { b += (r0 - b) / a * a; if (b < r0) b += a; }
  81.                 while (b < r1) { x[b >> 5] |= 1 << b; b += a; }
  82.                 if (i == j) break;
  83.             }
  84.         } x[m >> 5] |= ~0u << m;
  85.     }
  86. }
  87.  
  88. void countPrimes() {
  89.     u32 c = 1; int i = x.size() - 1;
  90.     while (i >= 0) c += __popcnt(~x[i--]);
  91.     p.resize(c);
  92. }
  93.  
  94. void primes(u32 m) {  // primes <= m
  95.     if (m < 2) return;
  96.     buildX(m); countPrimes(); p[0] = 2;
  97.     u32 u = ~0u, v = ~0u, xi; DWORD tz;
  98.     for (int i = 0, n = 1, c = p.size(); n < c; v = u += 64)
  99.         for (xi = ~x[i++]; xi; p[n++] = v += (tz + 1) << 1) {
  100.             _BitScanForward(&tz, xi); xi >>= tz; xi >>= 1;
  101.         }
  102. }
  103.  
  104. void main()
  105. {
  106.     primes(1000); x.resize(0); p.resize(0);
  107.     for (u32 m = 25; m < 6401; m <<= 1)  // for (u32 m = 0; m < 9; m++)
  108.     {
  109.         primes(m);
  110.         if (m > 1)
  111.         {
  112.             u32 maxP = p[p.size() - 1];
  113.             cout << "largest prime <= " << m << ": " << maxP << "  ";
  114.         }
  115.         clock_t clock0 = clock();
  116.         for (int i = 1000000; i; i--)
  117.         {
  118.             x.resize(0); p.resize(0); primes(m);
  119.         }
  120.         cout << clock() - clock0 << " ns: ";
  121.         cout << p.size() << " primes" << endl << endl;
  122.     }
  123.     x.resize(0); p.resize(0); u32 m = (u32)4e9;
  124.     clock_t clock0 = clock(); primes(m);
  125.     cout << clock() - clock0 << " ms: ";
  126.     cout << p.size() << " primes <= " << m;
  127.     for (int j = p.size(), i = j - 5; i < j;) cout << endl << p[i++];
  128.     getchar();
  129. }
  130.  
  131.  
  132. /*                    old-ns-new               old-s-new
  133.       primes <   25     47    62    p < 2^31   1.8   1.8
  134.                  50     63    78        2^32   3.8   3.7
  135.                 100     78    78
  136.                 200    140   124         1e9   0.8   0.8
  137.                 400    219   203         2e9   1.7   1.6
  138.                 800    405   359         4e9   3.5   3.4
  139.                1600    780   687
  140.                3200   1591  1372
  141.                6400   2995  2684                            */
  142.  
  143. #include "stdafx.h"
  144. #include <vector>
  145. #include <iostream>
  146. #include <math.h>
  147. #include <ctime>
  148. #include <Windows.h>
  149. using namespace std;
  150. typedef unsigned __int16 u16; u16 *p1;
  151. typedef unsigned __int32 u32; u32 *x; vector<u32> p;
  152. int p1Size, xSize;
  153.  
  154. void buildP1(u32 m) {                             // p's <= sqrt(m)
  155.     m = (u32)sqrt(m); u16 *p0 = new u16[6537];
  156.     u32 a = 13, b = 2, d = 5, i = 0;
  157.     for (double rt; a <= m; d = 5, a += b ^= 6) {
  158.         for (rt = sqrt(a); d <= rt; d += 2)
  159.             if (a % d == 0) break;
  160.         if (d > rt) p0[i++] = a;
  161.     } p1 = new u16[p1Size = i]; memcpy(p1, p0, i << 1);
  162. }
  163.  
  164. void buildX(u32 m) {  // mark odd composites
  165.     if (m < 80000) {
  166.         m += m & 1; m >>= 1;
  167.         u32 a = 1, b = 2, c = 3, d = 1 + (m >> 5);
  168.         x = new u32[d]; x[0] = 0x9b4b3491;
  169.         for (; a < d; a += 3) x[a] = 0x24924924;
  170.         for (; b < d; b += 3) x[b] = 0x49249249;
  171.         for (; c < d; c += 3) x[c] = 0x92492492;
  172.         for (a = 1; a < d; a += 5) x[a] |= 0x42108421;
  173.         for (a = 2; a < d; a += 5) x[a] |= 0x10842108;
  174.         for (a = 3; a < d; a += 5) x[a] |= 0x84210842;
  175.         for (a = 4; a < d; a += 5) x[a] |= 0x21084210;
  176.         for (a = 5; a < d; a += 5) x[a] |= 0x08421084;
  177.         for (a = 7, b = 24, c = 12; b < m; a += 2, b += c += 4)
  178.             if ((x[a >> 6] & 1 << (a >> 1)) == 0)
  179.                 for (d = b; d < m; d += a) x[d >> 5] |= 1 << d;
  180.         x[m >> 5] |= ~0u << m;
  181.     }
  182.     else {
  183.         buildP1(m); int i, j = p1Size;
  184.         m -= m / ~0u; m += m & 1; m >>= 1;
  185.         u32 r0, r1 = 0, a = 1, b = 1, c = 1, d = xSize = m >> 5;
  186.         x = new u32[d + 1]; x[0] = 0x9b4b3491;
  187.         for (;;) {
  188.             if (a > d) break; x[a++] = 0x24924924;
  189.             if (a > d) break; x[a++] = 0x49249249;
  190.             if (a > d) break; x[a++] = 0x92492492;
  191.         } for (;;) {
  192.             if (b > d) break; x[b++] |= 0x42108421;
  193.             if (b > d) break; x[b++] |= 0x10842108;
  194.             if (b > d) break; x[b++] |= 0x84210842;
  195.             if (b > d) break; x[b++] |= 0x21084210;
  196.             if (b > d) break; x[b++] |= 0x08421084;
  197.         } for (;;) {
  198.             if (c > d) break; x[c++] |= 0x08102040;
  199.             if (c > d) break; x[c++] |= 0x40810204;
  200.             if (c > d) break; x[c++] |= 0x04081020;
  201.             if (c > d) break; x[c++] |= 0x20408102;
  202.             if (c > d) break; x[c++] |= 0x02040810;
  203.             if (c > d) break; x[c++] |= 0x10204081;
  204.             if (c > d) break; x[c++] |= 0x81020408;
  205.         } while ((r0 = r1) < m) {
  206.             if ((r1 += 0x40000) > m) r1 = m;
  207.             for (i = 0, a = 11, b = 60; b < r1; a = p1[i++], b = a * a >> 1) {
  208.                 if (b < r0) { b += (r0 - b) / a * a; if (b < r0) b += a; }
  209.                 while (b < r1) { x[b >> 5] |= 1 << b; b += a; }
  210.                 if (i == j) break;
  211.             }
  212.         } x[m >> 5] |= ~0u << m; delete[]p1;
  213.     }
  214. }
  215.  
  216. void countPrimes() {
  217.     u32 c = 1; int i = xSize;
  218.     while (i >= 0) c += __popcnt(~x[i--]);
  219.     p.resize(c);
  220. }
  221.  
  222. void primes(u32 m) {                          // primes <= m
  223.     if (m < 2) return;
  224.     buildX(m); countPrimes(); p[0] = 2;
  225.     u32 u = ~0u, v = ~0u, xi; DWORD tz;
  226.     for (int i = 0, n = 1, c = p.size(); n < c; v = u += 64)
  227.         for (xi = ~x[i++]; xi; p[n++] = v += (tz + 1) << 1) {
  228.             _BitScanForward(&tz, xi); xi >>= tz; xi >>= 1;
  229.         } delete[]x;
  230. }
  231.  
  232. void main()
  233. {
  234.     clock_t clock0; primes(1000);
  235.     for (u32 m = 25; m < 6401; m <<= 1)
  236.     {
  237.         primes(m);
  238.         if (m > 1) cout << "maxP <= " << m << ": " << p[p.size() - 1] << "  ";
  239.         clock0 = clock();
  240.         for (int i = 1000000; i; i--) primes(m);
  241.         cout << clock() - clock0 << " ns: ";
  242.         cout << p.size() << " primes" << endl << endl;
  243.     }
  244.     u32 m = (u32)1e9;
  245.     clock0 = clock(); primes(m);
  246.     cout << clock() - clock0 << " ms: ";
  247.     cout << p.size() << " primes <= " << m;
  248.     for (int j = p.size(), i = j - 5; i < j;) cout << endl << p[i++];
  249.     getchar();
  250. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement