foobar76

Bit-efficient uniform random number generators

Oct 7th, 2019
299
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
  6.  
  7. #define TEST_ROUNDS 1000000000
  8.  
  9. #undef UNROLLED_LOOP    /* Use unrolled loop variants (for branch predictors) */
  10. #define LONG_MULTIPLICATION     /* Long multiplication vs. bitmask with
  11.                                  * rejection */
  12.  
  13. #undef BIT_ACCOUNTING   /* Keep track of used bits */
  14. #undef PRINT_RESULTS    /* Print individual get_uniform_rand() results */
  15.  
  16. #undef BOGUS_RANDOMNESS /* Rotate seed by one bit per request instead of
  17.                          * proper PRNG */
  18.  
  19. /* The state word must be initialized to non-zero */
  20. static uint64_t state;
  21.  
  22. static uint64_t buf[2];
  23. static uint64_t left[2] = {0, 0};
  24.  
  25. /* By Marsaglia(?) */
  26. static uint64_t
  27. xorshift64s(void)
  28. {
  29.   uint64_t x = state;
  30.  
  31.   x ^= x >> 12;
  32.   x ^= x << 25;
  33.   x ^= x >> 27;
  34.   state = x;
  35.  
  36.   return x * 2685821657736338717ULL;
  37. }
  38.  
/* Moves bits from the refill buffer buf[1] into the primary buffer
 * buf[0] until buf[0] holds at least n valid bits (n <= 64), drawing a
 * fresh 64-bit word from the PRNG whenever buf[1] has been drained.
 *
 * Invariant: buf[i] keeps its left[i] valid bits in the most
 * significant positions; the low (64 - left[i]) bits are zero. */
static void
fill_primary(uint64_t n)
{
  /* Does the entire content of buf[1] fit into buf[0]'s free space? */
  if (64 - left[0] >= left[1])
  {
    if (__builtin_expect(left[0] != 0, 1))
    {
      buf[0] = buf[0] | (buf[1] >> left[0]);
      /* NOTE(review): this store looks dead — buf[1] is overwritten
       * with a fresh PRNG word a few lines below. */
      buf[1] <<= left[0];
    }
    else
    {
      /* left[0] == 0: `buf[1] >> 64` would be undefined behavior, so
       * copy wholesale instead of shifting. */
      buf[0] = buf[1];
      buf[1] = 0;
    }

    left[0] += left[1];
    /* Refill the secondary buffer with 64 fresh random bits. */
    buf[1] = xorshift64s();
    left[1] = 64;
  }

  /* Common case: buf[0] now satisfies the request. */
  if (__builtin_expect(left[0] >= n, 1))
  {
    return;
  }

  /* Top buf[0] up to a full 64 bits from buf[1].
   * NOTE(review): if left[0] were 0 here, `buf[1] <<= 64` would be
   * undefined behavior; it appears unreachable (every fall-through path
   * above leaves left[0] > 0 once init_fill has run) — confirm. */
  buf[0] |= buf[1] >> left[0];
  buf[1] <<= 64 - left[0];
  left[1] -= 64 - left[0];
  left[0] = 64;
}
  70.  
  71. static void
  72. init_fill(uint64_t x)
  73. {
  74.   state = x;
  75.   buf[1] = xorshift64s();
  76.   left[1] = 64;
  77.  
  78.   fill_primary(64);
  79. }
  80.  
  81. /* Provides n random bits on most significant bits. */
  82. static uint64_t
  83. peek_bits(uint64_t n)
  84. {
  85. #ifndef BOGUS_RANDOMNESS
  86.   if (__builtin_expect(left[0] < n, 0))
  87.   {
  88.     fill_primary(n);
  89.   }
  90.  
  91.   return buf[0];
  92. #else
  93.   state = (state << 1) + (state >> 63);
  94.  
  95.   return state;
  96. #endif
  97. }
  98.  
  99. #ifdef BIT_ACCOUNTING
  100. static uint64_t used_bits = 0;
  101. #endif
  102.  
  103. /* Consumes bits peeked by earlier peek_bits() call. Multiple calls can be
  104.  * made for one peek_bits() call, but sum of arguments must be less or equal
  105.  * to earlier peek. */
  106. static void
  107. consume_bits(uint64_t n)
  108. {
  109.   buf[0] <<= n;
  110.   left[0] -= n;
  111.  
  112. #ifdef BIT_ACCOUNTING
  113.   used_bits += n;
  114. #endif
  115. }
  116.  
  117. #ifndef LONG_MULTIPLICATION
  118. /* Variable length bitmask with rejection method. */
  119.  
  120. #ifndef UNROLLED_LOOP
  121. static uint64_t
  122. get_uniform_rand(uint64_t range)
  123. {
  124.   uint64_t shift = __builtin_clzl(range);
  125.   uint64_t res;
  126.  
  127.   if (__builtin_expect(range == 0, 0))
  128.   {
  129.     return 0;
  130.   }
  131.  
  132.   do
  133.   {
  134.     res = peek_bits(64 - shift) >> shift;
  135.     consume_bits(64 - shift);
  136.   }
  137.   while (__builtin_expect(res >= range, 0));
  138.  
  139.   return res;
  140. }
  141.  
  142. #else   /* UNROLLED_LOOP */
  143. static uint64_t
  144. get_uniform_rand(uint64_t range)
  145. {
  146.   uint64_t rounds = 2;
  147.   uint64_t shift = __builtin_clzl(range);
  148.   uint64_t bitsperloop;
  149.   uint64_t res;
  150.   uint64_t nores = 1;
  151.   uint64_t neededrounds = 0;
  152.  
  153.   if (__builtin_expect(range == 0, 0))
  154.   {
  155.     return 0;
  156.   }
  157.  
  158.   if (rounds * (64 - shift) <= 64)
  159.   {
  160.     bitsperloop = rounds * (64 - shift);
  161.   }
  162.   else
  163.   {
  164.     bitsperloop = 64 - shift;
  165.     rounds = 1;
  166.   }
  167.  
  168.   do
  169.   {
  170.     uint64_t rbits = peek_bits(bitsperloop);
  171.     uint64_t neededrounds = 0;
  172.  
  173.     /* Unrolled loop */
  174.     for (uint64_t i = 0; i < rounds; i++)
  175.     {
  176.       res = nores ? rbits >> shift : res;
  177.       rbits <<= 64 - shift;
  178.       neededrounds += nores;
  179.       nores = res >= range;
  180.     }
  181.  
  182.     consume_bits(neededrounds * (64 - shift));
  183.   }
  184.   while (__builtin_expect(nores, 0));
  185.  
  186.   return res;
  187. }
  188.  
  189. #endif  /* UNROLLED_LOOP */
  190.  
  191. #else   /* LONG_MULTIPLICATION */
  192. /* These versions effectively perform a long multiplication until a fixed
  193.  * point integer portion of the computation can't change or a carry makes it
  194.  * increment. */
  195.  
  196. #ifndef UNROLLED_LOOP
  197. static uint64_t
  198. get_uniform_rand(uint64_t range)
  199. {
  200.   uint64_t shift = __builtin_clzl(range);
  201.   uint64_t add = range << shift;
  202.   unsigned __int128 mul;
  203.   uint64_t res, frac;
  204.   uint64_t rbits;
  205.   uint64_t v0 = 1, v1 = 0;
  206.  
  207.   if (__builtin_expect(range == 0, 0))
  208.   {
  209.     return 0;
  210.   }
  211.  
  212.   rbits = peek_bits(64 - shift);
  213.   consume_bits(64 - shift);
  214.  
  215.   frac = mul = (unsigned __int128)(rbits & (~0ULL << shift)) * range;
  216.   res = mul >> 64;
  217.  
  218.   /* This is essentially arbitrary-precision multiplication, tracking 65
  219.    * bits. */
  220.   while ((v0 ^ v1) & (frac > -add))
  221.   {
  222.     uint64_t tmp;
  223.  
  224.     /* old top bit */
  225.     v0 = frac >> 63;
  226.     frac <<= 1;
  227.  
  228.     tmp = frac;
  229.     frac += ((int64_t) peek_bits(1) >> 63) & add;
  230.     /* new top bit */
  231.     v1 = (tmp > frac);
  232.  
  233.     consume_bits(1);
  234.  
  235.     /* If both v0 and v1 are set a bit carries to the result. */
  236.     res += v0 & v1;
  237.   }
  238.  
  239.   return res;
  240. }
  241.  
  242. #else   /* UNROLLED_LOOP */
  243.  
  244. static uint64_t
  245. get_uniform_rand(uint64_t range)
  246. {
  247.   const uint64_t rounds = 2;
  248.   uint64_t shift = __builtin_clzl(range);
  249.   uint64_t add = range << shift;
  250.   unsigned __int128 mul;
  251.   uint64_t res, frac;
  252.   uint64_t rbits;
  253.   uint64_t v0 = 1, v1 = 0;
  254.   uint64_t ok = 1;
  255.  
  256.   if (__builtin_expect(range == 0, 0))
  257.   {
  258.     return 0;
  259.   }
  260.  
  261.   rbits = peek_bits(64 - shift);
  262.   consume_bits(64 - shift);
  263.  
  264.   frac = mul = (unsigned __int128)(rbits & (~0ULL << shift)) * range;
  265.   res = mul >> 64;
  266.  
  267.   /* Can the fractional part still cause res to increment? */
  268.   do
  269.   {
  270.     uint64_t neededbits = 0;
  271.  
  272.     rbits = peek_bits(rounds);
  273.  
  274.     /* Unrolled loop */
  275.     for (uint64_t i = 0; i < rounds; i++)
  276.     {
  277.       uint64_t tmp;
  278.  
  279.       ok &= (v0 ^ v1) & (frac > -add);
  280.       neededbits += ok;
  281.  
  282.       /* old top bit */
  283.       v0 = frac >> 63;
  284.       frac <<= 1;
  285.  
  286.       /* random bit */
  287.       v1 = rbits >> 63;
  288.       rbits <<= 1;
  289.  
  290.       tmp = frac;
  291.       frac += -(ok & v1) & add;
  292.       /* new top bit */
  293.       v1 = (tmp > frac);
  294.  
  295.       /* If both v0 and v1 are set a bit carries to the result. */
  296.       res += ok & v0 & v1;
  297.     }
  298.  
  299.     consume_bits(neededbits);
  300.   }
  301.   while (__builtin_expect(ok, 0));
  302.  
  303.   return res;
  304. }
  305.  
  306. #endif  /* UNROLLED_LOOP */
  307. #endif  /* LONG_MULTIPLICATION */
  308.  
  309. int
  310. main(int argc, char **argv)
  311. {
  312.   uint64_t mod;
  313.   uint64_t x = 0;
  314.  
  315.   if (argc != 3)
  316.   {
  317.     return 1;
  318.   }
  319.   /* Largest possible return value minus one. */
  320.   mod = atoll(argv[1]);
  321.  
  322.   /* Seed to xorshift64s */
  323.   init_fill(atoll(argv[2]));
  324.  
  325.   for (int i = 0; i < TEST_ROUNDS; i++)
  326.   {
  327.     uint64_t res = get_uniform_rand((volatile uint64_t)mod);
  328.  
  329. #ifdef PRINT_RESULTS
  330.     printf("res %llu\n", res);
  331. #endif
  332.  
  333.     /* assert(res < mod); */
  334.     x += res;
  335.   }
  336.  
  337. #ifdef BIT_ACCOUNTING
  338.   printf("used bits: %llu\n", used_bits);
  339. #endif
  340.  
  341.   printf("sum %llu\n", x);
  342.   return 0;
  343. }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×