Guest User

no alloc sqrt

a guest
Jun 2nd, 2021
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.48 KB | None | 0 0
  1. #pragma once
  2. #include "sqrt.h"
  3. #include <boost/multiprecision/gmp.hpp>
  4.  
  5. template <class Integer>
  6. Integer karatsuba_sqrt(const Integer& x, Integer& r, Integer& t, Integer& q, size_t offset, size_t bits)
  7. {
  8. #ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
  9.    // std::sqrt is not constexpr by standard, so use this
  10.    if (BOOST_MP_IS_CONST_EVALUATED(bits)) {
  11.       if (bits <= 4) {
  12.          Integer c = x;
  13.          c >>= offset;
  14.          if (c.is_zero()) {
  15.             r = 0u;
  16.             return 0u;
  17.          }
  18.          else if (c < 4) {
  19.             r = c - 1;
  20.             return 1u;
  21.          }
  22.          else if (c < 9) {
  23.             r = c - 4;
  24.             return 2u;
  25.          }
  26.          else {
  27.             r = c - 9;
  28.             return 3u;
  29.          }
  30.       }
  31.    }
  32.    else
  33. #endif
  34.    // we can calculate it faster with std::sqrt
  35.    if (bits <= 64) {
  36.       const uint64_t int32max = uint64_t((std::numeric_limits<uint32_t>::max)());
  37.       uint64_t val = static_cast<uint64_t>(x >> offset);
  38.       uint64_t s64 = static_cast<uint64_t>(std::sqrt(static_cast<long double>(val)));
  39.       // converting to long double can loose some precision, and `sqrt` can give eps error, so we'll fix this
  40.       // this is needed
  41.       while (s64 > int32max || s64 * s64 > val) s64--;
  42.       // in my tests this never fired, but theoretically this might be needed
  43.       while (s64 < int32max && (s64 + 1) * (s64 + 1) <= val) s64++;
  44.       r = val - s64 * s64;
  45.       return s64;
  46.    }
  47.    // https://hal.inria.fr/file/index/docid/72854/filename/RR-3805.pdf
  48.    size_t b = bits / 4;
  49.    Integer s = karatsuba_sqrt(x, r, t, q, offset + b * 2, bits - b * 2);
  50.  
  51.    r <<= b;
  52.    t ^= t;
  53.    bit_set(t, offset + b * 2);
  54.    t--;
  55.    t &= x;
  56.    t >>= (b + offset);
  57.    t += r;
  58.    s <<= 1;
  59.    divide_qr(t, s, q, r);
  60.    
  61.    r <<= b;
  62.    t ^= t;
  63.    bit_set(t, b + offset);
  64.    t--;
  65.    t &= x;
  66.    t >>= offset;
  67.    r += t;
  68.    s <<= (b - 1); // we already <<1 it before
  69.    s += q;
  70.    q *= q;
  71.  
  72.    // we substract after, so it works for unsigned integers too
  73.    if (r < q) {
  74.       t = s;
  75.       t <<= 1;
  76.       t--;
  77.       r += t;
  78.       s--;
  79.    }
  80.    r -= q;
  81.    return s;
  82. }
  83.  
  84. template <class Integer>
  85. Integer kar_sqrt(const Integer& x, Integer& r)
  86. {
  87.    if (x.is_zero()) {
  88.       r = 0u;
  89.       return 0u;
  90.    }
  91.    Integer t{};
  92.    Integer q{};
  93.    return karatsuba_sqrt(x, r, t, q, 0, msb(x) + 1);
  94. }
  95.  
  96. template <class Integer>
  97. Integer kar_sqrt(const Integer& x)
  98. {
  99.    Integer r(0);
  100.    return kar_sqrt(x, r);
  101. }
  102.  
Advertisement
Add Comment
Please, Sign In to add comment