Guest User

Untitled

a guest
Jul 26th, 2025
29
0
13 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.30 KB | None | 0 0
  1. #include <array>
  2. #include <cstdint>
  3. #include <limits>
  4. #include <cstddef>
  5. #include <iostream>
  6.  
  7. //generic addition and subracton with carry / borrow used for example. Real version uses intrinsics if available.
  8. unsigned char HP_Add(const unsigned char carry_in, const std::uint32_t a, const std::uint32_t b, std::uint32_t& result) noexcept
  9. {
  10.     std::uint64_t sum = static_cast<std::uint64_t>(a) + b + carry_in;
  11.     result = static_cast<T>(sum);
  12.     return static_cast<unsigned char>(sum >> 32);
  13. }
  14.  
  15. unsigned char HP_Sub(const unsigned char borrow, const std::uint32_t a, const std::uint32_t b, std::uint32_t& result) noexcept
  16. {
  17.     result = a - b + borrow * std::numeric_limits<std::uint32_t>::max();
  18.     return (a < b) || (a == b && static_cast<bool>(borrow));
  19. }
  20.  
  21. //This is the single-precision version of the function I'm trying to implement.
  22. std::uint32_t REDC(const std::uint32_t a, const std::uint32_t b, const std::uint32_t n, const std::uint32_t n_inv) noexcept
  23. {
  24.     assert(n % 2 == 1);
  25.     using std::uint64_t;
  26.     using std::uint32_t;
  27.     const uint64_t ab = static_cast<uint64_t>(a) * b;
  28.     const uint32_t t = static_cast<uint32_t>(ab);           //ab % 2^32
  29.     const uint32_t m = t * n_inv;                           //(t * n_inv) % 2^32
  30.     const uint64_t mn = static_cast<uint64_t>(m) * n;
  31.     uint32_t u = static_cast<uint32_t>((ab - mn) >> 32);    //(ab - mn) / 2^32
  32.     u += n * (ab < mn);
  33.  
  34.     return u;
  35. }
  36.  
  37. //This is a multiprecision version of REDC found above^^^. It works but there is a key difference. The input n_inv
  38. //must be different and instead of implementing (ab - mn) as seen above this version performs (ab + mn).
  39. //I can't get a multiprecision version that performs (ab - mn) to make the proper output.
  40. template <std::size_t S>
  41. std::array<std::uint32_t, S> REDC(const std::array<std::uint32_t, S>& a, const std::array<std::uint32_t, S>& b, const std::array<std::uint32_t, S>& n, const std::uint32_t n_inv) noexcept
  42. {
  43.     using std::size_t; using std::uint32_t; using std::uint64_t;
  44.     std::array<std::uint32_t, S> t = {};
  45.     uint64_t buffer;
  46.     uint32_t carry;
  47.     uint32_t t1;
  48.     uint32_t t2;
  49.  
  50.     for (size_t i = 0; i < S; ++i)
  51.     {
  52.         carry = 0;
  53.  
  54.         for (size_t j = 0; j < S; ++j)
  55.         {
  56.             buffer = static_cast<uint64_t>(a[j]) * b[i] + t[j] + carry;
  57.             t[j] = static_cast<uint32_t>(buffer);
  58.             carry = static_cast<uint32_t>(buffer >> 32);
  59.         }
  60.  
  61.         buffer = static_cast<uint64_t>(t1) + carry;
  62.         t1 = static_cast<uint32_t>(buffer);
  63.         t2 = static_cast<uint32_t>(buffer >> 32);
  64.         const uint32_t m = t[0] * n_inv;
  65.  
  66.         buffer = static_cast<uint64_t>(m) * n[0] + t[0];
  67.         carry = static_cast<uint32_t>(buffer >> 32);
  68.  
  69.         for (size_t j = 1; j < S; ++j)
  70.         {
  71.             buffer = static_cast<uint64_t>(m) * n[j] + t[j] + carry;
  72.             t[j - 1] = static_cast<uint32_t>(buffer);
  73.             carry = static_cast<uint32_t>(buffer >> 32);
  74.         }
  75.  
  76.         buffer = static_cast<uint64_t>(t1) + carry;
  77.         t[S - 1] = static_cast<uint32_t>(buffer);
  78.         t1 = t2 + static_cast<uint32_t>(buffer >> 32);
  79.     }
  80.  
  81.     std::array<std::uint32_t, S> u;
  82.     unsigned char borrow = 0;
  83.  
  84.     for (std::size_t i = 0; i < S; ++i)
  85.         borrow = jaml::detail::HP_Sub(borrow, t[i], n[i], u[i]);
  86.     borrow = jaml::detail::HP_Sub(borrow, t1, 0, t1);
  87.  
  88.     if (borrow)
  89.         return t;
  90.     else
  91.         return u;
  92. }
  93.  
  94. //This version attempts to implement (ab - mn). The output is close but wrong.
  95. template <std::size_t S>
  96. std::array<std::uint32_t, S> REDC2(const std::array<std::uint32_t, S>& a, const std::array<std::uint32_t, S>& b, const std::array<std::uint32_t, S>& n, const std::uint32_t n_inv) noexcept
  97. {
  98.     using std::size_t; using std::uint32_t; using std::uint64_t;
  99.     std::array<std::uint32_t, S> t = {};
  100.     uint64_t buffer;
  101.     uint32_t carry;
  102.     uint32_t t1;
  103.     uint32_t t2;
  104.  
  105.     for (size_t i = 0; i < S; ++i)
  106.     {
  107.         carry = 0;
  108.  
  109.         for (size_t j = 0; j < S; ++j)
  110.         {
  111.             buffer = static_cast<uint64_t>(a[j]) * b[i] + t[j] + carry;
  112.             t[j] = static_cast<uint32_t>(buffer);
  113.             carry = static_cast<uint32_t>(buffer >> 32);
  114.         }
  115.  
  116.         buffer = static_cast<uint64_t>(t1) + carry;
  117.         t1 = static_cast<uint32_t>(buffer);
  118.         t2 = static_cast<uint32_t>(buffer >> 32);
  119.         const uint32_t m = t[0] * n_inv;
  120.  
  121.         buffer = static_cast<uint64_t>(m) * n[0];
  122.         uint32_t low = static_cast<uint32_t>(buffer);
  123.         unsigned char borrow = jaml::detail::HP_Sub(0, t[0], low, t[0]);
  124.         carry = static_cast<uint32_t>(buffer >> 32);
  125.  
  126.         for (size_t j = 1; j < S; ++j)
  127.         {
  128.             buffer = static_cast<uint64_t>(m) * n[j] + carry;
  129.             low = static_cast<uint32_t>(buffer);
  130.             borrow = jaml::detail::HP_Sub(borrow, t[j], low, t[j - 1]);
  131.             carry = static_cast<uint32_t>(buffer >> 32);
  132.         }
  133.  
  134.         borrow = jaml::detail::HP_Sub(borrow, t1, carry, t[S - 1]);
  135.         t1 = t2 - borrow;
  136.     }
  137.  
  138.     std::array<std::uint32_t, S> u;
  139.     unsigned char carry2 = 0;
  140.  
  141.     for (std::size_t i = 0; i < S; ++i)
  142.         carry2 = jaml::detail::HP_Add(carry2, t[i], n[i], u[i]);
  143.     carry2 = jaml::detail::HP_Add(carry2, t1, 0, t1);
  144.  
  145.     if (!carry2)
  146.         return t;
  147.     else
  148.         return u;
  149. }
  150.  
  151. int main()
  152. {
  153.     //test values
  154.     std::array<std::uint32_t, 4> a = { 544677573, 0, 4454356, 0 };
  155.     std::array<std::uint32_t, 4> b = { 355334, 0, 88196580, 0 };
  156.     std::array<std::uint32_t, 4> c = { 696957799, 0, 427290, 0 };
  157.     std::uint32_t n_inv1 = 3223639;
  158.     std::uint32_t n_inv2 = 4291743657;
  159.  
  160.     //REDC2 output is close but wrong. Help me find the mistake?
  161.      std::array<std::uint32_t> arr1 = REDC(a, b, c, n_inv2);
  162.      std::array<std::uint32_t> arr2 = REDC2(a, b, c, n_inv1);
  163. }
Advertisement
Add Comment
Please, Sign In to add comment