Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <array>
- #include <cstdint>
- #include <limits>
- #include <cstddef>
- #include <iostream>
- //generic addition and subracton with carry / borrow used for example. Real version uses intrinsics if available.
- unsigned char HP_Add(const unsigned char carry_in, const std::uint32_t a, const std::uint32_t b, std::uint32_t& result) noexcept
- {
- std::uint64_t sum = static_cast<std::uint64_t>(a) + b + carry_in;
- result = static_cast<T>(sum);
- return static_cast<unsigned char>(sum >> 32);
- }
- unsigned char HP_Sub(const unsigned char borrow, const std::uint32_t a, const std::uint32_t b, std::uint32_t& result) noexcept
- {
- result = a - b + borrow * std::numeric_limits<std::uint32_t>::max();
- return (a < b) || (a == b && static_cast<bool>(borrow));
- }
- //This is the single-precision version of the function I'm trying to implement.
- std::uint32_t REDC(const std::uint32_t a, const std::uint32_t b, const std::uint32_t n, const std::uint32_t n_inv) noexcept
- {
- assert(n % 2 == 1);
- using std::uint64_t;
- using std::uint32_t;
- const uint64_t ab = static_cast<uint64_t>(a) * b;
- const uint32_t t = static_cast<uint32_t>(ab); //ab % 2^32
- const uint32_t m = t * n_inv; //(t * n_inv) % 2^32
- const uint64_t mn = static_cast<uint64_t>(m) * n;
- uint32_t u = static_cast<uint32_t>((ab - mn) >> 32); //(ab - mn) / 2^32
- u += n * (ab < mn);
- return u;
- }
- //This is a multiprecision version of REDC found above^^^. It works but there is a key difference. The input n_inv
- //must be different and instead of implementing (ab - mn) as seen above this version performs (ab + mn).
- //I can't get a multiprecision version that performs (ab - mn) to make the proper output.
- template <std::size_t S>
- std::array<std::uint32_t, S> REDC(const std::array<std::uint32_t, S>& a, const std::array<std::uint32_t, S>& b, const std::array<std::uint32_t, S>& n, const std::uint32_t n_inv) noexcept
- {
- using std::size_t; using std::uint32_t; using std::uint64_t;
- std::array<std::uint32_t, S> t = {};
- uint64_t buffer;
- uint32_t carry;
- uint32_t t1;
- uint32_t t2;
- for (size_t i = 0; i < S; ++i)
- {
- carry = 0;
- for (size_t j = 0; j < S; ++j)
- {
- buffer = static_cast<uint64_t>(a[j]) * b[i] + t[j] + carry;
- t[j] = static_cast<uint32_t>(buffer);
- carry = static_cast<uint32_t>(buffer >> 32);
- }
- buffer = static_cast<uint64_t>(t1) + carry;
- t1 = static_cast<uint32_t>(buffer);
- t2 = static_cast<uint32_t>(buffer >> 32);
- const uint32_t m = t[0] * n_inv;
- buffer = static_cast<uint64_t>(m) * n[0] + t[0];
- carry = static_cast<uint32_t>(buffer >> 32);
- for (size_t j = 1; j < S; ++j)
- {
- buffer = static_cast<uint64_t>(m) * n[j] + t[j] + carry;
- t[j - 1] = static_cast<uint32_t>(buffer);
- carry = static_cast<uint32_t>(buffer >> 32);
- }
- buffer = static_cast<uint64_t>(t1) + carry;
- t[S - 1] = static_cast<uint32_t>(buffer);
- t1 = t2 + static_cast<uint32_t>(buffer >> 32);
- }
- std::array<std::uint32_t, S> u;
- unsigned char borrow = 0;
- for (std::size_t i = 0; i < S; ++i)
- borrow = jaml::detail::HP_Sub(borrow, t[i], n[i], u[i]);
- borrow = jaml::detail::HP_Sub(borrow, t1, 0, t1);
- if (borrow)
- return t;
- else
- return u;
- }
- //This version attempts to implement (ab - mn). The output is close but wrong.
- template <std::size_t S>
- std::array<std::uint32_t, S> REDC2(const std::array<std::uint32_t, S>& a, const std::array<std::uint32_t, S>& b, const std::array<std::uint32_t, S>& n, const std::uint32_t n_inv) noexcept
- {
- using std::size_t; using std::uint32_t; using std::uint64_t;
- std::array<std::uint32_t, S> t = {};
- uint64_t buffer;
- uint32_t carry;
- uint32_t t1;
- uint32_t t2;
- for (size_t i = 0; i < S; ++i)
- {
- carry = 0;
- for (size_t j = 0; j < S; ++j)
- {
- buffer = static_cast<uint64_t>(a[j]) * b[i] + t[j] + carry;
- t[j] = static_cast<uint32_t>(buffer);
- carry = static_cast<uint32_t>(buffer >> 32);
- }
- buffer = static_cast<uint64_t>(t1) + carry;
- t1 = static_cast<uint32_t>(buffer);
- t2 = static_cast<uint32_t>(buffer >> 32);
- const uint32_t m = t[0] * n_inv;
- buffer = static_cast<uint64_t>(m) * n[0];
- uint32_t low = static_cast<uint32_t>(buffer);
- unsigned char borrow = jaml::detail::HP_Sub(0, t[0], low, t[0]);
- carry = static_cast<uint32_t>(buffer >> 32);
- for (size_t j = 1; j < S; ++j)
- {
- buffer = static_cast<uint64_t>(m) * n[j] + carry;
- low = static_cast<uint32_t>(buffer);
- borrow = jaml::detail::HP_Sub(borrow, t[j], low, t[j - 1]);
- carry = static_cast<uint32_t>(buffer >> 32);
- }
- borrow = jaml::detail::HP_Sub(borrow, t1, carry, t[S - 1]);
- t1 = t2 - borrow;
- }
- std::array<std::uint32_t, S> u;
- unsigned char carry2 = 0;
- for (std::size_t i = 0; i < S; ++i)
- carry2 = jaml::detail::HP_Add(carry2, t[i], n[i], u[i]);
- carry2 = jaml::detail::HP_Add(carry2, t1, 0, t1);
- if (!carry2)
- return t;
- else
- return u;
- }
- int main()
- {
- //test values
- std::array<std::uint32_t, 4> a = { 544677573, 0, 4454356, 0 };
- std::array<std::uint32_t, 4> b = { 355334, 0, 88196580, 0 };
- std::array<std::uint32_t, 4> c = { 696957799, 0, 427290, 0 };
- std::uint32_t n_inv1 = 3223639;
- std::uint32_t n_inv2 = 4291743657;
- //REDC2 output is close but wrong. Help me find the mistake?
- std::array<std::uint32_t> arr1 = REDC(a, b, c, n_inv2);
- std::array<std::uint32_t> arr2 = REDC2(a, b, c, n_inv1);
- }
Advertisement
Add Comment
Please, Sign In to add comment