Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <chrono>
- #include <cstdlib>
- #include <limits>
- #include <type_traits>
- #include <assert.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
- // ** Broken **
- // template <typename T>
- // T SaturatingMultiply1(T X, T Y) {
- // // Hacker's Delight, p. 30
- // T Z = X * Y;
- // if (Y != 0 && Z / Y != X)
- // return std::numeric_limits<T>::max();
- // else
- // return Z;
- // }
- template <typename T>
- T SaturatingMultiply2(T X, T Y) {
- const T Max = std::numeric_limits<T>::max();
- if (Y != 0 && X >= Max / Y) {
- return Max;
- } else {
- return X * Y;
- }
- }
- template <typename T>
- T SaturatingAdd(T X, T Y) {
- // Hacker's Delight, p. 29
- T Z = X + Y;
- if (Z < X || Z < Y)
- return std::numeric_limits<T>::max();
- else
- return Z;
- }
- enum ZeroBehavior {
- /// \brief The returned value is undefined.
- ZB_Undefined,
- /// \brief The returned value is numeric_limits<T>::max()
- ZB_Max,
- /// \brief The returned value is numeric_limits<T>::digits
- ZB_Width
- };
- namespace detail {
- template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
- static std::size_t count(T Val, ZeroBehavior) {
- if (!Val)
- return std::numeric_limits<T>::digits;
- // Bisection method.
- std::size_t ZeroBits = 0;
- for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
- T Tmp = Val >> Shift;
- if (Tmp)
- Val = Tmp;
- else
- ZeroBits |= Shift;
- }
- return ZeroBits;
- }
- };
- #if __GNUC__ >= 4 || defined(_MSC_VER)
- template <typename T> struct LeadingZerosCounter<T, 4> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
- if (ZB != ZB_Undefined && Val == 0)
- return 32;
- #if __has_builtin(__builtin_clz)
- #elif defined(_MSC_VER)
- unsigned long Index;
- _BitScanReverse(&Index, Val);
- return Index ^ 31;
- #endif
- }
- };
- #if !defined(_MSC_VER)
- template <typename T> struct LeadingZerosCounter<T, 8> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
- if (ZB != ZB_Undefined && Val == 0)
- return 64;
- #if __has_builtin(__builtin_clzll)
- return __builtin_clzll(Val);
- #elif defined(_MSC_VER)
- unsigned long Index;
- _BitScanReverse64(&Index, Val);
- return Index ^ 63;
- #endif
- }
- };
- #endif
- #endif
- } // namespace detail
- template <typename T>
- std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
- static_assert(std::numeric_limits<T>::is_integer &&
- !std::numeric_limits<T>::is_signed,
- "Only unsigned integral types are allowed.");
- return detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
- }
- /// Log2_64 - This function returns the floor log base 2 of the specified value,
- /// -1 if the value is zero. (64 bit edition.)
- inline unsigned Log2_64(uint64_t Value) {
- return 63 - countLeadingZeros(Value);
- }
- template <typename T>
- T SaturatingMultiply3(T X, T Y) {
- // Hacker's Delight, p. 30 has a different algorithm, but
- // we don't use that because it fails for uint16_t (where
- // multiplcation can have UB due to promotion to int), and
- // requires a division in addition to the multiplication.
- // Log2(Z) would be either Log2Z or Log2Z + 1.
- // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
- // will necessarily be less than Log2Max as desired.
- int Log2Z = Log2_64(X) + Log2_64(Y);
- T Max = std::numeric_limits<T>::max();
- int Log2Max = Log2_64(Max);
- if (Log2Z < Log2Max)
- return X * Y;
- if (Log2Z > Log2Max)
- return Max;
- // We're going to use the top bit, and maybe overflow one
- // bit past it. Multiply all but the bottom bit then add
- // that on at the end.
- T Z = (X >> 1) * Y;
- if (Z & ~(std::numeric_limits<T>::max() >> 1))
- return Max;
- Z <<= 1;
- return (X & 1) ? SaturatingAdd(Z, Y) : Z;
- }
- int main(int argc, char **argv)
- {
- if (argc != 3) {
- fprintf(stderr, "usage: %s <start> <end>\n", argv[0]);
- return -1;
- }
- uint64_t X1 = strtoull(argv[1], NULL, 10);
- uint64_t X2 = strtoull(argv[2], NULL, 10);
- std::chrono::steady_clock::time_point T1, T2;
- uint64_t Ellapsed2, Ellapsed3;
- uint64_t R = 0;
- T1 = std::chrono::steady_clock::now();
- for (uint64_t X = X1 ; X <= X2; X++) {
- R += SaturatingMultiply2(X, X);
- }
- T2 = std::chrono::steady_clock::now();
- printf("SaturatingMultiply2(): %ld us\n", std::chrono::duration_cast<std::chrono::microseconds>(T2 - T1).count());
- fflush(stdout);
- T1 = std::chrono::steady_clock::now();
- for (uint64_t X = X1 ; X <= X2; X++) {
- R += SaturatingMultiply3(X, X);
- }
- T2 = std::chrono::steady_clock::now();
- printf("SaturatingMultiply3(): %ld us\n", std::chrono::duration_cast<std::chrono::microseconds>(T2 - T1).count());
- fflush(stdout);
- return int(R);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement