Untitled

#include <chrono>
#include <cstdlib>
#include <limits>
#include <type_traits>

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// ** Broken **
// template <typename T>
// T SaturatingMultiply1(T X, T Y) {
//   // Hacker's Delight, p. 30
//   T Z = X * Y;
//   if (Y != 0 && Z / Y != X)
//     return std::numeric_limits<T>::max();
//   else
//     return Z;
// }

template <typename T>
T SaturatingMultiply2(T X, T Y) {
  const T Max = std::numeric_limits<T>::max();
  if (Y != 0 && X >= Max / Y) {
    return Max;
  } else {
    return X * Y;
  }
}

template <typename T>
T SaturatingAdd(T X, T Y) {
  // Hacker's Delight, p. 29
  T Z = X + Y;
  if (Z < X || Z < Y)
    return std::numeric_limits<T>::max();
  else
    return Z;
}

enum ZeroBehavior {
  /// \brief The returned value is undefined.
  ZB_Undefined,
  /// \brief The returned value is numeric_limits<T>::max()
  ZB_Max,
  /// \brief The returned value is numeric_limits<T>::digits
  ZB_Width
};

namespace detail {
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
  static std::size_t count(T Val, ZeroBehavior) {
    if (!Val)
      return std::numeric_limits<T>::digits;

    // Bisection method.
    std::size_t ZeroBits = 0;
    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
      T Tmp = Val >> Shift;
      if (Tmp)
        Val = Tmp;
      else
        ZeroBits |= Shift;
    }
    return ZeroBits;
  }
};

#if __GNUC__ >= 4 || defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 4> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 32;

#if __has_builtin(__builtin_clz)

#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanReverse(&Index, Val);
    return Index ^ 31;
#endif
  }
};

#if !defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 8> {
  static std::size_t count(T Val, ZeroBehavior ZB) {
    if (ZB != ZB_Undefined && Val == 0)
      return 64;

#if __has_builtin(__builtin_clzll)
    return __builtin_clzll(Val);
#elif defined(_MSC_VER)
    unsigned long Index;
    _BitScanReverse64(&Index, Val);
    return Index ^ 63;
#endif
  }
};
#endif
#endif
} // namespace detail

template <typename T>
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
  static_assert(std::numeric_limits<T>::is_integer &&
                    !std::numeric_limits<T>::is_signed,
                "Only unsigned integral types are allowed.");
  return detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
}

/// Log2_64 - This function returns the floor log base 2 of the specified value,
/// -1 if the value is zero. (64 bit edition.)
inline unsigned Log2_64(uint64_t Value) {
  return 63 - countLeadingZeros(Value);
}

template <typename T>
T SaturatingMultiply3(T X, T Y) {
  // Hacker's Delight, p. 30 has a different algorithm, but
  // we don't use that because it fails for uint16_t (where
  // multiplcation can have UB due to promotion to int), and
  // requires a division in addition to the multiplication.

  // Log2(Z) would be either Log2Z or Log2Z + 1.
  // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
  // will necessarily be less than Log2Max as desired.
  int Log2Z = Log2_64(X) + Log2_64(Y);
  T Max = std::numeric_limits<T>::max();
  int Log2Max = Log2_64(Max);
  if (Log2Z < Log2Max)
    return X * Y;
  if (Log2Z > Log2Max)
    return Max;

  // We're going to use the top bit, and maybe overflow one
  // bit past it. Multiply all but the bottom bit then add
  // that on at the end.
  T Z = (X >> 1) * Y;
  if (Z & ~(std::numeric_limits<T>::max() >> 1))
    return Max;
  Z <<= 1;
  return (X & 1) ? SaturatingAdd(Z, Y) : Z;
}

int main(int argc, char **argv)
{
  if (argc != 3) {
    fprintf(stderr, "usage: %s <start> <end>\n", argv[0]);
    return -1;
  }

  uint64_t X1 = strtoull(argv[1], NULL, 10);
  uint64_t X2 = strtoull(argv[2], NULL, 10);

  std::chrono::steady_clock::time_point T1, T2;

  uint64_t Ellapsed2, Ellapsed3;

  uint64_t R = 0;

  T1 = std::chrono::steady_clock::now();
  for (uint64_t X = X1 ; X <= X2; X++) {
    R += SaturatingMultiply2(X, X);
  }
  T2 = std::chrono::steady_clock::now();
  printf("SaturatingMultiply2(): %ld us\n", std::chrono::duration_cast<std::chrono::microseconds>(T2 - T1).count());
  fflush(stdout);

  T1 = std::chrono::steady_clock::now();
  for (uint64_t X = X1 ; X <= X2; X++) {
    R += SaturatingMultiply3(X, X);
  }
  T2 = std::chrono::steady_clock::now();
  printf("SaturatingMultiply3(): %ld us\n", std::chrono::duration_cast<std::chrono::microseconds>(T2 - T1).count());
  fflush(stdout);

  return int(R);
}