mfgnik

Untitled

Jun 27th, 2020
268
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.58 KB | None | 0 0
  1. #include <algorithm>
  2. #include <cstdint>
  3. #include <optional>
  4. #include <random>
  5. #include <vector>
  6.  
  7.  
  8. class HashFunctor {
  9. public:
  10.     HashFunctor() {};
  11.  
  12.     HashFunctor(uint32_t angle, uint32_t shift, uint32_t prime)
  13.             : angle_coefficient_(angle), shift_coefficient_(shift), modulo_(prime) {};
  14.  
  15.     uint32_t operator()(int key) const;
  16.  
  17. private:
  18.     uint32_t angle_coefficient_;
  19.     uint32_t shift_coefficient_;
  20.     uint32_t modulo_;
  21. };
  22.  
  23. class Bucket {
  24. public:
  25.     void Initialize(const std::vector<int> &numbers, std::mt19937 &generator);
  26.  
  27.     bool Contains(int key) const;
  28.  
  29. private:
  30.     bool CheckHash(const std::vector<int> &numbers);
  31.     HashFunctor hash_function_;
  32.     std::vector<std::optional<int>> cells_;
  33. };
  34.  
  35. class FixedSet {
  36. public:
  37.     void Initialize(const std::vector<int> &numbers);
  38.  
  39.     bool Contains(int key) const;
  40.  
  41.     bool CheckHash(const std::vector<int> &numbers);
  42.  
  43. private:
  44.     HashFunctor hash_function_;
  45.     std::vector<Bucket> buckets_;
  46.     constexpr static int kSeedConst = 42;
  47.     constexpr static int kMemoryCoefficient = 6;
  48. };
  49.  
  50. HashFunctor CreateHashFunction(std::mt19937 &generator);
  51.  
  52. std::vector<size_t> BuildDistributionVector(const HashFunctor &hash,
  53.                                             const std::vector<int> &numbers, uint32_t size);
  54.  
  55. int64_t CalculateSumSquaresOfDistribution(const std::vector<size_t> &distribution);
  56.  
  57. uint32_t HashFunctor::operator()(int key) const {
  58.     auto result = static_cast<uint32_t>(
  59.             (key * angle_coefficient_) % modulo_);
  60.     result += shift_coefficient_;
  61.     result %= modulo_;
  62.     return result;
  63. }
  64.  
  65. HashFunctor CreateHashFunction(std::mt19937 &generator) {
  66.     constexpr int kPrimeConst = 2000000011;
  67.     std::uniform_int_distribution<> random_angle(1, kPrimeConst - 1);
  68.     std::uniform_int_distribution<> random_shift(0, kPrimeConst - 1);
  69.     auto angle = random_angle(generator);
  70.     auto shift = random_shift(generator);
  71.     return HashFunctor(angle, shift, kPrimeConst);
  72. }
  73.  
  74. std::vector<size_t> BuildDistributionVector(const HashFunctor &hash,
  75.                                             const std::vector<int> &numbers, uint32_t size) {
  76.     std::vector < size_t > distribution(size);
  77.     for (auto number : numbers) {
  78.         ++distribution[hash(number) % size];
  79.     }
  80.     return distribution;
  81. }
  82.  
  83. int64_t CalculateSumSquaresOfDistribution(const std::vector<size_t> &distribution) {
  84.     int64_t sum_squares = 0;
  85.     for (auto number : distribution) {
  86.         sum_squares += static_cast<int64_t>(number) * number;
  87.     }
  88.     return sum_squares;
  89. }
  90.  
  91. void Bucket::Initialize(const std::vector<int> &numbers, std::mt19937 &generator) {
  92.     if (numbers.empty()) {
  93.         return;
  94.     }
  95.     size_t size = numbers.size() * numbers.size();
  96.     cells_.resize(size);
  97.     do {
  98.         hash_function_ = CreateHashFunction(generator);
  99.     } while (!CheckHash(numbers));
  100.     for (auto number : numbers) {
  101.         cells_[hash_function_(number) % size] = number;
  102.     }
  103. }
  104.  
  105. bool Bucket::Contains(int key) const {
  106.     if (cells_.empty()) {
  107.         return false;
  108.     }
  109.     auto cell = cells_[hash_function_(key) % cells_.size()];
  110.     return cell == key;
  111. }
  112.  
  113. bool Bucket::CheckHash(const std::vector<int> &numbers) {
  114.     size_t size = cells_.size();
  115.     std::vector < size_t > distribution = BuildDistributionVector(hash_function_, numbers, size);
  116.     return *std::max_element(distribution.begin(), distribution.end()) <= 1;
  117. }
  118.  
  119. void FixedSet::Initialize(const std::vector<int> &numbers) {
  120.     buckets_.clear();
  121.     if (numbers.empty()) {
  122.         return;
  123.     }
  124.     buckets_.resize(numbers.size());
  125.     std::mt19937 random_gen(kSeedConst);
  126.  
  127.     do {
  128.         hash_function_ = CreateHashFunction(random_gen);
  129.     } while (!CheckHash(numbers));
  130.  
  131.     std::vector<std::vector<int>> keys(numbers.size());
  132.     for (auto number : numbers) {
  133.         keys[hash_function_(number) % numbers.size()].push_back(number);
  134.     }
  135.     for (size_t index = 0; index < numbers.size(); ++index) {
  136.         buckets_[index].Initialize(keys[index], random_gen);
  137.     }
  138. }
  139.  
  140.  
  141. bool FixedSet::CheckHash(const std::vector<int> &numbers) {
  142.     std::vector < size_t > distribution =
  143.             BuildDistributionVector(hash_function_, numbers, buckets_.size());
  144.     int64_t sum_of_squares = CalculateSumSquaresOfDistribution(distribution);
  145.     return sum_of_squares < kMemoryCoefficient * static_cast<int64_t >(buckets_.size());
  146. }
  147.  
  148. bool FixedSet::Contains(int key) const {
  149.     if (buckets_.empty()) {
  150.         return false;
  151.     }
  152.     return buckets_[hash_function_(key) % buckets_.size()].Contains(key);
  153. }
Add Comment
Please, Sign In to add comment