Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <algorithm>
- #include <cstdint>
- #include <optional>
- #include <random>
- #include <vector>
- class HashFunctor {
- public:
- HashFunctor() {};
- HashFunctor(uint32_t angle, uint32_t shift, uint32_t prime)
- : angle_coefficient_(angle), shift_coefficient_(shift), modulo_(prime) {};
- uint32_t operator()(int key) const;
- private:
- uint32_t angle_coefficient_;
- uint32_t shift_coefficient_;
- uint32_t modulo_;
- };
- class Bucket {
- public:
- void Initialize(const std::vector<int> &numbers, std::mt19937 &generator);
- bool Contains(int key) const;
- private:
- bool CheckHash(const std::vector<int> &numbers);
- HashFunctor hash_function_;
- std::vector<std::optional<int>> cells_;
- };
- class FixedSet {
- public:
- void Initialize(const std::vector<int> &numbers);
- bool Contains(int key) const;
- bool CheckHash(const std::vector<int> &numbers);
- private:
- HashFunctor hash_function_;
- std::vector<Bucket> buckets_;
- constexpr static int kSeedConst = 42;
- constexpr static int kMemoryCoefficient = 6;
- };
- HashFunctor CreateHashFunction(std::mt19937 &generator);
- std::vector<size_t> BuildDistributionVector(const HashFunctor &hash,
- const std::vector<int> &numbers, uint32_t size);
- int64_t CalculateSumSquaresOfDistribution(const std::vector<size_t> &distribution);
- uint32_t HashFunctor::operator()(int key) const {
- auto result = static_cast<uint32_t>(
- (key * angle_coefficient_) % modulo_);
- result += shift_coefficient_;
- result %= modulo_;
- return result;
- }
- HashFunctor CreateHashFunction(std::mt19937 &generator) {
- constexpr int kPrimeConst = 2000000011;
- std::uniform_int_distribution<> random_angle(1, kPrimeConst - 1);
- std::uniform_int_distribution<> random_shift(0, kPrimeConst - 1);
- auto angle = random_angle(generator);
- auto shift = random_shift(generator);
- return HashFunctor(angle, shift, kPrimeConst);
- }
- std::vector<size_t> BuildDistributionVector(const HashFunctor &hash,
- const std::vector<int> &numbers, uint32_t size) {
- std::vector < size_t > distribution(size);
- for (auto number : numbers) {
- ++distribution[hash(number) % size];
- }
- return distribution;
- }
- int64_t CalculateSumSquaresOfDistribution(const std::vector<size_t> &distribution) {
- int64_t sum_squares = 0;
- for (auto number : distribution) {
- sum_squares += static_cast<int64_t>(number) * number;
- }
- return sum_squares;
- }
- void Bucket::Initialize(const std::vector<int> &numbers, std::mt19937 &generator) {
- if (numbers.empty()) {
- return;
- }
- size_t size = numbers.size() * numbers.size();
- cells_.resize(size);
- do {
- hash_function_ = CreateHashFunction(generator);
- } while (!CheckHash(numbers));
- for (auto number : numbers) {
- cells_[hash_function_(number) % size] = number;
- }
- }
- bool Bucket::Contains(int key) const {
- if (cells_.empty()) {
- return false;
- }
- auto cell = cells_[hash_function_(key) % cells_.size()];
- return cell == key;
- }
- bool Bucket::CheckHash(const std::vector<int> &numbers) {
- size_t size = cells_.size();
- std::vector < size_t > distribution = BuildDistributionVector(hash_function_, numbers, size);
- return *std::max_element(distribution.begin(), distribution.end()) <= 1;
- }
- void FixedSet::Initialize(const std::vector<int> &numbers) {
- buckets_.clear();
- if (numbers.empty()) {
- return;
- }
- buckets_.resize(numbers.size());
- std::mt19937 random_gen(kSeedConst);
- do {
- hash_function_ = CreateHashFunction(random_gen);
- } while (!CheckHash(numbers));
- std::vector<std::vector<int>> keys(numbers.size());
- for (auto number : numbers) {
- keys[hash_function_(number) % numbers.size()].push_back(number);
- }
- for (size_t index = 0; index < numbers.size(); ++index) {
- buckets_[index].Initialize(keys[index], random_gen);
- }
- }
- bool FixedSet::CheckHash(const std::vector<int> &numbers) {
- std::vector < size_t > distribution =
- BuildDistributionVector(hash_function_, numbers, buckets_.size());
- int64_t sum_of_squares = CalculateSumSquaresOfDistribution(distribution);
- return sum_of_squares < kMemoryCoefficient * static_cast<int64_t >(buckets_.size());
- }
- bool FixedSet::Contains(int key) const {
- if (buckets_.empty()) {
- return false;
- }
- return buckets_[hash_function_(key) % buckets_.size()].Contains(key);
- }
Add Comment
Please, Sign In to add comment