Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <set>
- #include <random>
- #include <cassert>
- #include <climits>
- #include <cstdlib>
- #include <hash_fun.h>
- #include <iostream>
- #include <cmath>
- class UniqCounter {
- public:
- UniqCounter() {
- array.assign(size, 0);
- }
- void add(int x) {
- unsigned int hash = my_hash(x);
- unsigned int mask = hash >> (32 - K);
- array[mask] = std::max(array[mask], phi(hash));
- }
- int get_uniq_num() const {
- double c = 0;
- double res = alpha * size * size;
- for(size_t i = 0; i < size; i++) {
- c += 1.0 / (1LL << array[i]);
- }
- res = res / c;
- if (res <= 2.5 * size) {
- int delta = 0;
- for (size_t i = 0; i < size; ++i) {
- delta += array[i] == 0;
- }
- if (delta > 0) {
- res = size * log((1.0 * size) / delta);
- }
- }
- else if (res > 1.0 / 30 * (1LL << 32)) {
- res = -(1LL << 32) * log(1 - res / (1LL << 32));
- }
- return static_cast<int>(res);
- }
- private:
- const size_t K = 10;
- long long size = (1 << K);
- double alpha = 0.7213 / (1 + 1.079 / size);
- std::vector<long long> array;
- unsigned int my_hash(int number) {
- unsigned int key = number;
- key += ~(key << 16);
- key ^= (key >> 5);
- key += (key << 3);
- key ^= (key >> 13);
- key += ~(key << 9);
- key ^= (key >> 17);
- return key;
- }
- long long phi(int x) {
- long long res = 0;
- while(((x >> res) & 1) == 0 && res <= (32 - K)) {
- res++;
- }
- return res + 1;
- }
- };
- double relative_error(int expected, int got) {
- return abs(got - expected) / (double) expected;
- }
- int main() {
- std::random_device rd;
- std::mt19937 gen(rd());
- const int N = (int) 1e6;
- for (int k : {1, 10, 1000, 10000, N / 10, N, N * 10}) {
- std::uniform_int_distribution<> dis(1, k);
- std::set<int> all;
- UniqCounter counter;
- for (int i = 0; i < N; i++) {
- int value = dis(gen);
- all.insert(value);
- counter.add(value);
- }
- int expected = (int) all.size();
- int counter_result = counter.get_uniq_num();
- double error = relative_error(expected, counter_result);
- printf("%d numbers in range [1 .. %d], %d uniq, %d result, %.5f relative error\n", N, k, expected, counter_result, error);
- assert(error <= 0.1);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement