Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <set>
- #include <random>
- #include <cassert>
- #include <iostream>
- #include <algorithm>
- class UniqCounter {
- private:
- static constexpr size_t p = 11u;
- static constexpr size_t m = 1u << p;
- static constexpr size_t shift = 32u - p;
- static constexpr double alpha = 0.7213 / (1 + 1.079 / m);
- static constexpr size_t sel = 1u << (14u - p);
- uint32_t M[sel][m];
- public:
- UniqCounter() {
- for (size_t i = 0; i < sel; ++i) {
- for (size_t j = 0; j < m; j++) {
- M[i][j] = 0;
- }
- }
- }
- public:
- void add(int x) {
- uint32_t value = x;
- for (size_t i = 0; i < sel; ++i) {
- value = hash(value);
- uint32_t prefix = value >> shift;
- M[i][prefix] = std::max(M[i][prefix], rank(value));
- }
- }
- int get_uniq_num() const {
- std::vector<double> ar(sel);
- for (size_t i = 0; i < sel; ++i) {
- double sum = 0;
- for (size_t j = 0; j < m; ++j) {
- sum += 1 / pow(2, M[i][j]);
- }
- double E = alpha * m * m / sum;
- if (E <= 2.5 * m) {
- size_t V = 0;
- for (size_t j = 0; j < m; ++j) {
- if (M[i][j] == 0)
- V++;
- }
- if (V != 0) {
- E = m * log(m / (double) V);
- }
- }
- ar[i] = E;
- }
- double E = 0;
- std::sort(ar.begin(), ar.end());
- for (size_t i = sel / 4u;
- i < 3u * sel / 4u; ++i) { //if selections count divide by 4, everything is ok here
- E += ar[i];
- }
- return E / (sel / 2u);
- }
- private:
- uint32_t rank(uint32_t x) {
- uint32_t r = 1;
- while ((x & 1u) == 0 && r <= 32u - p) {
- ++r;
- x >>= 1u;
- }
- return r;
- }
- uint32_t hash(uint32_t x) {
- x = ((x >> 16) ^ x) * 0x119de1f3;
- x = ((x >> 16) ^ x) * 0x119de1f3;
- x = ((x >> 16) ^ x) * 0x119de1f3;
- x = ((x >> 16) ^ x) * 0x119de1f3;
- x = (x >> 16) ^ x;
- return x;
- }
- };
- double relative_error(int expected, int got) {
- return abs(got - expected) / (double) expected;
- }
- void BLOCK(const std::string &block_name) {
- std::cout << "BLOCK: " << block_name << ": " << std::endl;
- }
- int main() {
- {
- std::random_device rd;
- std::mt19937 gen(rd());
- const int N = (int) 1e6;
- for (int k : {1, 10, 1000, 10000, N / 10, N, N * 10}) {
- std::uniform_int_distribution<> dis(1, k);
- std::set<int> all;
- UniqCounter counter;
- for (int i = 0; i < N; i++) {
- int value = dis(gen);
- all.insert(value);
- counter.add(value);
- }
- int expected = (int) all.size();
- int counter_result = counter.get_uniq_num();
- double error = relative_error(expected, counter_result);
- printf("%d numbers in range [1 .. %d], %d uniq, %d result, %.5f relative error\n", N, k, expected,
- counter_result, error);
- assert(error <= 0.1);
- }
- }
- {
- BLOCK("Just random numbers");
- std::random_device rd;
- const int N = (int) 1e6;
- for (int i = 0; i < 4; ++i) {
- std::set<int> all;
- UniqCounter counter;
- for (int j = 0; j < N; j++) {
- int value = rd();
- all.insert(value);
- counter.add(value);
- }
- int expected = (int) all.size();
- int counter_result = counter.get_uniq_num();
- double error = relative_error(expected, counter_result);
- printf("%d numbers in range [%d .. %d], %d uniq, %d result, %.5f relative error\n", N,
- std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), expected,
- counter_result, error);
- assert(error <= 0.1);
- }
- }
- {
- //it is so long test
- BLOCK("Many insertions");
- std::random_device rd;
- const int N = std::numeric_limits<int>::max();
- UniqCounter counter;
- for (int i = 0; i < N; i++) {
- // if (i % (int) 1e7 == 0) //to see progress
- // std::cout << i / 1e7 << std::endl;
- counter.add(rd());
- }
- int expected = N;
- int counter_result = counter.get_uniq_num();
- double error = relative_error(expected, counter_result);
- printf("%d numbers in range [%d .. %d], ~%d uniq, %d result, %.5f relative error\n", N,
- std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), expected,
- counter_result, error);
- assert(error <= 0.1);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement