Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "stdafx.h"
- #include <algorithm>
- #include <vector>
- #include <stdint.h>
- #include <chrono>
- #include <thread>
- #include <stdio.h>
- using namespace std::chrono;
- /// XorShift32 generator; extremely fast, 2^32-1 period, way better quality
- /// than LCG but fail some test suites
- struct XorShift32 {
- /// This stuff allows to use this class wherever a library function
- /// requires a UniformRandomBitGenerator (e.g. std::shuffle)
- typedef uint32_t result_type;
- static uint32_t min() { return 1; }
- static uint32_t max() { return uint32_t(-1); }
- /// PRNG state
- uint32_t y;
- /// Initializes with seed
- XorShift32(uint32_t seed = 0) : y(seed) {
- if (y == 0) y = 2463534242UL;
- }
- /// Returns a value in the range [1, 1<<32)
- uint32_t operator()() {
- y ^= (y << 13);
- y ^= (y >> 17);
- y ^= (y << 15);
- return y;
- }
- /// Returns a value in the range [0, limit); this conforms to the RandomFunc
- /// requirements for std::random_shuffle
- uint32_t operator()(uint32_t limit) {
- return (*this)() % limit;
- }
- };
- uint8_t lookup(std::vector<uint8_t>& main_arr, std::vector<uint32_t>& sec_arr, unsigned idx) {
- // extract the 2 bits of our interest from the main array
- uint8_t v = (main_arr[idx >> 2] >> (2 * (idx & 3))) & 3;
- // usual (likely) case: value between 0 and 2
- if (v != 3) return v;
- // bad case: lookup the index<<8 in the secondary array
- // lower_bound finds the first >=, so we don't need to mask out the value
- auto ptr = std::lower_bound(sec_arr.begin(), sec_arr.end(), idx << 8);
- #ifdef _DEBUG
- // some coherency checks
- if (ptr == sec_arr.end()) std::abort();
- if ((*ptr >> 8) != idx) std::abort();
- #endif
- // extract our 8-bit value from the 32 bit (index, value) thingie
- return (*ptr) & 0xff;
- }
- void populate(std::vector<uint8_t>& main_arr, std::vector<uint32_t>& sec_arr, uint8_t *source, size_t size) {
- main_arr.clear(); sec_arr.clear();
- // size the main storage (round up)
- main_arr.resize((size + 3) / 4);
- for (size_t idx = 0; idx < size; ++idx) {
- uint8_t in = source[idx];
- uint8_t &target = main_arr[idx >> 2];
- // if the input doesn't fit, cap to 3 and put in secondary storage
- if (in >= 3) {
- // top 24 bits: index; low 8 bit: value
- sec_arr.push_back((idx << 8) | in);
- in = 3;
- }
- // store in the target according to the position
- target |= in << ((idx & 3) * 2);
- }
- }
- volatile unsigned out;
- int all = 0;
- int all_count = 0;
- int RunThreadTest() {
- std::vector<uint8_t> main_arr;
- std::vector<uint32_t> sec_arr;
- XorShift32 xs;
- std::vector<uint8_t> vec;
- int size = 10000000;
- for (int i = 0; i < size; ++i) {
- uint32_t v = xs();
- if (v < 1825361101) v = 0; // 42.5%
- else if (v < 4080218931) v = 1; // 95.0%
- else if (v < 4252017623) v = 2; // 99.0%
- else {
- while ((v & 0xff) < 3) v = xs();
- }
- vec.push_back(v);
- }
- populate(main_arr, sec_arr, vec.data(), vec.size());
- std::this_thread::sleep_for(2s);
- printf("Start Benchmark\n");
- for (int i = 0; i < 10; ++i) {
- unsigned o = 0;
- auto beg = high_resolution_clock::now();
- for (int i = 0; i < size; ++i) {
- o += lookup(main_arr, sec_arr, xs() % size);
- }
- out += o;
- printf("lookup: %10d µs\n", int((high_resolution_clock::now() - beg) / microseconds(1)));
- //o = 0;
- //beg = high_resolution_clock::now();
- //for (int i = 0; i < size; ++i) {
- // o += vec[xs() % size];
- //}
- //out += o;
- //printf("array: %10d µs\n", int((high_resolution_clock::now() - beg) / microseconds(1)));
- all += int((high_resolution_clock::now() - beg) / microseconds(1));
- all_count++;
- }
- printf("array all: %10d µs\n", all / all_count);
- }
- int main() {
- const int numThreads = 6;
- std::thread tt[numThreads];
- for (int t = 0; t < numThreads; t++) {
- tt[t] = std::thread(RunThreadTest);
- }
- for (int t = 0; t < numThreads; t++) {
- tt[t].join();
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement