Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * This program compares performance of
- * cpp random, c random and intel rdrand random generator functions.
- *
- * Compile with GCC or clang with -DNDEBUG -O2 -m64 -mrdrand
- */
- #include <stdlib.h>
- #include <random>
- #include <iostream>
- #include <immintrin.h>
- #include <thread>
- #include <cpuid.h>
- #include <x86intrin.h>
- #include <cinttypes>
- /* Performance benchmark taken from here
- * https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf
- * page 17-18
- */
- #if defined(__unix__)
- #define USE_ASM_RDTSC
- #endif
/* Number of generator calls issued inside each timed benchmark loop. */
static constexpr uint64_t N_RUNS = 10ULL * 1000ULL * 1000ULL;
- #if defined(USE_ASM_RDTSC)
/*
 * Cycle timer based on the time-stamp counter, inline-assembly variant.
 *
 * sample_start() executes CPUID followed by RDTSC and stores the counter
 * halves; sample_stop() executes RDTSCP, stores the counter halves and then
 * executes CPUID. cycles() returns the difference between the two samples.
 *
 * Calling cycles() before both samples were taken yields a meaningless
 * (but well-defined) value because all halves start out as zero.
 */
struct perf_timer {
    /* Records the starting timestamp (CPUID, then RDTSC). */
    __attribute__((always_inline))
    void sample_start() {
        asm volatile(
            "CPUID\n\t"
            "RDTSC\n\t"
            "mov %%edx, %0\n\t"
            "mov %%eax, %1\n\t"
            : "=r" (high0), "=r" (low0)
            :
            // "memory" keeps the compiler from moving loads/stores of the
            // measured code across the sampling point.
            : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    }

    /* Records the ending timestamp (RDTSCP, then CPUID). */
    __attribute__((always_inline))
    void sample_stop() {
        asm volatile(
            "RDTSCP\n\t"
            "mov %%edx, %0\n\t"
            "mov %%eax, %1\n\t"
            "CPUID\n\t"
            : "=r" (high1), "=r" (low1)
            :
            : "%rax", "%rbx", "%rcx", "%rdx", "memory");
    }

    /*
     * Returns stop - start as a 64-bit cycle count.
     *
     * Both timestamps are assembled into 64-bit values before subtracting.
     * Subtracting the 32-bit halves separately drops the borrow whenever the
     * low word wraps between the samples, inflating the result by 2^32.
     */
    uint64_t cycles() const {
        const uint64_t begin = (static_cast<uint64_t>(high0) << 32) | low0;
        const uint64_t end   = (static_cast<uint64_t>(high1) << 32) | low1;
        return end - begin;
    }

private:
    // EDX:EAX halves of the start (…0) and stop (…1) timestamps.
    unsigned int low0 = 0, high0 = 0, low1 = 0, high1 = 0;
};
- #else
/*
 * Cycle timer based on the time-stamp counter, compiler-intrinsics variant.
 *
 * sample_start() issues __cpuid(0, ...) and then reads __rdtsc();
 * sample_stop() reads __rdtscp() and then issues __cpuid(0, ...).
 * cycles() is the difference between the two readings.
 */
struct perf_timer {
    /* Records the starting timestamp. */
    __attribute__((always_inline))
    void sample_start() {
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
        __cpuid(0, eax, ebx, ecx, edx);   // register outputs are discarded
        begin_tsc = __rdtsc();
    }

    /* Records the ending timestamp. */
    __attribute__((always_inline))
    void sample_stop() {
        unsigned int aux = 0;             // receives the RDTSCP auxiliary value; unused
        end_tsc = __rdtscp(&aux);
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
        __cpuid(0, eax, ebx, ecx, edx);   // register outputs are discarded
    }

    /* Returns the number of counter ticks between the two samples. */
    uint64_t cycles() const {
        return end_tsc - begin_tsc;
    }

private:
    uint64_t begin_tsc, end_tsc;
};
- #endif
- void warup_cache()
- {
- perf_timer tm;
- tm.sample_start();
- tm.sample_stop();
- tm.sample_start();
- tm.sample_stop();
- tm.sample_start();
- tm.sample_stop();
- tm.sample_start();
- tm.sample_stop();
- }
/*
 * Draws one value from `dis` using `gen` as the random source.
 * Both arguments are taken by reference and are advanced by the call.
 */
__attribute__((used))
int rand_cpp(std::uniform_int_distribution<int>& dis, std::mt19937& gen)
{
    const int sample = dis(gen);
    return sample;
}
- __attribute__((noinline))
- void test_cpprand()
- {
- std::random_device rd;
- std::mt19937 gen(rd());
- std::uniform_int_distribution<int> dis(0, INT32_MAX);
- perf_timer tm;
- warup_cache();
- tm.sample_start();
- for (uint64_t n = 0; n < N_RUNS; ++n)
- rand_cpp(dis, gen);
- tm.sample_stop();
- uint64_t cycles = tm.cycles();
- printf("C++ uniform distribution %'" PRIu64 " runs\n"
- "Total exec cycles: %'" PRIu64 "\n"
- "Avg cycles/run : %'" PRIu64 "\n",
- N_RUNS,
- cycles,
- (cycles / N_RUNS));
- }
/*
 * Returns one 32-bit RDRAND value, retrying until _rdrand32_step()
 * reports success.
 */
__attribute__((used))
int rand_rdrand_fair()
{
    unsigned int value;
    for (;;) {
        if (_rdrand32_step(&value))
            break;
    }
    return value;
}
/*
 * Issues a single _rdrand32_step() and returns whatever it stored,
 * without checking the success flag and without retrying.
 */
__attribute__((used))
int rand_rdrand_unfair()
{
    unsigned int value;
    (void) _rdrand32_step(&value);
    return value;
}
- __attribute__((noinline))
- void test_rdrand_fair()
- {
- perf_timer tm;
- warup_cache();
- tm.sample_start();
- for (uint64_t n = 0; n < N_RUNS; ++n)
- rand_rdrand_fair();
- tm.sample_stop();
- uint64_t cycles = tm.cycles();
- printf("RDRAND fair rand %'" PRIu64 " runs\n"
- "Total exec cycles: %'" PRIu64 "\n"
- "Avg cycles/run : %'" PRIu64 "\n",
- N_RUNS,
- cycles,
- (cycles / N_RUNS));
- }
- __attribute__((noinline))
- void test_rdrand_unfair()
- {
- perf_timer tm;
- warup_cache();
- tm.sample_start();
- for (uint64_t n = 0; n < N_RUNS; ++n)
- rand_rdrand_unfair();
- tm.sample_stop();
- uint64_t cycles = tm.cycles();
- printf("RDRAND unfair rand %'" PRIu64 " runs\n"
- "Total exec cycles: %'" PRIu64 "\n"
- "Avg cycles/run : %'" PRIu64 "\n",
- N_RUNS,
- cycles,
- (cycles / N_RUNS));
- }
/* Thin wrapper returning the next value from the C library rand(). */
__attribute__((used))
int rand_c()
{
    const int sample = ::rand();
    return sample;
}
- __attribute__((noinline))
- void test_crand()
- {
- perf_timer tm;
- warup_cache();
- tm.sample_start();
- for (uint64_t n = 0; n < N_RUNS; ++n)
- rand_c();
- tm.sample_stop();
- uint64_t cycles = tm.cycles();
- printf("C rand %'" PRIu64 " runs\n"
- "Total exec cycles: %'" PRIu64 "\n"
- "Avg cycles/run : %'" PRIu64 "\n",
- N_RUNS,
- cycles,
- (cycles / N_RUNS));
- }
- /* Should return number of clocks
- * approximately equal to the CPU frequency.
- */
- __attribute__((noinline))
- void trace_timer_resulution()
- {
- perf_timer tm;
- warup_cache();
- tm.sample_start();
- std::this_thread::sleep_for(std::chrono::seconds(1));
- tm.sample_stop();
- printf("[Trace timer]: Clocks per second %'" PRIu64 "\n", tm.cycles() );
- }
- int main()
- {
- // locale for thousands separator
- setlocale(LC_ALL, "");
- trace_timer_resulution();
- test_cpprand();
- test_crand();
- test_rdrand_fair();
- test_rdrand_unfair();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement