Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <algorithm>
- #include <random>
- #include <chrono>
- #include <cstring>
- #include <iomanip>
- #include <array>
- #include <intrin.h>
- using T = int;
- const int NUM_ELEMENTS = 32;
- const int NUM_TRIALS = 10;
- const int NUM_REPEATS = 10'000'000;
- #if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
- #define COMPILER_INTEL
- #elif defined(__clang__)
- #define COMPILER_CLANG
- #elif defined(_MSC_VER)
- #define COMPILER_MSVC
- #elif defined(__GNUC__)
- #define COMPILER_GCC
- #endif
- #if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
- #define ALWAYS_INLINE inline __attribute__((__always_inline__))
- #else
- #define ALWAYS_INLINE __forceinline
- #endif
- class CpuTimer {
- #if defined(COMPILER_MSVC)
- unsigned int dummyTscAuxMsvc;
- uint64_t startCyclesMsvc = 0;
- uint64_t endCyclesMsvc = 0;
- #else
- uint32_t startCyclesLow = 0;
- uint32_t startCyclesHigh = 0;
- uint32_t endCyclesLow = 0;
- uint32_t endCyclesHigh = 0;
- #endif
- public:
- uint64_t minMeasurementOverhead = 0;
- uint64_t medianMeasurementOverhead = 0;
- CpuTimer() {
- calculateMeasurementOverhead();
- }
- ALWAYS_INLINE void start() {
- #if defined(COMPILER_MSVC)
- startCyclesMsvc = __rdtsc();
- #else
- asm volatile (
- "CPUID\n\t"
- "RDTSC\n\t"
- "mov %%edx, %0\n\t"
- "mov %%eax, %1\n\t"
- : "=r" (startCyclesHigh), "=r" (startCyclesLow)
- :: "%rax", "%rbx", "%rcx", "%rdx"
- );
- #endif
- }
- ALWAYS_INLINE void end() {
- #if defined(COMPILER_MSVC)
- endCyclesMsvc = __rdtscp(&dummyTscAuxMsvc);
- #else
- asm volatile (
- "RDTSCP\n\t"
- "mov %%edx, %0\n\t"
- "mov %%eax, %1\n\t"
- "CPUID\n\t"
- : "=r" (endCyclesHigh), "=r" (endCyclesLow)
- :: "%rax", "%rbx", "%rcx", "%rdx"
- );
- #endif
- }
- uint64_t getTime() {
- #if defined(COMPILER_MSVC)
- auto start = startCyclesMsvc;
- auto end = endCyclesMsvc;
- #else
- auto start = ((uint64_t(startCyclesHigh) << 32) | startCyclesLow);
- auto end = ((uint64_t(endCyclesHigh) << 32) | endCyclesLow);
- #endif
- return end - start - minMeasurementOverhead;
- }
- void calculateMeasurementOverhead(int64_t sampleCount = 1'000'000) {
- std::vector<uint64_t> times;
- for (int j = 0; j < sampleCount; ++j) {
- start();
- end();
- times.push_back(getTime());
- }
- std::sort(times.begin(), times.end());
- minMeasurementOverhead = times.front();
- medianMeasurementOverhead = times[times.size() / 2];
- }
- };
- template<typename Function, typename T> int runBenchmark(CpuTimer& timer, Function testedFunction, const std::vector<T>& data, bool checkIfCorrectOutput) {
- std::vector<int> executionTimes;
- for (int i = 1; i <= NUM_REPEATS; ++i) {
- auto copiedData = data;
- timer.start();
- testedFunction(copiedData, 0, NUM_ELEMENTS - 1);
- timer.end();
- executionTimes.push_back(timer.getTime());
- if (checkIfCorrectOutput) {
- bool sorted = true;
- for (size_t i = 0; i < copiedData.size() - 1; ++i) {
- if (copiedData[i] > copiedData[i + 1]) {
- sorted = false;
- break;
- }
- }
- if (!sorted) {
- std::cerr << "Error: Array not sorted on trial 1!" << std::endl;
- }
- }
- }
- std::sort(executionTimes.begin(), executionTimes.end());
- return executionTimes.front();
- }
- /**
- * Applies the specific rule requested for subarrays of size 3.
- */
- void apply_three_element_rule(std::vector<T>& arr, int low, int high) {
- // The check for size 3 (high - low == 2) is assumed to pass
- // because the calling loop ensures it.
- T pivot = arr[low];
- T third_num = arr[high];
- if (third_num <= pivot) {
- // Swap all 3 to make pivot middle, third_num first
- T temp_mid = arr[low + 1];
- arr[high] = temp_mid;
- arr[low + 1] = pivot;
- arr[low] = third_num;
- } else {
- // Swap all 3 to make pivot middle, third_num last
- T temp_mid = arr[low + 1];
- arr[low] = temp_mid;
- arr[low + 1] = pivot;
- arr[high] = third_num;
- }
- }
- void insertionSortMonty(std::vector<T>& arr, int low, int high) {
- if (NUM_ELEMENTS >= 3) {
- for (int i = 0; i <= NUM_ELEMENTS - 3; i += 3) {
- apply_three_element_rule(arr, i, i + 2);
- }
- }
- for (int i = low + 1; i <= high; ++i) {
- int key = arr[i];
- int j = i - 1;
- while (j >= low && arr[j] > key) {
- arr[j + 1] = arr[j];
- j = j - 1;
- }
- arr[j + 1] = key;
- }
- }
- void insertionSort(std::vector<T>& arr, int low, int high) {
- for (int i = low + 1; i <= high; ++i) {
- int key = arr[i];
- int j = i - 1;
- while (j >= low && arr[j] > key) {
- arr[j + 1] = arr[j];
- j = j - 1;
- }
- arr[j + 1] = key;
- }
- }
- int main() {
- CpuTimer timer;
- std::cout << "Elements per array : " << NUM_ELEMENTS << std::endl;
- std::cout << "Total Trials : " << NUM_TRIALS << std::endl;
- std::cout << "Repeats per Trial : " << NUM_REPEATS << std::endl;
- std::cout << "minTimerOverhead : " << timer.minMeasurementOverhead << std::endl;
- std::cout << "medianTimerOverhead : " << timer.medianMeasurementOverhead << std::endl;
- std::mt19937 gen(0x1234567);
- std::uniform_int_distribution<> distrib(1, 10000000);
- std::vector<T> data(NUM_ELEMENTS);
- std::cout << std::left;
- std::cout << std::setw(10) << "Trial nr" << " | "
- << std::setw(20) << "Normal [cycles]" << " | "
- << std::setw(20) << "Monty [cycles]" << "\n";
- int sumOfMinTimesNormal = 0;
- int sumOfMinTimesMonty = 0;
- for (int t = 1; t <= NUM_TRIALS; ++t) {
- for (int i = 0; i < NUM_ELEMENTS; ++i) {
- data[i] = distrib(gen);
- }
- int minTimeNormal = runBenchmark(timer, insertionSort, data, true);
- int minTimeMonty = runBenchmark(timer, insertionSortMonty, data, true);
- std::cout << std::setw(10) << t << " | "
- << std::setw(20) << minTimeNormal << " | "
- << std::setw(20) << minTimeMonty << "\n";
- sumOfMinTimesNormal += minTimeNormal;
- sumOfMinTimesMonty += minTimeMonty;
- }
- std::cout << std::setw(10) << "Total" << " | "
- << std::setw(20) << sumOfMinTimesNormal << " | "
- << std::setw(20) << sumOfMinTimesMonty << "\n";
- std::cout << std::setw(30) << "Monty / Normal " << " = " << double(sumOfMinTimesMonty) / sumOfMinTimesNormal << '\n';
- }
Advertisement
Add Comment
Please, Sign In to add comment