sum.cpp (froleyks, Dec 4th, 2018)
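// Example build with GCC (assumes OpenMP support and that utils/commandline.hpp
// from the surrounding project is available):
//   g++ -std=c++17 -O3 -fopenmp sum.cpp -o sum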
#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <omp.h>
#include <random>
#include <string>
#include <vector>

#include "utils/commandline.hpp"

#define CACHE_LINE_SIZE 64

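// Fills the array with pseudo-random values from a fixed seed. The
// distribution is capped at max/length so that the total sum always fits in a
// size_t without overflow.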
void randomize(size_t *array, size_t length) {
  std::mt19937_64 re(1092347087309ull);
  std::uniform_int_distribution<uint64_t> dis(
      0, std::numeric_limits<size_t>::max() / length);
  for (size_t i = 0; i < length; ++i) {
    array[i] = dis(re);
  }
}

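// Single-threaded baseline.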
size_t addUp_sequential(size_t *array, size_t length) {
  size_t sum = 0;
  for (size_t i = 0; i < length; ++i) {
    sum += array[i];
  }
  return sum;
}

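// a) Every iteration performs a fetch_add on one shared atomic counter, so all
// threads contend on the same cache line.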
size_t addUp_atomic(size_t *array, size_t length) {
  std::atomic_size_t current_sum{0};

#pragma omp parallel for
  for (size_t i = 0; i < length; ++i) {
    current_sum.fetch_add(array[i]);
  }
  return current_sum.load();
}

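// b) One partial sum per thread, but the slots are adjacent size_t values in a
// single vector: several slots share a cache line, so writes from different
// threads keep invalidating each other's lines (false sharing).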
size_t addUp_false_sharing(size_t *array, size_t length) {
  size_t maxNumThreads = omp_get_max_threads();
  std::vector<size_t> partial_sum(maxNumThreads, 0);

  // std::cout << alignof(partial_sum) << std::endl;
#pragma omp parallel
  {
    size_t id = omp_get_thread_num();
#pragma omp for
    for (size_t i = 0; i < length; ++i) {
      partial_sum[id] += array[i];
    }
  }

  size_t sum = 0;
  for (size_t i = 0; i < maxNumThreads; ++i) {
    sum += partial_sum[i];
  }
  return sum;
}

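// c) Same per-thread slots, but each slot is padded to CACHE_LINE_SIZE bytes so
// that no two threads write to the same cache line. (The alignas on the vector
// variable only aligns the vector object itself, not its heap buffer; the
// padding of the elements is what removes the false sharing.)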
size_t addUp_fixed(size_t *array, size_t length) {
  size_t maxNumThreads = omp_get_max_threads();

  struct padded_size_t {
    size_t value;
    char padding[CACHE_LINE_SIZE - sizeof(size_t)];

    padded_size_t(size_t value) {
      this->value = value;
    }
  };
  alignas(CACHE_LINE_SIZE) std::vector<padded_size_t> partial_sum(maxNumThreads,
                                                                  {0});
  // std::cout << sizeof(partial_sum[0]) << std::endl;

#pragma omp parallel
  {
    size_t id = omp_get_thread_num();
#pragma omp for
    for (size_t i = 0; i < length; ++i) {
      partial_sum[id].value += array[i];
    }
  }

  size_t sum = 0;
  for (size_t i = 0; i < maxNumThreads; ++i) {
    sum += partial_sum[i].value;
  }
  return sum;
}

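// Padded counter type in the same spirit as padded_size_t above; it is not
// referenced anywhere else in this file.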
struct paddedSize {
  size_t l;
  char pad[CACHE_LINE_SIZE - sizeof(size_t)];
};

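// d) Each thread gets its own firstprivate copy of local_sum and accumulates
// its share of the array into it; the per-thread results are then combined
// with a single atomic update.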
size_t addUp_private(size_t *array, size_t length) {
  size_t sum = 0;
  size_t local_sum = 0;
#pragma omp parallel firstprivate(local_sum) shared(sum)
  {
#pragma omp for
    for (size_t i = 0; i < length; ++i) {
      local_sum += array[i];
    }

#pragma omp atomic
    sum += local_sum;
  }
  return sum;
}

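// Same idea as d), but the per-thread accumulator is simply a local variable
// declared inside the parallel region instead of a firstprivate copy.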
size_t addUp_the_omp_way(size_t *array, size_t length) {
  size_t sum = 0;
#pragma omp parallel
  {
    size_t local_sum = 0;
// #pragma omp for schedule(static, 10)
#pragma omp for
    for (size_t i = 0; i < length; ++i) {
      local_sum += array[i];
    }

#pragma omp atomic
    sum += local_sum;
  }
  return sum;
}

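// Lets OpenMP handle both the per-thread accumulation and the final
// combination via reduction(+ : sum).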
size_t addUp_reduction(size_t *array, size_t length) {
  size_t sum = 0;
#pragma omp parallel for reduction(+ : sum)
  for (size_t i = 0; i < length; ++i) {
    sum += array[i];
  }
  return sum;
}

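// Fixed-width console output helpers.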
template <class T> void print(const T &t, size_t w) {
  std::cout.width(w);
  std::cout << t << " " << std::flush;
}

void print_headline() {
  print("#it", 3);
  print("n", 12);
  print("sum", 22);
  print("p", 3);
  print("name", 25);
  print("time", 13);
  std::cout << std::endl;
}

void print_timing(size_t it, size_t n, size_t sum, size_t p, std::string name,
                  size_t time) {
  print(it, 3);
  print(n, 12);
  print(sum, 22);
  print(p, 3);
  print(name, 25);
  print(time / 1000000.0, 13); // in ms
  std::cout << std::endl;
}

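// Runs a summation variant it + 1 times; the first iteration is treated as a
// warm-up and not reported. Times are measured in nanoseconds and printed in
// milliseconds.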
void runExp(std::function<size_t(size_t *, size_t)> testFunction, size_t *array,
            size_t it, size_t n, size_t p, std::string name) {
  for (size_t i = 0; i < it + 1; ++i) {
    auto start = std::chrono::high_resolution_clock::now();
    size_t sum = testFunction(array, n);
    auto end = std::chrono::high_resolution_clock::now();

    if (i == 0)
      continue;

    size_t time =
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)
            .count();
    print_timing(i, n, sum, p, name, time);
  }
}

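// Sets the OpenMP thread count to p (when compiled with OpenMP) and runs all
// variants back to back on the same input.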
void runAll(size_t *array, size_t p, size_t n, size_t it) {

#ifdef _OPENMP
  omp_set_num_threads(p);
#endif

  // std::cout << "sequential" << std::endl;
  // runExp(addUp_sequential, array.get(), 1, n);

  runExp(addUp_atomic, array, it, n, p, "a)_atomic");

  runExp(addUp_false_sharing, array, it, n, p, "b)_false_sharing");

  runExp(addUp_fixed, array, it, n, p, "c)_fixed_false_sharing");

  runExp(addUp_private, array, it, n, p, "d)_private");

  runExp(addUp_reduction, array, it, n, p, "reduction");

  runExp(addUp_the_omp_way, array, it, n, p, "the_omp_way");
}

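// Reads -n (array length) and -it (timed iterations per variant) via the
// project's CommandLine helper, then sweeps the thread count p from 1 to 8.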
int main(int argc, char **argv) {
  CommandLine c{argc, argv};
  // size_t n = c.intArg("-n", 1'000'000'000);
  size_t n = c.intArg("-n", 10'000'000);
  size_t it = c.intArg("-it", 5);

  print_headline();
  size_t maxNumberOfThreads = 8;

  std::unique_ptr<size_t[]> array = std::make_unique<size_t[]>(n);

  randomize(array.get(), n);

  for (size_t p = 1; p <= maxNumberOfThreads; ++p) {
    runAll(array.get(), p, n, it);
  }
}