Advertisement
froleyks

sum.cpp

Dec 3rd, 2018
244
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.95 KB | None | 0 0
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <omp.h>
#include <random>
#include <string>
#include <vector>

#include "utils/commandline.hpp"
  10.  
  11. #define CACHE_LINE_SIZE 64
  12.  
  13. void randomize(size_t *array, size_t length) {
  14.   std::mt19937_64 re(1092347087309ull);
  15.   std::uniform_int_distribution<uint64_t> dis(
  16.       0, std::numeric_limits<size_t>::max() / length);
  17.   for (size_t i = 0; i < length; ++i) {
  18.     array[i] = dis(re);
  19.   }
  20. }
  21.  
  22. size_t addUp_sequential(size_t *array, size_t length) {
  23.   size_t sum = 0;
  24.   for (size_t i = 0; i < length; ++i) {
  25.     sum += array[i];
  26.   }
  27.   return sum;
  28. }
  29.  
  30. size_t addUp_atomic(size_t *array, size_t length) {
  31.   std::atomic_size_t current_sum = 0;
  32.  
  33. #pragma omp parallel for
  34.   for (size_t i = 0; i < length; ++i) {
  35.     current_sum.fetch_add(array[i]);
  36.   }
  37.   return current_sum.load();
  38. }
  39.  
  40. size_t addUp_false_sharing(size_t *array, size_t length) {
  41.   size_t maxNumThreads = omp_get_max_threads();
  42.   std::vector<size_t> partial_sum(maxNumThreads, 0);
  43.  
  44.   // std::cout << alignof(partial_sum) << std::endl;
  45. #pragma omp parallel
  46.   {
  47.     size_t id = omp_get_thread_num();
  48. #pragma omp for
  49.     for (size_t i = 0; i < length; ++i) {
  50.       partial_sum[id] += array[i];
  51.     }
  52.   }
  53.  
  54.   size_t sum = 0;
  55.   for (size_t i = 0; i < maxNumThreads; ++i) {
  56.     sum += partial_sum[i];
  57.   }
  58.   return sum;
  59. }
  60.  
  61. size_t addUp_fixed(size_t *array, size_t length) {
  62.   size_t maxNumThreads = omp_get_max_threads();
  63.  
  64.   struct padded_size_t {
  65.     size_t value;
  66.     char padding[CACHE_LINE_SIZE - sizeof(size_t)];
  67.  
  68.     padded_size_t(size_t value) {
  69.       this->value = value;
  70.     }
  71.   };
  72.   alignas(CACHE_LINE_SIZE) std::vector<padded_size_t> partial_sum(maxNumThreads,
  73.                                                                   {0});
  74.   // std::cout << sizeof(partial_sum[0]) << std::endl;
  75.  
  76. #pragma omp parallel
  77.   {
  78.     size_t id = omp_get_thread_num();
  79. #pragma omp for
  80.     for (size_t i = 0; i < length; ++i) {
  81.       partial_sum[id].value += array[i];
  82.     }
  83.   }
  84.  
  85.   size_t sum = 0;
  86.   for (size_t i = 0; i < maxNumThreads; ++i) {
  87.     sum += partial_sum[i].value;
  88.   }
  89.   return sum;
  90. }
  91.  
// A size_t padded out to CACHE_LINE_SIZE bytes so that adjacent instances in
// an array never share a cache line.
// NOTE(review): appears unused in this file — addUp_fixed defines its own
// local padded struct instead; confirm before removing or relying on it.
struct paddedSize {
  size_t l;
  char pad[CACHE_LINE_SIZE - sizeof(size_t)];
};
  96.  
  97. size_t addUp_private(size_t *array, size_t length) {
  98.   size_t sum = 0;
  99.   size_t local_sum = 0;
  100. #pragma omp parallel firstprivate(local_sum) shared(sum)
  101.   {
  102. #pragma omp for
  103.     for (size_t i = 0; i < length; ++i) {
  104.       local_sum += array[i];
  105.     }
  106.  
  107. #pragma omp atomic
  108.     sum += local_sum;
  109.   }
  110.   return sum;
  111. }
  112.  
  113. size_t addUp_the_omp_way(size_t *array, size_t length) {
  114.   size_t sum = 0;
  115. #pragma omp parallel
  116.   {
  117.     size_t local_sum = 0;
  118. // #pragma omp for schedule(static, 10)
  119. #pragma omp for
  120.     for (size_t i = 0; i < length; ++i) {
  121.       local_sum += array[i];
  122.     }
  123.  
  124. #pragma omp atomic
  125.     sum += local_sum;
  126.   }
  127.   return sum;
  128. }
  129.  
  130. size_t addUp_reduction(size_t *array, size_t length) {
  131.   size_t sum = 0;
  132. #pragma omp parallel for reduction(+ : sum)
  133.   for (size_t i = 0; i < length; ++i) {
  134.     sum += array[i];
  135.   }
  136.   return sum;
  137. }
  138.  
  139. template <class T> void print(const T &t, size_t w) {
  140.   std::cout.width(w);
  141.   std::cout << t << " " << std::flush;
  142. }
  143.  
  144. void print_headline() {
  145.   print("#it", 3);
  146.   print("n", 12);
  147.   print("sum", 22);
  148.   print("p", 3);
  149.   print("name", 25);
  150.   print("time", 13);
  151.   std::cout << std::endl;
  152. }
  153.  
// Prints one result row; field widths must stay in sync with print_headline.
// `time` is in nanoseconds and is displayed in milliseconds.
void print_timing(size_t it, size_t n, size_t sum, size_t p, std::string name,
                  size_t time) {
  print(it, 3);
  print(n, 12);
  print(sum, 22);
  print(p, 3);
  print(name, 25);
  print(time / 1000000.0, 13); // in ms
  std::cout << std::endl;
}
  164.  
  165. void runExp(std::function<size_t(size_t *, size_t)> testFunction, size_t *array,
  166.             size_t it, size_t n, size_t p, std::string name) {
  167.   for (size_t i = 0; i < it + 1; ++i) {
  168.     auto start = std::chrono::high_resolution_clock::now();
  169.     size_t sum = testFunction(array, n);
  170.     auto end = std::chrono::high_resolution_clock::now();
  171.  
  172.     if (i == 0)
  173.       continue;
  174.  
  175.     size_t time =
  176.         std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)
  177.             .count();
  178.     print_timing(i, n, sum, p, name, time);
  179.   }
  180. }
  181.  
  182. void runAll(size_t *array, size_t p, size_t n, size_t it) {
  183.  
  184. #ifdef OMP
  185.   omp_set_num_threads(p);
  186. #endif
  187.  
  188.   // std::cout << "sequential" << std::endl;
  189.   // runExp(addUp_sequential, array.get(), 1, n);
  190.  
  191.   runExp(addUp_atomic, array, it, n, p, "a)_atomic");
  192.  
  193.   runExp(addUp_false_sharing, array, it, n, p, "b)_false_sharing");
  194.  
  195.   runExp(addUp_fixed, array, it, n, p, "c)_fixed_false_sharing");
  196.  
  197.   runExp(addUp_private, array, it, n, p, "d)_private");
  198.  
  199.   runExp(addUp_reduction, array, it, n, p, "reduction");
  200.  
  201.   runExp(addUp_the_omp_way, array, it, n, p, "the_omp_way");
  202. }
  203.  
  204. int main(int argn, char **argc) {
  205.   CommandLine c{argn, argc};
  206.   size_t n = c.intArg("-n", 1000000000);
  207.   size_t it = c.intArg("-it", 5);
  208.  
  209.   print_headline();
  210.   size_t maxNumberOfThreads = 8;
  211.  
  212.   std::unique_ptr<size_t[]> array = std::make_unique<size_t[]>(n);
  213.  
  214.   randomize(array.get(), n);
  215.  
  216.   for (size_t p = 1; p <= maxNumberOfThreads; ++p) {
  217.     runAll(array.get(), p, n, it);
  218.   }
  219. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement