Advertisement
Guest User

Parallel sum test

a guest
Dec 12th, 2015
207
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.89 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <numeric>
  4. #include <future>
  5. #include <iterator>
  6. #include <chrono>
  7. #include <string>
  8. using namespace std;
  9.  
  10. template <typename T, typename IT>
  11. T parallel_sum(IT _begin, IT _end, T _init) {
  12. const auto size = distance(_begin, _end);
  13. static const auto n = thread::hardware_concurrency();
  14. if (size < 10000 || n == 1) return accumulate(_begin, _end, _init);
  15. vector<future<T>> partials;
  16. partials.reserve(n);
  17. auto chunkSize = size / n;
  18. for (unsigned i{ 0 }; i < n; i++) {
  19. partials.push_back(async(launch::async, [](IT _b, IT _e){
  20. return accumulate(_b, _e, T{0});
  21. }, next(_begin, i*chunkSize), (i==n-1)?_end:next(_begin, (i+1)*chunkSize)));
  22. }
  23. for (auto& f : partials) _init += f.get();
  24. return _init;
  25. }
  26.  
  27. template <typename T, typename IT>
  28. T parallel_sum2(IT _begin, IT _end, T _init) {
  29. const auto size = distance(_begin, _end);
  30. static const auto n = thread::hardware_concurrency();
  31. if (size < 10000 || n == 1) return accumulate(_begin, _end, _init);
  32. vector<future<T>> partials;
  33. partials.reserve(n);
  34. for (unsigned i{ 0 }; i < n; i++) {
  35. partials.push_back(async(launch::async, [](IT _b, IT _e, unsigned _s){
  36. T _ret{ 0 };
  37. for (; _b < _e; advance(_b, _s)) _ret += *_b;
  38. return _ret;
  39. }, next(_begin, i), _end, n));
  40. }
  41. for (auto& f : partials) _init += f.get();
  42. return _init;
  43. }
  44.  
  45. int main() {
  46. constexpr size_t size{ 5000000 };
  47. constexpr int iterations{ 100 };
  48.  
  49. vector<uint64_t> vec1(size, 42);
  50. vector<uint64_t> vec2(size, 42);
  51. vector<uint64_t> vec3(size, 42);
  52.  
  53. auto t0 = chrono::high_resolution_clock::now();
  54. for (int i{ 0 }; i<iterations; i++)
  55. volatile uint64_t result = accumulate(begin(vec1), end(vec1), uint64_t{0});
  56. auto t1 = chrono::high_resolution_clock::now();
  57. for (int i{ 0 }; i<iterations; i++)
  58. volatile uint64_t result = parallel_sum(begin(vec2), end(vec2), uint64_t{0});
  59. auto t2 = chrono::high_resolution_clock::now();
  60. for (int i{ 0 }; i<iterations; i++)
  61. volatile uint64_t result = parallel_sum2(begin(vec3), end(vec3), uint64_t{0});
  62. auto t3 = chrono::high_resolution_clock::now();
  63.  
  64. vector<string> times;
  65. times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t1-t0).count()/iterations));
  66. times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t2-t1).count()/iterations));
  67. times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t3-t2).count()/iterations));
  68.  
  69. // Right-justify times for readability
  70. for (auto& str : times) str.insert(0, 12-str.length(), ' ');
  71.  
  72. cout << "accumulate: " << times[0] << " ns" << endl;
  73. cout << "parallel 1: " << times[1] << " ns" << endl;
  74. cout << "parallel 2: " << times[2] << " ns" << endl;
  75.  
  76. cin.get();
  77. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement