Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <future>
#include <iostream>
#include <iterator>
#include <numeric>
#include <string>
#include <thread>
#include <vector>
using namespace std;
// Sums the range [_begin, _end) on top of _init, splitting the work into
// contiguous chunks that are accumulated concurrently via std::async.
//
// Ranges shorter than 10000 elements, or machines reporting a single hardware
// thread, are summed sequentially with std::accumulate.
//
// T  - accumulator / result type; each partial sum starts from T{0}.
// IT - iterator type accepted by std::distance, std::next and std::accumulate.
//
// Returns _init plus the partial sums, added in chunk order.
template <typename T, typename IT>
T parallel_sum(IT _begin, IT _end, T _init) {
    using diff_t = typename std::iterator_traits<IT>::difference_type;

    const diff_t size = std::distance(_begin, _end);
    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; clamp to 1 so the chunk size below never divides by zero.
    static const unsigned n = std::max(1u, std::thread::hardware_concurrency());
    if (size < 10000 || n == 1) return std::accumulate(_begin, _end, _init);

    std::vector<std::future<T>> partials;
    partials.reserve(n);

    const diff_t chunkSize = size / static_cast<diff_t>(n);
    IT chunkBegin = _begin;
    for (unsigned i = 0; i < n; ++i) {
        // The last chunk runs to _end so it absorbs the remainder of size / n.
        const IT chunkEnd = (i == n - 1) ? _end : std::next(chunkBegin, chunkSize);
        partials.push_back(std::async(
            std::launch::async,
            [](IT first, IT last) { return std::accumulate(first, last, T{0}); },
            chunkBegin, chunkEnd));
        chunkBegin = chunkEnd;
    }

    for (auto& partial : partials) _init += partial.get();
    return _init;
}
// Sums the range [_begin, _end) on top of _init using interleaved (strided)
// work distribution: task i visits elements i, i + n, i + 2n, ... where n is
// the number of concurrent tasks.
//
// Ranges shorter than 10000 elements, or machines reporting a single hardware
// thread, are summed sequentially with std::accumulate.
//
// T  - accumulator / result type; each partial sum starts from T{0}.
// IT - iterator type accepted by std::distance, std::next and std::advance.
//
// Returns _init plus the partial sums, added in task order.
template <typename T, typename IT>
T parallel_sum2(IT _begin, IT _end, T _init) {
    using diff_t = typename std::iterator_traits<IT>::difference_type;

    const diff_t size = std::distance(_begin, _end);
    // hardware_concurrency() is allowed to return 0 when the value cannot be
    // determined; clamp to 1 so at least the sequential path is taken.
    static const unsigned n = std::max(1u, std::thread::hardware_concurrency());
    if (size < 10000 || n == 1) return std::accumulate(_begin, _end, _init);

    std::vector<std::future<T>> partials;
    partials.reserve(n);

    const diff_t stride = static_cast<diff_t>(n);
    for (unsigned i = 0; i < n; ++i) {
        partials.push_back(std::async(
            std::launch::async,
            // `remaining` is the number of elements from `first` to the end of
            // the range. Counting it down keeps the iterator from ever being
            // advanced beyond the past-the-end position.
            [](IT first, diff_t remaining, diff_t step) {
                T sum{ 0 };
                while (remaining > 0) {
                    sum += *first;
                    if (remaining <= step) break;  // next hop would leave the range
                    std::advance(first, step);
                    remaining -= step;
                }
                return sum;
            },
            std::next(_begin, i), size - static_cast<diff_t>(i), stride));
    }

    for (auto& partial : partials) _init += partial.get();
    return _init;
}
- int main() {
- constexpr size_t size{ 5000000 };
- constexpr int iterations{ 100 };
- vector<uint64_t> vec1(size, 42);
- vector<uint64_t> vec2(size, 42);
- vector<uint64_t> vec3(size, 42);
- auto t0 = chrono::high_resolution_clock::now();
- for (int i{ 0 }; i<iterations; i++)
- volatile uint64_t result = accumulate(begin(vec1), end(vec1), uint64_t{0});
- auto t1 = chrono::high_resolution_clock::now();
- for (int i{ 0 }; i<iterations; i++)
- volatile uint64_t result = parallel_sum(begin(vec2), end(vec2), uint64_t{0});
- auto t2 = chrono::high_resolution_clock::now();
- for (int i{ 0 }; i<iterations; i++)
- volatile uint64_t result = parallel_sum2(begin(vec3), end(vec3), uint64_t{0});
- auto t3 = chrono::high_resolution_clock::now();
- vector<string> times;
- times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t1-t0).count()/iterations));
- times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t2-t1).count()/iterations));
- times.push_back(to_string(chrono::duration_cast<chrono::nanoseconds>(t3-t2).count()/iterations));
- // Right-justify times for readability
- for (auto& str : times) str.insert(0, 12-str.length(), ' ');
- cout << "accumulate: " << times[0] << " ns" << endl;
- cout << "parallel 1: " << times[1] << " ns" << endl;
- cout << "parallel 2: " << times[2] << " ns" << endl;
- cin.get();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement