Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <omp.h>

#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include "utils/commandline.hpp"

#define CACHE_LINE_SIZE 64
// Fill `array` with `length` pseudo-random values from a fixed seed, so
// every run sums the same data. The upper bound max/length guarantees the
// total of all elements cannot overflow a size_t.
void randomize(size_t *array, size_t length) {
  std::mt19937_64 engine(1092347087309ull);
  std::uniform_int_distribution<uint64_t> distribution(
      0, std::numeric_limits<size_t>::max() / length);
  for (size_t idx = 0; idx != length; ++idx) {
    array[idx] = distribution(engine);
  }
}
- size_t addUp_sequential(size_t *array, size_t length) {
- size_t sum = 0;
- for (size_t i = 0; i < length; ++i) {
- sum += array[i];
- }
- return sum;
- }
- size_t addUp_atomic(size_t *array, size_t length) {
- std::atomic_size_t current_sum = 0;
- #pragma omp parallel for
- for (size_t i = 0; i < length; ++i) {
- current_sum.fetch_add(array[i]);
- }
- return current_sum.load();
- }
- size_t addUp_false_sharing(size_t *array, size_t length) {
- // size_t maxNumThreads = omp_get_max_threads();
- size_t maxNumThreads = omp_
- std::vector<size_t> partial_sum(maxNumThreads, 0);
- // std::cout << alignof(partial_sum) << std::endl;
- #pragma omp parallel
- {
- size_t id = omp_get_thread_num();
- #pragma omp for
- for (size_t i = 0; i < length; ++i) {
- partial_sum[id] += array[i];
- }
- }
- size_t sum = 0;
- for (size_t i = 0; i < maxNumThreads; ++i) {
- sum += partial_sum[i];
- }
- return sum;
- }
- size_t addUp_fixed(size_t *array, size_t length) {
- size_t maxNumThreads = omp_get_max_threads();
- struct padded_size_t {
- size_t value;
- char padding[CACHE_LINE_SIZE - sizeof(size_t)];
- padded_size_t(size_t value) {
- this->value = value;
- }
- };
- alignas(CACHE_LINE_SIZE) std::vector<padded_size_t> partial_sum(maxNumThreads,
- {0});
- // std::cout << sizeof(partial_sum[0]) << std::endl;
- #pragma omp parallel
- {
- size_t id = omp_get_thread_num();
- #pragma omp for
- for (size_t i = 0; i < length; ++i) {
- partial_sum[id].value += array[i];
- }
- }
- size_t sum = 0;
- for (size_t i = 0; i < maxNumThreads; ++i) {
- sum += partial_sum[i].value;
- }
- return sum;
- }
// Cache-line-sized wrapper around a single size_t (value plus padding).
// NOTE(review): not referenced anywhere in this file -- addUp_fixed defines
// its own local padded struct; presumably a leftover from an experiment.
struct paddedSize {
  size_t l;
  char pad[CACHE_LINE_SIZE - sizeof(size_t)];
};
- size_t addUp_private(size_t *array, size_t length) {
- size_t sum = 0;
- size_t local_sum = 0;
- #pragma omp parallel firstprivate(local_sum) shared(sum)
- {
- #pragma omp for
- for (size_t i = 0; i < length; ++i) {
- local_sum += array[i];
- }
- #pragma omp atomic
- sum += local_sum;
- }
- return sum;
- }
- size_t addUp_the_omp_way(size_t *array, size_t length) {
- size_t sum = 0;
- #pragma omp parallel
- {
- size_t local_sum = 0;
- // #pragma omp for schedule(static, 10)
- #pragma omp for
- for (size_t i = 0; i < length; ++i) {
- local_sum += array[i];
- }
- #pragma omp atomic
- sum += local_sum;
- }
- return sum;
- }
- size_t addUp_reduction(size_t *array, size_t length) {
- size_t sum = 0;
- #pragma omp parallel for reduction(+ : sum)
- for (size_t i = 0; i < length; ++i) {
- sum += array[i];
- }
- return sum;
- }
// Write `t` right-aligned in a field of width `w`, followed by a single
// space, flushing so partial rows are visible during long benchmark runs.
template <class T> void print(const T &t, size_t w) {
  auto &out = std::cout;
  out.width(w);
  out << t << " " << std::flush;
}
- void print_headline() {
- print("#it", 3);
- print("n", 12);
- print("sum", 22);
- print("p", 3);
- print("name", 25);
- print("time", 13);
- std::cout << std::endl;
- }
- void print_timing(size_t it, size_t n, size_t sum, size_t p, std::string name,
- size_t time) {
- print(it, 3);
- print(n, 12);
- print(sum, 22);
- print(p, 3);
- print(name, 25);
- print(time / 1000000.0, 13); // in ms
- std::cout << std::endl;
- }
- void runExp(std::function<size_t(size_t *, size_t)> testFunction, size_t *array,
- size_t it, size_t n, size_t p, std::string name) {
- for (size_t i = 0; i < it + 1; ++i) {
- auto start = std::chrono::high_resolution_clock::now();
- size_t sum = testFunction(array, n);
- auto end = std::chrono::high_resolution_clock::now();
- if (i == 0)
- continue;
- size_t time =
- std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)
- .count();
- print_timing(i, n, sum, p, name, time);
- }
- }
- void runAll(size_t *array, size_t p, size_t n, size_t it) {
- #ifdef OMP
- omp_set_num_threads(p);
- #endif
- // std::cout << "sequential" << std::endl;
- // runExp(addUp_sequential, array.get(), 1, n);
- runExp(addUp_atomic, array, it, n, p, "a)_atomic");
- runExp(addUp_false_sharing, array, it, n, p, "b)_false_sharing");
- runExp(addUp_fixed, array, it, n, p, "c)_fixed_false_sharing");
- runExp(addUp_private, array, it, n, p, "d)_private");
- runExp(addUp_reduction, array, it, n, p, "reduction");
- runExp(addUp_the_omp_way, array, it, n, p, "the_omp_way");
- }
- int main(int argn, char **argc) {
- CommandLine c{argn, argc};
- // size_t n = c.intArg("-n", 1'000'000'000);
- size_t n = c.intArg("-n", 10'000'000);
- size_t it = c.intArg("-it", 5);
- print_headline();
- size_t maxNumberOfThreads = 8;
- std::unique_ptr<size_t[]> array = std::make_unique<size_t[]>(n);
- randomize(array.get(), n);
- for (size_t p = 1; p <= maxNumberOfThreads; ++p) {
- runAll(array.get(), p, n, it);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement