Advertisement
Guest User

Untitled

a guest
Jun 17th, 2020
166
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 3.64 KB | None | 0 0
  1. #define _GNU_SOURCE
  2. #include <immintrin.h>
  3. #include <limits.h>
  4. #include <omp.h>
  5. #include <pthread.h>
  6. #include <stdint.h>
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <unistd.h>
  11. typedef __m256d vec_rwc_type;
  12. typedef double scal_rwc_type;
  13. #define VEC_TYPE_ALIGNMENT 64
  14. #define ARRAY_SIZE_IN_BYTE 2 * 1024 * 1024 * 1024u
  15. #define ARRAY_SIZE = ARRAY_SIZE_IN_BYTE / sizeof(vec_rwc_type)
  16. #define SCAL_IN_VEC sizeof(vec_rwc_type) / sizeof(scal_rwc_type)
  17. #define UNROLL_COF 2
  18. #define ALL_ITER 50
  19. void bind_thread_to_core(void);
  20. int main(int argc, const char* argv[]) {
  21.     puts("Before atoi");
  22.     int number_of_thread = atoi(argv[1]);
  23.     puts("Before parallel section");
  24.     size_t array_size_per_vec_type = ARRAY_SIZE_IN_BYTE / sizeof(vec_rwc_type);
  25.     array_size_per_vec_type /= UNROLL_COF;
  26.     size_t iteration_step = SCAL_IN_VEC * UNROLL_COF;
  27.     const uint32_t second_cof = SCAL_IN_VEC;
  28.     omp_set_num_threads(number_of_thread);
  29.     puts("Before parallel section");
  30. #pragma omp parallel
  31.     {
  32.         bind_thread_to_core();
  33.  
  34.         int thread_self_number = omp_get_thread_num();
  35.         printf("Thread[%d]: affter bind\n", thread_self_number);
  36.         int thread_array_size = ARRAY_SIZE_IN_BYTE / number_of_thread;
  37.  
  38.         vec_rwc_type dummy_val = _mm256_set1_pd(0.0f);
  39.  
  40.         scal_rwc_type* rwc_array =
  41.             (scal_rwc_type*)_mm_malloc(thread_array_size, VEC_TYPE_ALIGNMENT);
  42.         scal_rwc_type* dummy_upack_vec =
  43.             _mm_malloc(SCAL_IN_VEC * sizeof(scal_rwc_type), VEC_TYPE_ALIGNMENT);
  44.  
  45.         memset(dummy_upack_vec, 0, SCAL_IN_VEC * sizeof(scal_rwc_type));
  46.         memset(rwc_array, 0xFF, thread_array_size);
  47.  
  48.         scal_rwc_type* array_end =
  49.             rwc_array + thread_array_size / sizeof(scal_rwc_type);
  50.  
  51.         double start_time = 0.0;
  52.         double end_time = 0.0;
  53.         double time = 10.0;
  54.         printf("Thread[%d]: before barrier\n", thread_self_number);
  55. #pragma omp barrier
  56.         for (register scal_rwc_type* iter_ptr = rwc_array; iter_ptr < array_end;
  57.              iter_ptr += iteration_step) {
  58.             dummy_val += _mm256_load_pd(iter_ptr);
  59.             dummy_val += _mm256_load_pd(iter_ptr + second_cof);
  60.         }
  61.         for (uint_fast32_t iter = 0; iter < ALL_ITER; ++iter) {
  62.             start_time = omp_get_wtime();
  63.             for (register scal_rwc_type* iter_ptr = rwc_array;
  64.                  iter_ptr < array_end; iter_ptr += iteration_step) {
  65.                 dummy_val += _mm256_load_pd(iter_ptr);
  66.                 dummy_val += _mm256_load_pd(iter_ptr + second_cof);
  67.             }
  68.             end_time = omp_get_wtime();
  69.             if (time > end_time - start_time) {
  70.                 time = end_time - start_time;
  71.             }
  72.         }
  73.         for (register scal_rwc_type* iter_ptr = rwc_array; iter_ptr < array_end;
  74.              iter_ptr += iteration_step) {
  75.             dummy_val += _mm256_load_pd(iter_ptr);
  76.             dummy_val += _mm256_load_pd(iter_ptr + second_cof);
  77.         }
  78.         _mm256_store_pd(dummy_upack_vec, dummy_val);
  79.         printf("Dummy print:\n");
  80.         for (int i = 0; i < SCAL_IN_VEC; ++i) {
  81.             printf("[%d]%f\n", i, dummy_upack_vec[i]);
  82.         }
  83.         printf("Thread[%d] read time: %f sec\n", thread_self_number, time);
  84.         _mm_free(rwc_array);
  85.         _mm_free(dummy_upack_vec);
  86.     }
  87.     return 0;
  88. }
  89. void bind_thread_to_core(void) {
  90.     pthread_t thread = pthread_self();
  91.     int thread_num = omp_get_thread_num();
  92.     cpu_set_t affinity_masks;
  93.     CPU_ZERO(&affinity_masks);
  94.     CPU_SET(thread_num, &affinity_masks);
  95.     pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinity_masks);
  96. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement