Advertisement
Maximuss23

AoMMaM_4

Jun 6th, 2020
1,208
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.36 KB | None | 0 0
/* Expose the GNU extensions this file relies on (CPU_SET,
 * pthread_setaffinity_np, RUSAGE_THREAD); must precede every system header. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <malloc.h>
#include <pthread.h>
#include <sched.h>
#include <sys/resource.h>
#include <sys/time.h>

#include <immintrin.h>
  11.  
  12. #define ITERATION_COUNT 200
  13. #define BUFFER_SIZE 128 * 1024 * 1024
  14. #define MEMCPY_BUFFER_SIZE 8192
  15. //#define NO_CACHE
  16. #define thread_data struct _thread_data
  17.  
  18. int THREAD_COUNT = 0;
  19.  
  20. thread_data {
  21.     double* speeds;
  22.     int id;
  23. };
  24.  
  25. void test_write(__m128i* array) {
  26.     register __m128i tmp;  
  27.     #ifdef NO_CACHE
  28.         for(int it = 0; it < ITERATION_COUNT; it++) {
  29.             for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
  30.                 _mm_stream_si128(&(array[i]), tmp);
  31.             }
  32.         }
  33.     #else
  34.         for(int it = 0; it < ITERATION_COUNT; it++) {
  35.             for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i += MEMCPY_BUFFER_SIZE / sizeof(__m128i)) {
  36.                 memset(&(array[i]), 0, MEMCPY_BUFFER_SIZE);
  37.             }
  38.         }
  39.     #endif
  40. }
  41.  
  42. void test_read(__m128i* array) {
  43.     register __m128i tmp;
  44.     for(int it = 0; it < ITERATION_COUNT; it++) {
  45.         for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
  46.             #ifdef NO_CACHE
  47.                 tmp += _mm_stream_load_si128(&(array[i]));
  48.             #else
  49.                 tmp += array[i];
  50.             #endif
  51.         }
  52.     }
  53.     printf("%d ", ((int*)&tmp)[0]);
  54. }
  55.  
  56. void test_copy(__m128i* source, __m128i* dest) {
  57.     #ifdef NO_CACHE
  58.         for(int it = 0; it < ITERATION_COUNT; it++) {
  59.             for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
  60.                 _mm_stream_si128(&(dest[i]), _mm_stream_load_si128(&(source[i])));
  61.             }
  62.         }
  63.     #else
  64.         for(int it = 0; it < ITERATION_COUNT; it++) {
  65.             for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i += MEMCPY_BUFFER_SIZE / sizeof(__m128i)) {
  66.                 //dest[i] = source[i];
  67.                 memcpy(&(dest[i]), &(source[i]), MEMCPY_BUFFER_SIZE);
  68.             }
  69.         }
  70.     #endif
  71. }
  72.  
  73. void test_copy_no_cache_read(__m128i* source, __m128i* dest) {
  74.     for(int it = 0; it < ITERATION_COUNT; it++) {
  75.         for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
  76.             dest[i] = _mm_stream_load_si128(&(source[i]));
  77.         }
  78.     }
  79. }
  80.  
  81. void test_copy_no_cache_write(__m128i* source, __m128i* dest) {
  82.     for(int it = 0; it < ITERATION_COUNT; it++) {
  83.         for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
  84.             _mm_stream_si128(&(dest[i]), source[i]);
  85.         }
  86.     }
  87. }
  88.  
  89. void bind_thread(int id) {
  90.     pthread_t thread = pthread_self();
  91.     cpu_set_t cpuset;
  92.     CPU_SET(id, &cpuset);
  93.     pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
  94. }
  95.  
  96. double get_seconds(struct rusage* start, struct rusage* end) {
  97.     return end->ru_utime.tv_sec - start->ru_utime.tv_sec + (end->ru_utime.tv_usec - start->ru_utime.tv_usec) * 0.000001;
  98. }
  99.  
  100. void* test_single_thread(void* ptr) {
  101.     thread_data* data = (thread_data*)ptr;
  102.     if(data->id >= 0) {
  103.         bind_thread(data->id);
  104.     }
  105.     double* speeds = data->speeds;
  106.    
  107.     __m128i* source = (__m128i*)malloc(BUFFER_SIZE);
  108.     __m128i* dest = (__m128i*)malloc(BUFFER_SIZE);
  109.    
  110.     struct rusage start, end;
  111.    
  112.     //read
  113.     getrusage(RUSAGE_THREAD, &start);
  114.     test_read(source);
  115.     getrusage(RUSAGE_THREAD, &end);
  116.     speeds[0] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
  117.    
  118.     //write
  119.     getrusage(RUSAGE_THREAD, &start);
  120.     test_write(dest);
  121.     getrusage(RUSAGE_THREAD, &end);
  122.     speeds[1] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
  123.    
  124.     //copy
  125.     getrusage(RUSAGE_THREAD, &start);
  126.     test_copy(source, dest);
  127.     getrusage(RUSAGE_THREAD, &end);
  128.     speeds[2] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
  129.    
  130.     //copy no cache read
  131.     getrusage(RUSAGE_THREAD, &start);
  132.     test_copy_no_cache_read(source, dest);
  133.     getrusage(RUSAGE_THREAD, &end);
  134.     speeds[3] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
  135.    
  136.     //copy no cache write
  137.     getrusage(RUSAGE_THREAD, &start);
  138.     test_copy_no_cache_write(source, dest);
  139.     getrusage(RUSAGE_THREAD, &end);
  140.     speeds[4] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
  141.    
  142.     free(source);
  143.     free(dest);
  144.    
  145.     return NULL;
  146. }
  147.  
  148. void test_multi_thread(double* speeds) {
  149.     pthread_t threads[THREAD_COUNT];
  150.     thread_data datas[THREAD_COUNT];
  151.     for(int i = 0; i < THREAD_COUNT; i++) {
  152.         datas[i].speeds = &(speeds[i * 5]);
  153.         datas[i].id = i;
  154.         pthread_create(&(threads[i]), NULL, test_single_thread, &(datas[i]));
  155.     }
  156.    
  157.     for(int i = 0; i < THREAD_COUNT; i++) {
  158.         pthread_join(threads[i], NULL);
  159.     }
  160.    
  161.     double speeds_overall[5];
  162.     for(int i = 0; i < 5; i++) {
  163.         double speed = 0;
  164.         for(int it = 0; it < THREAD_COUNT; it++) {
  165.             speed += speeds[it * 5 + i];
  166.         }
  167.         speeds_overall[i] = speed;
  168.     }
  169.    
  170.     printf("multithread read speed: %lf MB/S\n", speeds_overall[0]);
  171.     printf("multithread write speed: %lf MB/S\n", speeds_overall[1]);
  172.     printf("multithread copy speed: %lf MB/S\n", speeds_overall[2]);
  173.     printf("multithread copy no cache read speed: %lf MB/S\n", speeds_overall[3]);
  174.     printf("multithread copy no cache write speed: %lf MB/S\n", speeds_overall[4]);
  175. }
  176.  
  177. int main(int argc, char** argv) {
  178.     THREAD_COUNT = atoi(argv[1]);
  179.    
  180.     double speeds_single[5];
  181.     thread_data data;
  182.     data.speeds = speeds_single;
  183.     data.id = -1;
  184.     test_single_thread(&data);
  185.     printf("read speed: %lf MB/S\n", speeds_single[0]);
  186.     printf("write speed: %lf MB/S\n", speeds_single[1]);
  187.     printf("copy speed: %lf MB/S\n", speeds_single[2]);
  188.     printf("copy no cache read speed: %lf MB/S\n", speeds_single[3]);
  189.     printf("copy no cache write speed: %lf MB/S\n", speeds_single[4]);
  190.    
  191.     double* speeds_multi = (double*)malloc(THREAD_COUNT * 5 * sizeof(double));
  192.     test_multi_thread(speeds_multi);
  193.     return EXIT_SUCCESS;
  194. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement