SHARE
TWEET

Untitled

a guest Apr 26th, 2019 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <stdio.h>
  2. #include <assert.h>
  3. #include <stdlib.h>
  4. #include "timer.h"
  5. #include <iostream>
  6. #include <string>
  7.  
  8. using namespace std;
  9.  
  /**
   * Abort the program if a CUDA runtime call reported a failure.
   *
   * @param err Status code returned by a CUDA runtime API call.
   *
   * On any value other than cudaSuccess the textual description obtained from
   * cudaGetErrorString is written to standard output, followed by a newline,
   * and the process terminates with exit code 1. On success nothing happens.
   */
  static void handleError(cudaError_t err)
  {
      // Fast path: the call succeeded, nothing to report.
      if (err == cudaSuccess)
      {
          return;
      }

      cout << cudaGetErrorString(err) << "\n";
      exit(1);
  }
  18.  
  19. #define BLOCK_SIZE 512
  20.  
  /**
   * Synthetic per-element accumulation kernel used as a benchmark workload.
   *
   * Each thread owns one flat index `id` (1D launch: threadIdx.x plus
   * blockIdx.x * blockDim.x). It stores 1 into matrix[id] and then adds
   * matrix[id] to vector[id] VECTOR_SIZE times.
   *
   * Template parameters:
   *   VECTOR_SIZE  trip count of the accumulation loop; also used here as the
   *                upper bound for valid indices (assumes both buffers hold at
   *                least VECTOR_SIZE elements -- confirm against the caller).
   *   DUMMY        not referenced by the kernel body.
   *   T            element type of both buffers.
   *
   * @param vector  device buffer receiving the accumulated values.
   * @param matrix  device buffer overwritten with 1 at every processed index.
   *
   * NOTE: vector[id] is zeroed only when VECTOR_SIZE != BLOCK_SIZE. When the
   * two are equal the loop accumulates on top of whatever vector[id] already
   * contains. This is preserved from the original code; the braces below only
   * make explicit that `matrix[id] = 1` was never part of the `if`.
   */
  template<int VECTOR_SIZE, int DUMMY, typename T>
  __global__
  void reduce(T* vector, T* matrix)
  {
      int id = threadIdx.x + blockIdx.x * blockDim.x;

      // Threads past the logical end of the data must not touch memory; this
      // keeps the kernel safe when the grid is rounded up to whole blocks.
      if (id >= VECTOR_SIZE)
      {
          return;
      }

      if (VECTOR_SIZE != BLOCK_SIZE)
      {
          vector[id] = 0;
      }
      matrix[id] = 1;

      // Deliberately accumulates through the buffers on every iteration; the
      // loop is the workload being measured, so it is left untouched.
      for (int i = 0; i < VECTOR_SIZE; i++)
      {
          vector[id] += matrix[id];
      }
  }
  36.  
  37.  
  /**
   * Format a floating-point value using a comma as the decimal separator.
   *
   * The value is first rendered with to_string, then every '.' character in
   * the resulting text is replaced by ','. All other characters are kept.
   *
   * @param value Number to format.
   * @return The formatted text, e.g. 1.5 becomes "1,500000".
   */
  string convertToComaNotation(double value)
  {
      string text = to_string(value);

      for (string::size_type pos = 0; pos < text.size(); ++pos)
      {
          if (text[pos] == '.')
          {
              text[pos] = ',';
          }
      }

      return text;
  }
  47.  
  /**
   * Launch the `reduce` kernel `count` times, distributing the launches
   * round-robin over STREAMS CUDA streams, then wait for completion and copy
   * one result vector back to the host.
   *
   * Template parameters:
   *   VECTOR_SIZE  number of int elements processed per launch (must be > 0).
   *   STREAMS      number of CUDA streams to create and use (must be > 0).
   *
   * @param count Number of kernel launches to issue.
   *
   * Any CUDA failure is routed through handleError, which terminates the
   * process. All streams and host/device buffers created here are released
   * before returning.
   */
  template<int VECTOR_SIZE,int STREAMS>
  void singleTest(int count)
  {
      static_assert(STREAMS > 0, "singleTest needs at least one stream");
      static_assert(VECTOR_SIZE > 0, "singleTest needs a non-empty vector");

      const int num_streams = STREAMS;

      cudaStream_t streams[num_streams];

      for (int i = 0; i < num_streams; i++) {
          handleError(cudaStreamCreate(&streams[i]));
      }

      const size_t vector_size = sizeof(int) * VECTOR_SIZE;

      // Round the grid up so a VECTOR_SIZE that is not a multiple of BLOCK_SIZE
      // is still fully covered, and pad every device slice to a whole number of
      // blocks so each launched thread has an in-bounds element.
      const int blocks = (VECTOR_SIZE + BLOCK_SIZE - 1) / BLOCK_SIZE;
      const size_t slice_elems = (size_t) blocks * BLOCK_SIZE;
      const size_t device_bytes = sizeof(int) * slice_elems * num_streams;

      int *h_res = (int *) malloc(vector_size);
      if (h_res == NULL)
      {
          cout << "host allocation failed\n";
          exit(1);
      }

      int *d_vector = 0;
      int *d_matrix = 0;

      // One slice per stream: launches in different streams may run
      // concurrently, so they must not write to the same device elements.
      handleError(cudaMalloc((void **) &d_vector, device_bytes));
      handleError(cudaMalloc((void **) &d_matrix, device_bytes));

      dim3 threads(BLOCK_SIZE);
      dim3 grid(blocks);

      for (int id = 0; id < count; id++)
      {
          const int slot = id % num_streams;
          int *vector_slice = d_vector + (size_t) slot * slice_elems;
          int *matrix_slice = d_matrix + (size_t) slot * slice_elems;

          reduce<VECTOR_SIZE,BLOCK_SIZE,int><<< grid, threads, 0, streams[slot] >>>(vector_slice, matrix_slice);
      }

      // Launch-configuration errors are reported here; faults raised while the
      // kernels execute surface at the synchronisation below.
      handleError(cudaPeekAtLastError());

      handleError(cudaDeviceSynchronize());

      // Copy back the slice written by the first stream.
      handleError(cudaMemcpy(h_res, d_vector, vector_size, cudaMemcpyDeviceToHost));

      handleError(cudaFree(d_vector));
      handleError(cudaFree(d_matrix));
      free(h_res);

      for (int i = 0; i < num_streams; i++) {
          handleError(cudaStreamDestroy(streams[i]));
      }
  }
  86.  
  /**
   * Benchmark driver: times 100 launches of the 4096-element test using one
   * stream, then using four streams, and prints each elapsed time on its own
   * line with a comma as the decimal separator.
   *
   * Timer is declared in timer.h (not shown here); it is presumably started on
   * construction and restarted by reset() -- confirm against that header.
   */
  int main()
  {
      // Untimed warm-up: the first CUDA work in a process pays one-time start-up
      // costs, which would otherwise be charged to the single-stream
      // measurement only and skew the comparison.
      singleTest<512 * 8, 1>(1);

      Timer timer;
      singleTest<512 * 8, 1>(100);
      cout << convertToComaNotation(timer.elapsed()) << "\n";

      timer.reset();
      singleTest<512 * 8, 4>(100);
      cout << convertToComaNotation(timer.elapsed()) << "\n";

      return 0;
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top