Advertisement
Guest User

Untitled

a guest
Apr 26th, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.01 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <assert.h>
  3. #include <stdlib.h>
  4. #include "timer.h"
  5. #include <iostream>
  6. #include <string>
  7.  
  8. using namespace std;
  9.  
  // Terminates the program when a CUDA runtime call reports a failure.
  //
  // err: status code returned by a CUDA runtime API call.
  //
  // For any status other than cudaSuccess, the runtime's description of the
  // error is written to standard output followed by a newline, and the process
  // exits with status 1. On cudaSuccess the function returns without effect.
  static void handleError(cudaError_t err)
  {
      // Fast path: nothing to report.
      if (err == cudaSuccess)
          return;

      cout << cudaGetErrorString(err) << "\n";
      exit(1);
  }
  18.  
  19. #define BLOCK_SIZE 512
  20.  
  // Benchmark kernel: every thread adds its own matrix element into its own
  // vector element VECTOR_SIZE times.
  //
  // Template parameters:
  //   VECTOR_SIZE - number of elements each buffer must hold; also the trip
  //                 count of the accumulation loop.
  //   DUMMY       - not referenced by the kernel body.
  //   T           - element type of both buffers.
  //
  // Parameters:
  //   vector - device buffer that is accumulated into (>= VECTOR_SIZE elements).
  //   matrix - device buffer whose element is set to 1 by each thread
  //            (>= VECTOR_SIZE elements).
  //
  // Launch layout: 1D grid of 1D blocks, one thread per element. Threads whose
  // global index is outside [0, VECTOR_SIZE) return without touching memory.
  template<int VECTOR_SIZE, int DUMMY, typename T>
  __global__
  void reduce(T* vector, T* matrix)
  {
      const int id = threadIdx.x + blockIdx.x * blockDim.x;

      // Bounds guard: keeps the kernel safe when the launch supplies more
      // threads than there are elements (e.g. a rounded-up grid).
      if (id >= VECTOR_SIZE)
          return;

      // NOTE(review): the accumulator is zeroed only when VECTOR_SIZE differs
      // from BLOCK_SIZE; otherwise the loop starts from whatever the buffer
      // already holds. DUMMY is never consulted -- confirm whether this
      // comparison was meant to use it.
      if (VECTOR_SIZE != BLOCK_SIZE)
          vector[id] = 0;
      matrix[id] = 1;

      // Read-modify-write is deliberately left as written so the amount of
      // memory traffic the benchmark generates is unchanged.
      for (int i = 0; i < VECTOR_SIZE; i++)
      {
          vector[id] += matrix[id];
      }
  }
  36.  
  37.  
  // Renders a double as text with a comma as the decimal separator.
  //
  // value: number to format.
  //
  // Returns the result of to_string(value) with every '.' character replaced
  // by ','. All other characters are left unchanged.
  string convertToComaNotation(double value)
  {
      string text = to_string(value);

      for (size_t pos = 0; pos < text.size(); ++pos)
      {
          if (text[pos] == '.')
              text[pos] = ',';
      }

      return text;
  }
  47.  
  // Runs one benchmark pass: launches the reduce kernel `count` times, spreading
  // the launches round-robin over STREAMS CUDA streams, waits for the device to
  // finish, and copies the result vector back to the host.
  //
  // Template parameters:
  //   VECTOR_SIZE - number of int elements in each device buffer.
  //   STREAMS     - number of CUDA streams to create and launch on (must be > 0).
  //
  // Parameters:
  //   count - number of kernel launches to issue.
  //
  // Any CUDA failure is routed through handleError, which prints the error and
  // exits. All streams and host/device buffers created here are released before
  // returning.
  template<int VECTOR_SIZE,int STREAMS>
  void singleTest(int count)
  {
      // A zero-length stream array and `id % 0` below would both be invalid.
      static_assert(STREAMS > 0, "singleTest requires at least one stream");

      const int num_streams = STREAMS;

      cudaStream_t streams[num_streams];

      for (int i = 0; i < num_streams; i++) {
          handleError(cudaStreamCreate(&streams[i]));
      }

      const size_t vector_size = sizeof(int) * VECTOR_SIZE;

      int *h_res = (int *) malloc(vector_size);
      if (h_res == NULL)
      {
          cout << "host allocation failed" << "\n";
          exit(1);
      }

      int *d_vector = 0;
      int *d_matrix = 0;

      handleError(cudaMalloc((void **) &d_vector, vector_size));
      handleError(cudaMalloc((void **) &d_matrix, vector_size));

      // Integer division: if VECTOR_SIZE is not a multiple of BLOCK_SIZE the
      // trailing elements get no thread. The grid is intentionally not rounded
      // up here so the launch never exceeds the allocated buffers.
      dim3 threads(BLOCK_SIZE);
      dim3 grid(VECTOR_SIZE/BLOCK_SIZE);

      // NOTE(review): every launch targets the same two buffers, so launches
      // placed on different streams may overlap while writing the same
      // elements -- confirm this is acceptable for the benchmark.
      for (int id = 0; id < count; id++)
      {
          reduce<VECTOR_SIZE,BLOCK_SIZE,int><<< grid, threads, 0, streams[id%num_streams] >>>(d_vector, d_matrix);
      }

      // Launch-configuration errors surface here ...
      handleError(cudaPeekAtLastError());

      // ... and asynchronous execution errors surface at the synchronization.
      handleError(cudaDeviceSynchronize());

      handleError(cudaMemcpy(h_res, d_vector, vector_size, cudaMemcpyDeviceToHost));

      handleError(cudaFree(d_vector));
      handleError(cudaFree(d_matrix));

      free(h_res);

      for (int i = 0; i < num_streams; i++) {
          handleError(cudaStreamDestroy(streams[i]));
      }
  }
  86.  
  // Benchmark driver: runs singleTest over 512 * 8 elements with 100 kernel
  // launches, first on one stream and then on four, printing the timer reading
  // after each run with a comma as the decimal separator.
  //
  // NOTE(review): Timer comes from timer.h, which is not shown; presumably
  // elapsed() reports the time since construction or the last reset() --
  // confirm the unit against that header.
  int main()
  {
      const int kLaunches = 100;

      Timer timer;

      // Single-stream run.
      singleTest<512 * 8, 1>(kLaunches);
      cout << convertToComaNotation(timer.elapsed()) << "\n";

      timer.reset();

      // Four-stream run.
      singleTest<512 * 8, 4>(kLaunches);
      cout << convertToComaNotation(timer.elapsed()) << "\n";

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement