Advertisement
Guest User

cuda_reduce

a guest
Mar 29th, 2013
170
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.04 KB | None | 0 0
  1. #include <iostream>
  2. #include <cstdlib>
  3. #include <ctime>
  4. #include <cuda_runtime.h>
  5. using namespace std;
  6.  
  7.  
  8. void init(float*, int);
  9.  
  // CUDA kernel code
  //
  // Single-block dot product: on return dc[0] holds the sum of da[i] * db[i]
  // for i in [0, n).
  //
  // Launch layout: one 1-D block, e.g. sum_reduce<<<1, threads>>>(...). Any
  //   block size >= 1 is accepted; when blockDim.x < n each thread first
  //   accumulates several elements. Blocks other than block 0 exit immediately
  //   so that they cannot race on dc.
  // Memory: da, db and dc must be device pointers to at least n floats each.
  //   dc is used as scratch for the per-thread partial sums; only dc[0] is
  //   meaningful afterwards. No shared memory is required.
  // n <= 0: nothing is read or written.
  __global__ void sum_reduce(float* da, float* db, float* dc, int n)
  {
    // The whole block leaves together, so no thread of it waits at a barrier.
    if (blockIdx.x != 0)
      return;

    const int tid = threadIdx.x;
    const int nthreads = blockDim.x;
    // Number of dc slots that hold a partial sum; never exceeds n, the size of dc.
    const int active = (nthreads < n) ? nthreads : n;

    // Phase 1: each participating thread sums the products of the elements
    // tid, tid + nthreads, tid + 2 * nthreads, ... and stores its partial sum.
    if (tid < active) {
      float partial = 0.0f;
      for (int i = tid; i < n; i += nthreads)
        partial += da[i] * db[i];
      dc[tid] = partial;
    }
    // All partial sums must be visible before any thread combines them.
    __syncthreads();

    // Phase 2: pairwise tree reduction over dc[0 .. active). The bounds test
    // keeps the read inside the array when active is not a power of two.
    for (int stride = 1; stride < active; stride *= 2) {
      if (tid % (2 * stride) == 0 && tid + stride < active)
        dc[tid] += dc[tid + stride];
      // Kept outside the branch: every thread of the block must reach it.
      __syncthreads();
    }
  }
  21.  
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when status is cudaSuccess, false otherwise.
  static bool cuda_ok(cudaError_t status, const char* what)
  {
    if (status == cudaSuccess)
      return true;
    std::cerr << "**CUDA error (" << what << "): "
              << cudaGetErrorString(status) << "**" << std::endl;
    return false;
  }

  // Computes the dot product of two random n-element vectors on the device
  // with sum_reduce and on the host, and prints both values for comparison.
  //
  // Usage: <program> n
  // Returns 0 on success, 1 on bad arguments or on any CUDA failure.
  int main(int argc, char** argv) {
    // interpret command-line arguments
    if (argc != 2) {
      std::cerr << "**invalid number of arguments**" << std::endl;
      return 1;
    }
    const int n = atoi(argv[1]);
    // atoi yields 0 for non-numeric text; zero and negative lengths are
    // rejected before they reach new[] or the launch configuration.
    if (n <= 0) {
      std::cerr << "**vector length must be a positive integer**" << std::endl;
      return 1;
    }

    // The kernel is launched as a single block with one thread per element,
    // so n may not exceed the device's threads-per-block limit.
    int device = 0;
    int maxThreads = 0;
    if (!cuda_ok(cudaGetDevice(&device), "cudaGetDevice") ||
        !cuda_ok(cudaDeviceGetAttribute(&maxThreads,
                                        cudaDevAttrMaxThreadsPerBlock, device),
                 "cudaDeviceGetAttribute"))
      return 1;
    if (n > maxThreads) {
      std::cerr << "**vector length " << n << " exceeds the device limit of "
                << maxThreads << " threads per block**" << std::endl;
      return 1;
    }

    srand((unsigned)time(NULL));

    // host vectors
    float* ha = new float[n];
    float* hb = new float[n];
    init(ha, n);
    init(hb, n);

    // device vectors (da[n], db[n], dc[n]); kept null until allocated so the
    // cleanup below is valid on every path (cudaFree of a null pointer is a no-op)
    float* da = NULL;
    float* db = NULL;
    float* dc = NULL;
    const std::size_t bytes = n * sizeof(float);
    float dx = 0.0f;

    // allocate, then copy from the host to the device ha -> da, hb -> db;
    // the && chain stops at the first call that fails
    bool ok = cuda_ok(cudaMalloc((void**)&da, bytes), "cudaMalloc da")
           && cuda_ok(cudaMalloc((void**)&db, bytes), "cudaMalloc db")
           && cuda_ok(cudaMalloc((void**)&dc, bytes), "cudaMalloc dc")
           && cuda_ok(cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy ha -> da")
           && cuda_ok(cudaMemcpy(db, hb, bytes, cudaMemcpyHostToDevice),
                      "cudaMemcpy hb -> db");

    if (ok) {
      // run the reduction on the device
      sum_reduce<<<1, n>>>(da, db, dc, n);

      // A launch returns no status itself: cudaGetLastError reports a bad
      // launch configuration, and the blocking copy that follows reports
      // faults raised while the kernel ran.
      // copy the result from the device to the host dc[0] -> dx
      ok = cuda_ok(cudaGetLastError(), "sum_reduce launch")
        && cuda_ok(cudaMemcpy(&dx, dc, sizeof(float), cudaMemcpyDeviceToHost),
                   "cudaMemcpy dc -> host");
    }

    if (ok) {
      // dot product on the host
      float hx = 0;
      for (int i = 0; i < n; i++)
        hx += ha[i] * hb[i];

      // compare the results
      std::cout << "Device = " << dx << " Host = " << hx << std::endl;
    }

    // free device memory
    cudaFree(da);
    cudaFree(db);
    cudaFree(dc);

    // free host memory
    delete [] ha;
    delete [] hb;

    return ok ? 0 : 1;
  }
  77.  
  // Fills a[0 .. n) with pseudo-random values in [0.0f, 1.0f], drawing one
  // rand() value per element in index order. Does nothing when n <= 0.
  // Seeding (srand) is left to the caller.
  void init(float* a, int n) {
    // Reciprocal computed once so each element costs a single multiply.
    const float scale = 1.0f / RAND_MAX;
    int idx = 0;
    while (idx < n) {
      a[idx] = ::rand() * scale;
      ++idx;
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement