Advertisement
Guest User

CUDA

a guest
Apr 24th, 2019
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.18 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <iostream>
  6. #include <math.h>
  7. using namespace std;
  8.  
  // Capacity (in floats) of the per-sample result buffers.
  #define SIZE_DARR 1000000

  // Host-side staging storage: per-sample values and the integration step.
  float hres[SIZE_DARR] = {};
  float hh[1] = {};

  // Device-side counterparts of hres / hh; allocated inside experiment().
  float* dres = 0;
  float* dh = 0;

  // Kernel time of the most recent experiment, in milliseconds.
  float gpuTime = 0.0f;

  // Launch configuration: enough 1024-thread blocks to cover SIZE_DARR elements.
  int threadsPerBlock = 1024;
  int blocksPerGrid = (SIZE_DARR + threadsPerBlock - 1) / threadsPerBlock;
  17.  
  18.  
  /*
   * Midpoint-rule sampling of cosh(x) = (e^x + e^-x) / 2 on the interval [1, 8].
   *
   * The interval is split into n equal sub-intervals of width h = (8 - 1) / n.
   * For every i in [0, n) the value cosh(1 + h * (i + 0.5)) is ADDED to dres[i],
   * so the caller must zero dres beforehand if it wants the plain samples.
   * The step h is stored to dh[0] so the host can scale the summed samples.
   *
   * Launch layout: 1D grid of 1D blocks of any size; a grid-stride loop covers
   * all n samples regardless of the number of launched threads.
   *
   * Parameters:
   *   n    - number of sub-intervals; expected to be > 0.
   *   dres - device buffer with at least n floats (accumulated into).
   *   dh   - device buffer with at least 1 float (receives the step h).
   */
  __global__ void CalcIntegral(int n, float* dres, float* dh)
  {
      const float a = 1.0f;
      const float b = 8.0f;

      // Keep the step in a register instead of re-reading dh[0] from global
      // memory on every iteration.
      const float h = (b - a) / n;

      int tid = threadIdx.x + blockIdx.x * blockDim.x;
      const int stride = blockDim.x * gridDim.x;

      // A single thread publishes the step; previously every thread stored the
      // same value to the same address.
      if (tid == 0)
      {
          dh[0] = h;
      }

      for (; tid < n; tid += stride)
      {
          // Float literals keep the arithmetic in single precision; a bare 0.5
          // would promote the whole expression to double.
          const float x = a + h * (tid + 0.5f);
          dres[tid] += (expf(x) + expf(-x)) * 0.5f;
      }
  }
  37.  
  // Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
  static bool cudaOk(cudaError_t status, const char* what)
  {
      if (status == cudaSuccess)
      {
          return true;
      }
      cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status) << endl;
      return false;
  }

  /*
   * Runs one timed integration with n sub-intervals.
   *
   * Allocates and zeroes the device sample buffer, launches CalcIntegral between
   * two CUDA events to measure the kernel time (stored in gpuTime, milliseconds),
   * copies the n samples and the step back into hres / hh, and prints the elapsed
   * time followed by the resulting integral.
   *
   * n must be in [1, SIZE_DARR]; other values are rejected with a message because
   * hres cannot hold more than SIZE_DARR samples. Any failing CUDA call is
   * reported on stderr and the experiment is abandoned after releasing the
   * resources acquired so far.
   */
  void experiment(int n)
  {
      if (n <= 0 || n > SIZE_DARR)
      {
          cerr << "experiment: n must be in [1, " << SIZE_DARR << "], got " << n << endl;
          return;
      }

      const size_t resBytes = static_cast<size_t>(n) * sizeof(float);

      // Reset per-run state. (The previous `hres[SIZE_DARR] = {0}` / `hh[1] = {0}`
      // wrote one element past the end of each array instead of clearing them.)
      hh[0] = 0.0f;
      gpuTime = 0.0f;
      dres = 0;
      dh = 0;
      cudaEvent_t start = 0;
      cudaEvent_t stop = 0;

      cout << endl << n << " Элементов" << endl << "Время (ms) \n";

      // The kernel accumulates into dres, so the buffer is zeroed on the device
      // rather than uploading whatever a previous run left in hres.
      // dh holds exactly one float, so exactly one float is copied.
      bool ok = cudaOk(cudaMalloc((void**)&dres, resBytes), "cudaMalloc(dres)")
             && cudaOk(cudaMalloc((void**)&dh, sizeof(float)), "cudaMalloc(dh)")
             && cudaOk(cudaMemset(dres, 0, resBytes), "cudaMemset(dres)")
             && cudaOk(cudaMemcpy(dh, hh, sizeof(float), cudaMemcpyHostToDevice), "cudaMemcpy(dh <- hh)")
             && cudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")
             && cudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")
             && cudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)");

      if (ok)
      {
          CalcIntegral<<<blocksPerGrid, threadsPerBlock>>>(n, dres, dh);

          // Launch-configuration errors only surface through cudaGetLastError();
          // execution errors surface at the event synchronization below.
          ok = cudaOk(cudaGetLastError(), "CalcIntegral launch")
            && cudaOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")
            && cudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
            && cudaOk(cudaEventElapsedTime(&gpuTime, start, stop), "cudaEventElapsedTime")
            && cudaOk(cudaMemcpy(hres, dres, resBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(hres <- dres)")
            && cudaOk(cudaMemcpy(hh, dh, sizeof(float), cudaMemcpyDeviceToHost), "cudaMemcpy(hh <- dh)");
      }

      if (ok)
      {
          // Sum in double: adding up to a million floats in single precision
          // loses noticeable accuracy.
          double result = 0.0;
          for (int i = 0; i < n; i++)
          {
              result += hres[i];
          }
          result *= hh[0];

          cout.width(10);
          cout.setf(ios::right);
          cout << gpuTime << endl;
          cout << "Integral = " << result << endl;
      }

      // Release whatever was acquired; cudaFree accepts a null pointer.
      if (start)
      {
          cudaEventDestroy(start);
      }
      if (stop)
      {
          cudaEventDestroy(stop);
      }
      cudaFree(dres);
      cudaFree(dh);
      dres = 0;
      dh = 0;
  }
  89.  
  // Entry point: prints the launch configuration, then runs the experiment for
  // 100, 1000, ... elements up to SIZE_DARR and waits for a key press.
  int main()
  {
      setlocale(LC_CTYPE, "rus");

      cout << endl
           << "CUDA kernel launch with " << blocksPerGrid
           << " blocks of " << threadsPerBlock
           << " threads" << endl;

      // Problem size grows by a factor of ten on every pass.
      int elements = 100;
      while (elements <= SIZE_DARR)
      {
          experiment(elements);
          elements *= 10;
      }

      // Keep the console window open until the user presses a key.
      getchar();
      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement