Advertisement
Guest User

Untitled

a guest
Jan 26th, 2020
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.44 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6. #include <math.h>
  7. #include <iostream>
  8. #include <iomanip>
  9. #include <ctime>
  10.  
  11. #define M_PI 3.14159265358979323846
  12.  
  13. static const int blockSize = 1024;
  14. static const int gridSize = 500;
  15. static const int size = gridSize * blockSize;
  16.  
  17. void sumWithCuda(double* table, double cpu_time);
  18.  
  19.  
  20. __global__ void sum(double* in, double* out) {
  21. __shared__ double sh_sum[blockSize];
  22. unsigned int tid = threadIdx.x;
  23. unsigned int i = blockIdx.x * blockSize + tid;
  24.  
  25. sh_sum[tid] = in[i];
  26. __syncthreads();
  27.  
  28. for (int s = blockSize / 2; s > 0; s >>= 1) {
  29. if (tid < s) {
  30. sh_sum[tid] += sh_sum[tid + s];
  31. }
  32. __syncthreads();
  33. }
  34.  
  35. if (tid == 0) out[blockIdx.x] = sh_sum[0];
  36. }
  37.  
  38.  
  39. int main()
  40. {
  41. double org_pi = M_PI;
  42. double sum = 0, pi;
  43. double* a = new double[size];
  44. a[0] = 0;
  45. for (int i = 1; i < size; i++) {
  46. a[i] = 1 / (pow(i, 2));
  47. }
  48.  
  49. clock_t begin = clock();
  50. //CPU
  51. for (int i = 1; i < size; i++) {
  52. sum += a[i];
  53. }
  54. clock_t end = clock();
  55. double elapsed_time = double(end - begin) / CLOCKS_PER_SEC;
  56. pi = sqrt(6 * sum);
  57. double accuracy = 100 * pi / org_pi;
  58. std::cout << std::setprecision(20) << "CPU: " << pi << "\tczas w sekundach: " << elapsed_time << "\t\tdokladnosc: " << accuracy << "%\n";
  59.  
  60. //GPU
  61. sumWithCuda(a, elapsed_time);
  62.  
  63. return 0;
  64. }
  65.  
  66.  
  67. // Helper function for using CUDA to add vectors in parallel.
  68. void sumWithCuda(double* table, double cpu_time)
  69. {
  70. double org_pi = M_PI;
  71. double* dev_table, * dev_out;
  72. double* out = new double[size];
  73.  
  74. cudaMalloc((void**)&dev_table, size * sizeof(double));
  75. cudaMemcpy(dev_table, table, size * sizeof(double), cudaMemcpyHostToDevice);
  76.  
  77. cudaMalloc((void**)&dev_out, size * sizeof(double));
  78.  
  79. clock_t begin = clock();
  80.  
  81. sum << <gridSize, blockSize >> > (dev_table, dev_out);
  82. sum << <1, blockSize >> > (dev_out, dev_out);
  83.  
  84. clock_t end = clock();
  85. double elapsed_time = double(end - begin) / CLOCKS_PER_SEC;
  86.  
  87.  
  88. cudaMemcpy(out, dev_out, size * sizeof(double), cudaMemcpyDeviceToHost);
  89.  
  90. cudaFree(dev_table);
  91. cudaFree(dev_out);
  92.  
  93. double pi_gpu = sqrt(6 * out[0]);
  94. double accuracy = 100 * pi_gpu / org_pi;
  95. std::cout << std::setprecision(20) << "GPU: " << pi_gpu << "\tczas w sekundach: " << elapsed_time << "\t\tdokladnosc: " << accuracy << "%\n";
  96. double speed_up = cpu_time / elapsed_time;
  97. std::cout << std::setprecision(5) << "SpeedUp: " << speed_up << "\n";
  98. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement