Advertisement
Elnidas

Untitled

Dec 1st, 2020
31
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.00 KB | None | 0 0
  1. #include <cuda_runtime.h>
  2. #include <cstdio>
  3. #include <cstdlib>
  4. #include <ctime>
  5. #include "../../../../Desktop/code/GPUTimer.h"
  6.  
  // Host-side element-wise sum: stores in[k] + in2[k] into out[k] for every
  // k in [0, SIZE), visiting the elements in increasing index order.
  // Each of the three arrays must hold at least SIZE floats.
  void sumaVector(const float in[], const float* in2, float* out, size_t SIZE) {
      const float* lhs = in;
      const float* rhs = in2;
      float* dst = out;

      // Walk the three arrays in lockstep; `remaining` counts the elements
      // still to be written.
      for (size_t remaining = SIZE; remaining > 0; --remaining) {
          *dst++ = *lhs++ + *rhs++;
      }
  }
  12.  
  // Device kernel: element-wise sum, out[i] = in[i] + in2[i] for i in [0, SIZE).
  //
  // Launch layout: 1-D grid of 1-D blocks, one thread per element. Any launch
  // with gridDim.x * blockDim.x >= SIZE covers the whole array; threads whose
  // index falls at or beyond SIZE do nothing. No shared memory is used.
  //
  // in, in2 and out must be device pointers to at least SIZE floats.
  __global__ void sumaVectorG(const float* in, const float* in2, float* out, size_t SIZE) {
      // Widen before multiplying so the product is computed in size_t rather
      // than wrapping in 32-bit unsigned arithmetic on very large grids.
      const size_t i = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;

      // The grid rarely divides SIZE evenly: without this guard the surplus
      // threads of the last block read and write past the end of the buffers.
      if (i < SIZE) {
          out[i] = in[i] + in2[i];
      }
  }
  19.  
  20.  
  // CPU path of the benchmark: hands the three arrays and the element count
  // straight to sumaVector, which fills `out` from `in` and `in2`.
  void sumaVectorCPU(float* in, float* in2, float* out, size_t SIZE) {
      sumaVector(in, in2, out, SIZE);

      // Disabled debug dump of the CPU results (one line per element):
      //
      //   printf("Resultados en CPU\n");
      //   for (size_t i = 0; i < SIZE; i++)
      //       printf("h_out[%zu]= %.f ------ h_in[%zu]= %.f--------h_in2[%zu]= %.f\n",
      //              i, out[i], i, in[i], i, in2[i]);
  }
  36.  
  37.  
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when `status` is cudaSuccess, false otherwise.
  static bool sumaVectorGpuOk(cudaError_t status, const char* step) {
      if (status == cudaSuccess) {
          return true;
      }
      fprintf(stderr, "sumaVectorGPU: %s failed: %s\n", step, cudaGetErrorString(status));
      return false;
  }

  // GPU path of the benchmark: copies `in` and `in2` to the device, launches
  // sumaVectorG over SIZE elements and copies the result back into `out`.
  //
  //   in, in2  host arrays holding at least SIZE floats (BYTES bytes are copied)
  //   out      host array receiving BYTES bytes of results
  //   SIZE     number of elements to add
  //   BYTES    number of bytes to transfer per array; callers pass
  //            SIZE * sizeof(float)
  //
  // Device memory is allocated and released on every call. If any CUDA call
  // fails, a message is written to stderr, the remaining steps are skipped,
  // the device buffers are released and `out` is left as it was.
  void sumaVectorGPU(const float* in, const float* in2, float* out, size_t SIZE, size_t BYTES) {
      // Nothing to compute; a launch with zero blocks would be rejected as an
      // invalid configuration.
      if (SIZE == 0 || BYTES == 0) {
          return;
      }

      // Block size is a multiple of the 32-lane warp; the block count is an
      // integer ceiling division, which stays exact for any SIZE (a float
      // ceil() loses precision once SIZE exceeds 2^24).
      const size_t THREADS = 256;
      const size_t BLOCKS = (SIZE + THREADS - 1) / THREADS;

      // Round the device buffers up to one element per launched thread so
      // that the surplus threads of the last block always have a valid
      // element, whether or not the kernel bounds-checks its index.
      const size_t launchBytes = BLOCKS * THREADS * sizeof(float);
      const size_t allocBytes = launchBytes > BYTES ? launchBytes : BYTES;

      float* d_in = NULL;
      float* d_in2 = NULL;
      float* d_out = NULL;

      // && short-circuits, so the first failing step stops the chain.
      bool ok = sumaVectorGpuOk(cudaMalloc(&d_in, allocBytes), "cudaMalloc(d_in)")
             && sumaVectorGpuOk(cudaMalloc(&d_in2, allocBytes), "cudaMalloc(d_in2)")
             && sumaVectorGpuOk(cudaMalloc(&d_out, allocBytes), "cudaMalloc(d_out)")
             && sumaVectorGpuOk(cudaMemcpy(d_in, in, BYTES, cudaMemcpyHostToDevice),
                                "copy of in to device")
             && sumaVectorGpuOk(cudaMemcpy(d_in2, in2, BYTES, cudaMemcpyHostToDevice),
                                "copy of in2 to device");

      if (ok) {
          sumaVectorG<<<static_cast<unsigned int>(BLOCKS), static_cast<unsigned int>(THREADS)>>>(
              d_in, d_in2, d_out, SIZE);

          // A launch does not return a status; configuration errors surface
          // through cudaGetLastError(), execution errors through the blocking
          // copy that follows.
          ok = sumaVectorGpuOk(cudaGetLastError(), "sumaVectorG launch")
            && sumaVectorGpuOk(cudaMemcpy(out, d_out, BYTES, cudaMemcpyDeviceToHost),
                               "copy of result to host");
      }

      // Disabled debug dump of the GPU results (one line per element):
      //
      //   printf("Resultados en GPU\n");
      //   for (size_t i = 0; i < SIZE; i++)
      //       printf("h_out[%zu]= %.f ------ h_in[%zu]= %.f--------h_in2[%zu]= %.f\n",
      //              i, out[i], i, in[i], i, in2[i]);

      // Pointers that were never allocated are still NULL here, which
      // cudaFree accepts as a no-op.
      cudaFree(d_in);
      cudaFree(d_in2);
      cudaFree(d_out);
  }
  76.  
  77.  
  78.  
  79.  
  // Checks that two float arrays agree element by element.
  // Returns true when CPU[k] == GPU[k] for every k in [0, SIZE) (so also when
  // SIZE is 0) and false at the first position where they differ. The
  // comparison is exact: no tolerance is applied.
  bool compare(const float* CPU, const float* GPU, const size_t SIZE) {
      size_t matched = 0;

      // Advance while the elements agree; stop at the first mismatch.
      while (matched < SIZE && CPU[matched] == GPU[matched]) {
          ++matched;
      }

      // Reaching SIZE means no mismatch was found.
      return matched == SIZE;
  }
  88.  
  89.  
  90.  
  // Benchmark driver: fills two SIZE-element host vectors, runs the CPU and
  // GPU additions TRIALS times each, prints the average time per call for
  // both, and reports whether the two result vectors are identical.
  // Returns EXIT_FAILURE if the host buffers cannot be allocated, 0 otherwise.
  int main()
  {
      const size_t SIZE = 64;
      const size_t BYTES = sizeof(float) * SIZE;
      // NOTE(review): every GPU trial goes through sumaVectorGPU, which
      // allocates, copies and frees device memory each time; with this many
      // trials the run may take a very long time. Consider lowering it.
      const long long TRIALS = 100000000;

      float* h_in = (float*)malloc(BYTES);
      float* h_in2 = (float*)malloc(BYTES);
      float* h_out = (float*)malloc(BYTES);
      float* h_outD = (float*)malloc(BYTES);

      // Bail out instead of dereferencing a null buffer; free(NULL) is a
      // no-op, so releasing all four is safe whichever allocation failed.
      if (h_in == NULL || h_in2 == NULL || h_out == NULL || h_outD == NULL) {
          fprintf(stderr, "main: could not allocate host buffers of %lu bytes\n",
                  (unsigned long)BYTES);
          free(h_in);
          free(h_in2);
          free(h_out);
          free(h_outD);
          return EXIT_FAILURE;
      }

      // rand() is never seeded, so the inputs are the same on every run.
      for (size_t i = 0; i < SIZE; i++)
      {
          h_in[i] = (float)i;
          h_in2[i] = (float)(i + rand() % 20);
      }

      // CPU timing with clock(); the index has the same signed type as
      // TRIALS so the loop condition does not mix signed and unsigned.
      clock_t t = clock();
      for (long long trial = 0; trial < TRIALS; trial++) {
          sumaVectorCPU(h_in, h_in2, h_out, SIZE);
      }
      t = clock() - t;
      // Divide in double: a float cannot represent large tick counts exactly.
      printf("CPU - average time elapsed: %f\n",
             (double)t / ((double)TRIALS * CLOCKS_PER_SEC));

      // GPU timing covers the whole sumaVectorGPU call, memory transfers
      // included.
      GpuTimer timer;
      timer.Start();
      for (long long trial = 0; trial < TRIALS; trial++) {
          sumaVectorGPU(h_in, h_in2, h_outD, SIZE, BYTES);
      }
      timer.Stop();
      // Presumably Elapsed() reports milliseconds, hence the factor of 1000
      // to get seconds - confirm against GPUTimer.h.
      printf("GPU- average time elapsed: %f\n", timer.Elapsed() / (TRIALS * 1000.0));

      if (compare(h_out, h_outD, SIZE)) {
          printf("Ha sido todo un exito\n");
      }
      else {
          printf("Ha sido todo un FRACASO........ABSOLUTOOOO\n");
      }

      free(h_in);
      free(h_in2);
      free(h_out);
      free(h_outD);

      return 0;
  }
  144.  
  145.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement