Advertisement
Mickey13

Untitled

Nov 17th, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.14 KB | None | 0 0
  1. // Includes
  2. #include <stdio.h>
  3. #include <ctime>
  4. #include <iostream>
  5. #include <algorithm>
  6.  
  7.  
  8. // includes CUDA
  9. #include <cuda_runtime.h>
  10.  
  11. #include "cudautil.cuh"
  12. #include "cuda_call.h"
  13.  
  14. using namespace std;
  15.  
  16. const int DIM_PORTION = 32;
  17. const int LIGNES_BLOC = 8;
  18.  
  19.  
  20. // Code GPU
  21. __global__ void copymat_device(float* A, float* B)
  22. {
  23. // A compléter
  24.  
  25. int x = blockIdx.x * DIM_PORTION + threadIdx.x;
  26. int y = blockIdx.y * DIM_PORTION + threadIdx.y;
  27. int largeur = gridDim.x * DIM_PORTION;
  28.  
  29. for (int j = 0; j < DIM_PORTION; j+= LIGNES_BLOC){
  30. B[(y+j)*largeur + x] = A[(y+j)*largeur + x];
  31. }
  32.  
  33.  
  34. }
  35.  
  36.  
  37. // Code CPU
  38. void genmat(float *A, int n)
  39. {
  40. for (int i=0; i<n; i++)
  41. for (int j=0; j<n; j++)
  42. A[i*n + j] = rand()/(float) RAND_MAX;
  43. }
  44.  
  45.  
  46. float verify(const float *A, const float* B, int n)
  47. {
  48. float error = 0;
  49. for (int i=0; i<n; i++)
  50. for (int j=0; j<n; j++)
  51. error = (std::max)(error, abs(A[i*n + j] - B[i*n + j]));
  52.  
  53. return error;
  54. }
  55.  
  56.  
  57.  
  58.  
  59. // Host code
  60. int main(void)
  61. {
  62.  
  63.  
  64. int dev;
  65. cudaError_t check = cudaGetDevice(&dev);
  66.  
  67. if(check == cudaSuccess)
  68. printf("OK\n");
  69.  
  70.  
  71. printf("Copie de matrice\n");
  72.  
  73. // n ne peut exceder 256 (pb d'allocation mémoire)
  74. int n = 256;
  75. size_t size = n*n*sizeof(float);
  76.  
  77. // Initialisation de CUDA
  78. //checkCUDA(0);
  79.  
  80. // Matrices CPU
  81. float *h_A = NULL, *h_B = NULL;
  82.  
  83. // Matrices GPU
  84. float *d_A = NULL, *d_B = NULL;
  85.  
  86. // Allocatation des vecteurs dans la mémoire CPU
  87. h_A = new float[n*n];
  88. h_B = new float[n*n];
  89.  
  90.  
  91.  
  92. // Allocation des vecteurs dans la mémoire GPU
  93. // A compléter
  94. CUDA_SAFE_CALL(cudaMalloc((void **) &d_A, size));
  95. CUDA_SAFE_CALL(cudaMalloc((void **) &d_B, size));
  96.  
  97. // Initialisation de la matrice A
  98. genmat(h_A, n);
  99.  
  100. // Copie de la matrice A dans la mémoire GPU
  101. cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
  102.  
  103. // Appel du kernel
  104. const int block_size = 32;
  105. dim3 threadsPerBlock(block_size, block_size);
  106. dim3 numBlocks(n / threadsPerBlock.x, n / threadsPerBlock.y);
  107.  
  108. copymat_device<<<numBlocks, threadsPerBlock>>>(d_A, d_B);
  109.  
  110. // Copie du résultat
  111. cudaMemcpy(h_B, d_B, size, cudaMemcpyDeviceToHost);
  112.  
  113. // Verification
  114. printf("Erreur max: %e\n", verify(h_A, h_B, n));
  115.  
  116.  
  117.  
  118. // Timing
  119. cudaEvent_t start, stop;
  120. CUDA_SAFE_CALL(cudaEventCreate(&start));
  121. CUDA_SAFE_CALL(cudaEventCreate(&stop));
  122.  
  123. const int nb = 10;
  124. CUDA_SAFE_CALL(cudaEventRecord(start, 0));
  125.  
  126.  
  127.  
  128. for (int i = 0; i < nb; i++){
  129. copymat_device<<<numBlocks, threadsPerBlock>>>(d_A, d_B);
  130. }
  131.  
  132. CUDA_SAFE_CALL(cudaEventRecord(stop, 0));
  133. CUDA_SAFE_CALL(cudaEventSynchronize(stop));
  134.  
  135. float t_ms;
  136. cudaEventElapsedTime(&t_ms, start, stop);
  137. t_ms /= nb;
  138.  
  139.  
  140.  
  141. printf ("Time for the kernel: %f ms\n", t_ms); // Print Elapsed time
  142.  
  143. // printf("Bande passante: %e GB/s\n", (size*2)/(1024*1024*1024)/(t_ms*1e-6));
  144. printf("Bande passante: %e B/s\n", (size*2)/(t_ms*1e-6));
  145.  
  146.  
  147.  
  148. // Deallocation de la memoire GPU
  149. // A compléter
  150.  
  151. CUDA_SAFE_CALL(cudaFree( d_A ));
  152. CUDA_SAFE_CALL(cudaFree( d_B ));
  153.  
  154. // Deallocation de la memoire CPU
  155. delete [] h_A;
  156. delete [] h_B;
  157. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement