Advertisement
Guest User

Untitled

a guest
Nov 17th, 2017
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.77 KB | None | 0 0
  // Includes
  #include <stdio.h>
  #include <algorithm>
  #include <cmath>
  #include <cstdlib>
  #include <ctime>
  #include <iostream>

  // includes CUDA
  #include <cuda_runtime.h>

  #include "../include/cudautil.cu"
  #include "../include/cuda_call.h"
  11.  
  12. #define DIM_PORTION 32
  13. #define LIGNES_BLOC 8
  14.  
  15. // Code GPU
  /**
   * Copies one DIM_PORTION x DIM_PORTION tile of the row-major matrix A into B.
   *
   * Launch layout: a 2D grid with one block per tile. The row length is taken
   * to be gridDim.x * DIM_PORTION elements, and block (bx, by) owns rows
   * [by * DIM_PORTION, (by + 1) * DIM_PORTION) and the matching column range.
   * The threads of a block stride over their tile by blockDim in both
   * dimensions, so any block shape covers the tile exactly once and never
   * leaves it (DIM_PORTION x LIGNES_BLOC is the intended shape).
   *
   * Preconditions: A and B each hold at least
   * (gridDim.x * DIM_PORTION) * (gridDim.y * DIM_PORTION) floats. The kernel
   * receives no size argument, so it cannot bounds-check against the real
   * matrix dimensions. No shared memory is used.
   *
   * @param A source matrix (device pointer, read only)
   * @param B destination matrix (device pointer)
   */
  __global__ void copymat_device(const float* A, float* B){
      // Row length of the matrix, in elements.
      const int largeur = gridDim.x * DIM_PORTION;

      // Top-left corner of the tile owned by this block.
      const int col0 = blockIdx.x * DIM_PORTION;
      const int lig0 = blockIdx.y * DIM_PORTION;

      // Striding by blockDim (instead of a compile-time constant) keeps every
      // access inside the tile whatever block shape the host launches with.
      for (int j = threadIdx.y; j < DIM_PORTION; j += blockDim.y) {
          // size_t offset: row * width may exceed the range of int.
          const size_t base = (size_t)(lig0 + j) * largeur + col0;

          // Consecutive threadIdx.x values touch consecutive addresses.
          for (int i = threadIdx.x; i < DIM_PORTION; i += blockDim.x)
              B[base + i] = A[base + i];
      }
  }
  30.  
  31.  
  32. // Code CPU
  /**
   * Fills the n x n row-major matrix A with pseudo-random values obtained as
   * rand() divided by RAND_MAX, i.e. values in [0, 1].
   *
   * Elements are written in row-major order, one rand() call per element.
   * Nothing is written when n <= 0.
   *
   * @param A destination buffer holding at least n * n floats
   * @param n matrix dimension
   */
  void genmat(float *A, int n){
      const float denom = (float) RAND_MAX;

      for (int lig = 0; lig < n; ++lig) {
          float *debut = A + lig * n;   // first element of the current row
          float *fin = debut + n;       // one past the last element of the row

          for (float *cell = debut; cell != fin; ++cell)
              *cell = rand() / denom;
      }
  }
  38.  
  39.  
  /**
   * Returns the largest absolute element-wise difference between two n x n
   * row-major matrices.
   *
   * @param A first matrix (n * n floats)
   * @param B second matrix (n * n floats)
   * @param n matrix dimension; n <= 0 yields 0
   * @return max |A[k] - B[k]| over all elements, or NaN as soon as one
   *         difference is NaN
   */
  float verify(const float *A, const float* B, int n){
      float error = 0.0f;
      if (n <= 0)
          return error;

      // Flat size_t index: i * n + j in int arithmetic overflows for large n.
      const size_t total = (size_t)n * (size_t)n;
      for (size_t k = 0; k < total; k++) {
          // std::fabs, not unqualified abs: depending on which headers are in
          // scope, abs can bind to the integer overload and truncate the
          // difference to 0.
          const float diff = std::fabs(A[k] - B[k]);

          // std::max would silently drop a NaN (every comparison with NaN is
          // false), hiding a corrupted result; report it instead.
          if (std::isnan(diff))
              return diff;

          error = std::max(error, diff);
      }

      return error;
  }
  48.  
  49. // Host code
  /**
   * Host driver for the matrix-copy exercise.
   *
   * Steps: allocate an n x n matrix on host and device, fill it with
   * genmat(), copy it with copymat_device, check the result against the
   * source with verify(), then time nb further launches with CUDA events and
   * print the effective bandwidth.
   *
   * @return 0 on success, 1 if n is not a multiple of the tile size
   */
  int main(int argc, char** argv){
      (void)argc;
      (void)argv;

      printf("Copie de matrice\n");
      const int n = 256;
      const size_t count = (size_t)n * (size_t)n;
      const size_t size = count * sizeof(float);

      // The kernel copies whole DIM_PORTION x DIM_PORTION tiles and has no
      // bounds check, so the matrix must be an exact number of tiles.
      if (n % DIM_PORTION != 0) {
          fprintf(stderr, "n (%d) doit etre un multiple de %d\n", n, DIM_PORTION);
          return 1;
      }

      // CUDA initialisation (project helper)
      checkCUDA(0);

      // Host matrices
      float *h_A = NULL, *h_B = NULL;
      // Device matrices
      float *d_A = NULL, *d_B = NULL;

      // Host allocations
      h_A = new float[count];
      h_B = new float[count];

      // Device allocations
      CUDA_SAFE_CALL( cudaMalloc((void **)&d_A, size));
      CUDA_SAFE_CALL( cudaMalloc((void **)&d_B, size));

      // Fill the source matrix and upload it
      genmat(h_A, n);
      CUDA_SAFE_CALL(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice));

      // Launch configuration: one block per tile, DIM_PORTION threads wide and
      // LIGNES_BLOC threads high; each thread copies
      // DIM_PORTION / LIGNES_BLOC rows of its tile.
      dim3 threadsPerBlock(DIM_PORTION, LIGNES_BLOC);
      dim3 numBlocks(n / DIM_PORTION, n / DIM_PORTION);

      // Untimed run: produces the result that gets verified and doubles as a
      // warm-up so the timed loop below excludes one-time launch costs.
      copymat_device<<<numBlocks, threadsPerBlock>>>(d_A, d_B);
      CUDA_SAFE_CALL(cudaGetLastError());   // reports an invalid launch configuration

      // The blocking copy also waits for the kernel to finish, so h_B holds the
      // actual kernel output when it is compared with h_A.
      CUDA_SAFE_CALL(cudaMemcpy(h_B, d_B, size, cudaMemcpyDeviceToHost));

      // Verification
      printf("Erreur max: %e\n", verify(h_A, h_B, n));

      // Timing
      cudaEvent_t start, stop;
      CUDA_SAFE_CALL(cudaEventCreate(&start));
      CUDA_SAFE_CALL(cudaEventCreate(&stop));

      const int nb = 10;
      CUDA_SAFE_CALL(cudaEventRecord(start, 0));
      for (int i = 0; i < nb; i++)
          copymat_device<<<numBlocks, threadsPerBlock>>>(d_A, d_B);

      CUDA_SAFE_CALL(cudaEventRecord(stop, 0));
      CUDA_SAFE_CALL(cudaGetLastError());
      CUDA_SAFE_CALL(cudaEventSynchronize(stop));
      float t_ms = 0.0f;
      CUDA_SAFE_CALL(cudaEventElapsedTime(&t_ms, start, stop));
      t_ms /= nb;   // average time of one launch, in milliseconds

      CUDA_SAFE_CALL(cudaEventDestroy(start));
      CUDA_SAFE_CALL(cudaEventDestroy(stop));

      // Effective bandwidth: every element is read once and written once, so
      // one launch moves 2 * size bytes (size already includes sizeof(float)).
      // t_ms is converted to seconds; the result is reported in units of
      // 2^30 bytes per second.
      const double octets = 2.0 * (double)size;
      const double secondes = (double)t_ms / 1000.0;
      printf("Bande passante: %f Go/s\n", octets / secondes / 1073741824.0);

      // Release device memory
      CUDA_SAFE_CALL(cudaFree (d_A));
      CUDA_SAFE_CALL(cudaFree (d_B));

      // Release host memory
      delete [] h_A;
      delete [] h_B;

      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement