Aaaaa988

Untitled

Jun 16th, 2020
112
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when `status` is cudaSuccess, false otherwise.
  static bool cudaCublasCudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
      return true;
    }
    fprintf(stderr, "cudaCublas: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
  }

  // Reports a failed cuBLAS call on stderr.
  // Returns true when `status` is CUBLAS_STATUS_SUCCESS, false otherwise.
  static bool cudaCublasBlasOk(cublasStatus_t status, const char *what) {
    if (status == CUBLAS_STATUS_SUCCESS) {
      return true;
    }
    fprintf(stderr, "cudaCublas: %s failed with cuBLAS status %d\n", what, (int)status);
    return false;
  }

  /*
   * Times one cublasSaxpy call (y = alpha * x + y, alpha = 3) on two N-element
   * float vectors and prints the elapsed time in milliseconds.
   *
   * Steps:
   *   1. Allocate host buffers with cudaMallocHost and device buffers with cudaMalloc.
   *   2. Fill x with 0, 1, 2, ... and y with 0.87 on the host, then upload both.
   *   3. Bracket the cublasSaxpy call with CUDA events recorded on stream 0.
   *   4. Copy y back to the host and print the measured time.
   *
   * N is taken from the enclosing file. On any CUDA/cuBLAS failure a diagnostic
   * is written to stderr, the timing line is not printed, and every resource
   * acquired so far is still released.
   */
  void cudaCublas() {
    const int num_rows = N;
    const int stride = 1;
    const float alpha = 3.0f;
    const size_t bytes = N * sizeof(float);

    float elapsedTime = 0.0f;
    cudaEvent_t start = NULL;
    cudaEvent_t stop = NULL;
    float *cx_h = NULL;
    float *cy_h = NULL;
    float *cx_d = NULL;
    float *cy_d = NULL;
    cublasHandle_t cublas_handle = NULL;
    bool ok = false;

    // Single-pass block: `break` jumps straight to the shared cleanup below.
    do {
      if (!cudaCublasCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
      if (!cudaCublasCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

      if (!cudaCublasCudaOk(cudaMallocHost((void **)&cx_h, bytes), "cudaMallocHost(cx_h)")) break;
      if (!cudaCublasCudaOk(cudaMallocHost((void **)&cy_h, bytes), "cudaMallocHost(cy_h)")) break;
      if (!cudaCublasCudaOk(cudaMalloc((void **)&cx_d, bytes), "cudaMalloc(cx_d)")) break;
      if (!cudaCublasCudaOk(cudaMalloc((void **)&cy_d, bytes), "cudaMalloc(cy_d)")) break;

      // Host-side input data: x[i] = i, y[i] = 0.87.
      for (int i = 0; i < N; i++) {
        cx_h[i] = (float)i;
        cy_h[i] = 0.87f;
      }

      if (!cudaCublasBlasOk(cublasCreate(&cublas_handle), "cublasCreate")) break;

      // Upload both vectors to the device.
      if (!cudaCublasBlasOk(cublasSetVector(num_rows, sizeof(float), cx_h, stride, cx_d, stride),
                            "cublasSetVector(x)")) break;
      if (!cudaCublasBlasOk(cublasSetVector(num_rows, sizeof(float), cy_h, stride, cy_d, stride),
                            "cublasSetVector(y)")) break;

      // Only the SAXPY call sits between the two events; transfers are excluded.
      if (!cudaCublasCudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)")) break;
      const cublasStatus_t saxpyStatus =
          cublasSaxpy(cublas_handle, N, &alpha, cx_d, stride, cy_d, stride);
      if (!cudaCublasBlasOk(saxpyStatus, "cublasSaxpy")) break;
      if (!cudaCublasCudaOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")) break;

      // Wait for the stop event before reading the elapsed time.
      if (!cudaCublasCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")) break;
      if (!cudaCublasCudaOk(cudaEventElapsedTime(&elapsedTime, start, stop),
                            "cudaEventElapsedTime")) break;

      // Download the result vector y.
      if (!cudaCublasBlasOk(cublasGetVector(num_rows, sizeof(float), cy_d, stride, cy_h, stride),
                            "cublasGetVector(y)")) break;

      ok = true;
    } while (false);

    if (ok) {
      printf("cuBLAS Time:\n \t %f ms\n", elapsedTime);
    }

    // Release everything that was successfully acquired, on success and failure alike.
    if (cublas_handle != NULL) {
      cublasDestroy(cublas_handle);
    }
    if (cx_h != NULL) {
      cudaFreeHost(cx_h);
    }
    if (cy_h != NULL) {
      cudaFreeHost(cy_h);
    }
    if (cx_d != NULL) {
      cudaFree(cx_d);
    }
    if (cy_d != NULL) {
      cudaFree(cy_d);
    }
    // The original never destroyed the timing events, leaking them on every call.
    if (stop != NULL) {
      cudaEventDestroy(stop);
    }
    if (start != NULL) {
      cudaEventDestroy(start);
    }
  }
RAW Paste Data