Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCublasCudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "cudaCublas: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Reports a failed cuBLAS call on stderr.
// Returns true when `status` is CUBLAS_STATUS_SUCCESS, false otherwise.
static bool cudaCublasBlasOk(cublasStatus_t status, const char *what) {
    if (status == CUBLAS_STATUS_SUCCESS) {
        return true;
    }
    fprintf(stderr, "cudaCublas: %s failed (cublasStatus_t = %d)\n", what, (int)status);
    return false;
}

/*
 * Times one cublasSaxpy call (y = alpha * x + y) over N floats and prints the
 * elapsed time in milliseconds to stdout.
 *
 * Setup: x[i] = i and y[i] = 0.87f are filled in host buffers obtained from
 * cudaMallocHost, uploaded with cublasSetVector, and alpha is 3.0f. Only the
 * cublasSaxpy call sits between the two timing events (both recorded on
 * stream 0); the uploads and the download of y are outside the timed region.
 * The downloaded result is not inspected.
 *
 * N is taken from the enclosing file. The cuBLAS handle's stream is left at
 * its default.
 *
 * On any CUDA/cuBLAS failure a diagnostic is written to stderr, no timing is
 * printed, and every resource acquired so far is released before returning.
 */
void cudaCublas() {
    const int n = N;
    const int stride = 1;
    const float alpha = 3.0f;
    const size_t bytes = (size_t)n * sizeof(float);

    // Everything starts out NULL so the cleanup section can tell what was
    // actually acquired, whichever step failed.
    float elapsedTime = 0.0f;
    cudaEvent_t start = NULL;
    cudaEvent_t stop = NULL;
    float *cx_h = NULL;
    float *cy_h = NULL;
    float *cx_d = NULL;
    float *cy_d = NULL;
    cublasHandle_t cublas_handle = NULL;

    // Single-pass loop: `break` jumps straight to cleanup on the first failure.
    do {
        if (!cudaCublasCudaOk(cudaEventCreate(&start), "cudaEventCreate(start)")) break;
        if (!cudaCublasCudaOk(cudaEventCreate(&stop), "cudaEventCreate(stop)")) break;

        if (!cudaCublasCudaOk(cudaMallocHost((void **)&cx_h, bytes), "cudaMallocHost(cx_h)")) break;
        if (!cudaCublasCudaOk(cudaMallocHost((void **)&cy_h, bytes), "cudaMallocHost(cy_h)")) break;
        if (!cudaCublasCudaOk(cudaMalloc((void **)&cx_d, bytes), "cudaMalloc(cx_d)")) break;
        if (!cudaCublasCudaOk(cudaMalloc((void **)&cy_d, bytes), "cudaMalloc(cy_d)")) break;

        for (int i = 0; i < n; i++) {
            cx_h[i] = (float)i;
            cy_h[i] = 0.87f;
        }

        if (!cudaCublasBlasOk(cublasCreate(&cublas_handle), "cublasCreate")) break;

        // Host -> device upload of both operands.
        if (!cudaCublasBlasOk(cublasSetVector(n, sizeof(float), cx_h, stride, cx_d, stride),
                              "cublasSetVector(x)")) break;
        if (!cudaCublasBlasOk(cublasSetVector(n, sizeof(float), cy_h, stride, cy_d, stride),
                              "cublasSetVector(y)")) break;

        // Timed region: only the SAXPY call sits between the two events.
        if (!cudaCublasCudaOk(cudaEventRecord(start, 0), "cudaEventRecord(start)")) break;
        if (!cudaCublasBlasOk(cublasSaxpy(cublas_handle, n, &alpha, cx_d, stride, cy_d, stride),
                              "cublasSaxpy")) break;
        if (!cudaCublasCudaOk(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")) break;
        // Wait for `stop` to complete before reading the elapsed time.
        if (!cudaCublasCudaOk(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")) break;
        if (!cudaCublasCudaOk(cudaEventElapsedTime(&elapsedTime, start, stop),
                              "cudaEventElapsedTime")) break;

        // Device -> host download of the result vector y.
        if (!cudaCublasBlasOk(cublasGetVector(n, sizeof(float), cy_d, stride, cy_h, stride),
                              "cublasGetVector(y)")) break;

        printf("cuBLAS Time:\n \t %f ms\n", elapsedTime);
    } while (0);

    // Cleanup: release only what was acquired. Return codes are deliberately
    // ignored here since nothing further can be done about a failed release.
    if (cublas_handle != NULL) cublasDestroy(cublas_handle);
    if (cx_h != NULL) cudaFreeHost(cx_h);
    if (cy_h != NULL) cudaFreeHost(cy_h);
    if (cx_d != NULL) cudaFree(cx_d);
    if (cy_d != NULL) cudaFree(cy_d);
    // The original never destroyed the timing events, leaking two per call.
    if (stop != NULL) cudaEventDestroy(stop);
    if (start != NULL) cudaEventDestroy(start);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement