Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>
#include "cublas_v2.h"
- #define cudaErrChk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
- inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
- {
- if (code != cudaSuccess)
- {
- fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
- if (abort) exit(code);
- }
- }
- static const char *cublasErrChk(cublasStatus_t error)
- {
- switch (error)
- {
- case CUBLAS_STATUS_SUCCESS:
- return "CUBLAS_STATUS_SUCCESS";
- case CUBLAS_STATUS_NOT_INITIALIZED:
- return "CUBLAS_STATUS_NOT_INITIALIZED";
- case CUBLAS_STATUS_ALLOC_FAILED:
- return "CUBLAS_STATUS_ALLOC_FAILED";
- case CUBLAS_STATUS_INVALID_VALUE:
- return "CUBLAS_STATUS_INVALID_VALUE";
- case CUBLAS_STATUS_ARCH_MISMATCH:
- return "CUBLAS_STATUS_ARCH_MISMATCH";
- case CUBLAS_STATUS_MAPPING_ERROR:
- return "CUBLAS_STATUS_MAPPING_ERROR";
- case CUBLAS_STATUS_EXECUTION_FAILED:
- return "CUBLAS_STATUS_EXECUTION_FAILED";
- case CUBLAS_STATUS_INTERNAL_ERROR:
- return "CUBLAS_STATUS_INTERNAL_ERROR";
- }
- return "<unknown>";
- }
- int main() {
- size_t dims = 4;
- double *vec, *mat, *results;
- cudaErrChk( cudaMallocManaged(&vec, dims * sizeof(double)) );
- cudaErrChk( cudaMallocManaged(&mat, dims * dims * sizeof(double)) );
- cudaErrChk( cudaMallocManaged(&results, dims * sizeof(double)) );
- printf("Vector:\n");
- for (int i = 1; i < dims + 1; i++) {
- vec[i] = 0.5 * i;
- printf("%.2lf ", vec[i]);
- }
- printf("\n\nMatrix:\n");
- for (int i = 1; i < dims * dims + 1; i++) {
- mat[i] = 1.0 * i;
- printf("%.2lf ", mat[i]);
- if (i % dims == 0)
- printf("\n");
- }
- printf("\n");
- // CUDA graph creation
- cublasHandle_t handle;
- cublasErrChk( cublasCreate(&handle) );
- cudaGraph_t gemvGraph;
- cudaStream_t stream1, streamForGraph;
- cudaErrChk(cudaStreamCreate(&stream1));
- cublasStatus_t stat = cublasSetStream(handle, stream1);
- std::cout << "stat: " << stat << std::endl;
- cudaErrChk(cudaGraphCreate(&gemvGraph, 0));
- cudaErrChk(cudaStreamCreate(&streamForGraph));
- double alpha = 1.f, beta = 1.f;
- cudaErrChk(cudaStreamBeginCapture(stream1, cudaStreamCaptureModeGlobal));
- // multiply mat by vec to get results
- cublasErrChk(
- cublasDgemv(
- handle, CUBLAS_OP_N,
- dims, dims,
- &alpha,
- mat, dims,
- vec, 1,
- &beta,
- results, 1
- )
- );
- cudaErrChk(cudaStreamEndCapture(stream1, &gemvGraph));
- cudaStreamSynchronize(stream1);
- cudaDeviceSynchronize();
- for (int i = 0; i < dims; i++)
- printf("%.2lf ", results[i]);
- printf("\n");
- cudaErrChk( cudaFree(vec) );
- cudaErrChk( cudaFree(mat) );
- cudaErrChk( cudaFree(results) );
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement