Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>
// Utility Functions

// Allocates a host matrix as a table of `rows` pointers, each pointing to a
// separately allocated array of `cols` ints.
int** matrixAlloc(int rows, int cols);

// Writes i + j into element [i][j] of a host matrix.
void matrixFill(int** ptr, int rows, int cols);

// Prints a host matrix to std::cout, one row per line.
void matrixPrint(int** ptr, int rows, int cols);

// CUDA kernel

// Element-wise addition: matrix_c[i][j] = matrix_a[i][j] + matrix_b[i][j].
// The kernel dereferences both levels of each int** argument, so the row
// tables and the rows they point to must be readable from device code.
__global__ void gpuMatrixAdd(int** matrix_a, int** matrix_b, int** matrix_c, int rows, int cols);
namespace {

// A matrix living in device memory, laid out so it can be passed to a kernel
// that takes int** and indexes it as m[i][j].
struct DeviceMatrix {
    int*  data = nullptr;  // flat rows*cols element buffer (device memory)
    int** rows = nullptr;  // table of `rows` pointers into `data` (device memory)
};

// Reports a failed CUDA call on std::cout and returns whether it succeeded.
bool cudaOk(cudaError_t status, const char* what){
    if (status == cudaSuccess)
        return true;
    std::cout << what << " failed: " << cudaGetErrorString(status) << "\n";
    return false;
}

// Allocates the element buffer and the row table, then fills the row table
// with device addresses so that m.rows[i][j] is valid inside a kernel.
bool deviceMatrixAlloc(DeviceMatrix& m, int rows, int cols){
    const size_t rowCount = static_cast<size_t>(rows);
    const size_t colCount = static_cast<size_t>(cols);

    if (!cudaOk(cudaMalloc((void**)&m.data, rowCount * colCount * sizeof(int)), "cudaMalloc (elements)"))
        return false;
    if (!cudaOk(cudaMalloc((void**)&m.rows, rowCount * sizeof(int*)), "cudaMalloc (row table)"))
        return false;

    // The table is built on the host but holds device addresses; the host only
    // does pointer arithmetic on them and never dereferences them.
    std::vector<int*> table(rowCount);
    for (size_t i = 0; i < rowCount; i++)
        table[i] = m.data + i * colCount;

    return cudaOk(cudaMemcpy(m.rows, table.data(), rowCount * sizeof(int*), cudaMemcpyHostToDevice),
                  "cudaMemcpy (row table)");
}

// Releases both device allocations; safe to call on a partially built matrix.
void deviceMatrixFree(DeviceMatrix& m){
    cudaFree(m.rows);
    cudaFree(m.data);
    m.rows = nullptr;
    m.data = nullptr;
}

// Copies a host pointer-table matrix to the device. The host rows are separate
// allocations, so they are packed into one contiguous buffer and sent in a
// single transfer.
bool deviceMatrixUpload(const DeviceMatrix& m, int** host, int rows, int cols){
    std::vector<int> staging;
    staging.reserve(static_cast<size_t>(rows) * cols);
    for (int i = 0; i < rows; i++)
        staging.insert(staging.end(), host[i], host[i] + cols);

    return cudaOk(cudaMemcpy(m.data, staging.data(), staging.size() * sizeof(int), cudaMemcpyHostToDevice),
                  "cudaMemcpy (host to device)");
}

// Copies a device matrix back into a host pointer-table matrix.
bool deviceMatrixDownload(int** host, const DeviceMatrix& m, int rows, int cols){
    std::vector<int> staging(static_cast<size_t>(rows) * cols);
    if (!cudaOk(cudaMemcpy(staging.data(), m.data, staging.size() * sizeof(int), cudaMemcpyDeviceToHost),
                "cudaMemcpy (device to host)"))
        return false;

    for (int i = 0; i < rows; i++)
        for (int j = 0; j < cols; j++)
            host[i][j] = staging[static_cast<size_t>(i) * cols + j];
    return true;
}

// True when the table and every row pointer in it are non-null.
bool hostMatrixUsable(int** ptr, int rows){
    if (ptr == nullptr)
        return false;
    for (int i = 0; i < rows; i++)
        if (ptr[i] == nullptr)
            return false;
    return true;
}

// Frees a matrix produced by matrixAlloc (each row, then the table).
void hostMatrixFree(int** ptr, int rows){
    if (ptr == nullptr)
        return;
    for (int i = 0; i < rows; i++)
        std::free(ptr[i]);
    std::free(ptr);
}

// Runs host_c = host_a + host_b on the GPU. Every CUDA call is checked and
// the device allocations are always released before returning.
bool addOnDevice(int** host_a, int** host_b, int** host_c, int rows, int cols,
                 dim3 blocks, dim3 threadsPerBlock){
    DeviceMatrix dev_a, dev_b, dev_c;

    bool ok = deviceMatrixAlloc(dev_a, rows, cols)
           && deviceMatrixAlloc(dev_b, rows, cols)
           && deviceMatrixAlloc(dev_c, rows, cols)
           && deviceMatrixUpload(dev_a, host_a, rows, cols)
           && deviceMatrixUpload(dev_b, host_b, rows, cols);

    if (ok){
        gpuMatrixAdd<<<blocks, threadsPerBlock>>>(dev_a.rows, dev_b.rows, dev_c.rows, rows, cols);
        // A launch returns no status: configuration errors are read with
        // cudaGetLastError(), faults during execution surface at the sync.
        ok = cudaOk(cudaGetLastError(), "gpuMatrixAdd launch")
          && cudaOk(cudaDeviceSynchronize(), "gpuMatrixAdd execution")
          && deviceMatrixDownload(host_c, dev_c, rows, cols);
    }

    deviceMatrixFree(dev_a);
    deviceMatrixFree(dev_b);
    deviceMatrixFree(dev_c);
    return ok;
}

} // namespace

// Adds two N x N integer matrices on the GPU and prints the result.
// Returns 0 on success and -1 if a host allocation or any CUDA call fails.
int main(){
    const int N = 8;                   // matrices width and height
    const dim3 numBlocks(1);
    const dim3 threadsPerBlock(N, N);  // x indexes rows, y indexes columns

    if (!cudaOk(cudaSetDevice(0), "cudaSetDevice"))  // selecting first CUDA device
        return -1;

    // allocating matrices on host
    int** host_a = matrixAlloc(N, N);
    int** host_b = matrixAlloc(N, N);
    int** host_c = matrixAlloc(N, N);

    int exitCode = -1;
    if (!hostMatrixUsable(host_a, N) || !hostMatrixUsable(host_b, N) || !hostMatrixUsable(host_c, N)){
        std::cout << "host matrix allocation failed\n";
    }
    else {
        matrixFill(host_a, N, N);      // filling matrices on host
        matrixFill(host_b, N, N);

        // upload, run the kernel, and get matrix c back to the host
        if (addOnDevice(host_a, host_b, host_c, N, N, numBlocks, threadsPerBlock)){
            std::cout << "cudaSuccess\n";
            // printing result
            std::cout << "host_c = \n";
            matrixPrint(host_c, N, N);
            std::cin.get();            // keep the console open until a key is pressed
            exitCode = 0;
        }
    }

    hostMatrixFree(host_a, N);
    hostMatrixFree(host_b, N);
    hostMatrixFree(host_c, N);
    return exitCode;
}
- //
- // UTILITY FUNCTIONS
- //
// Allocates a rows x cols integer matrix on the host as a table of row
// pointers, each row being its own allocation. Elements are left
// uninitialized.
//
// Returns nullptr when rows or cols is not positive, or when any allocation
// fails; in the failure case everything allocated so far is released first.
// The caller owns the result and releases it by freeing every row and then
// the table.
int** matrixAlloc(int rows, int cols){
    // Negative sizes would otherwise be converted to enormous size_t values.
    if (rows <= 0 || cols <= 0)
        return nullptr;

    int** ptr = static_cast<int**>(std::malloc(sizeof(int*) * static_cast<size_t>(rows)));
    if (ptr == nullptr)
        return nullptr;

    for (int i = 0; i < rows; i++){
        ptr[i] = static_cast<int*>(std::malloc(sizeof(int) * static_cast<size_t>(cols)));
        if (ptr[i] == nullptr){
            // Roll back rows [0, i) so a failed call leaks nothing.
            while (i-- > 0)
                std::free(ptr[i]);
            std::free(ptr);
            return nullptr;
        }
    }
    return ptr;
}
// Fills a host matrix so that element [i][j] holds i + j.
//
// ptr  - table of `rows` row pointers, each row holding at least `cols` ints.
// A null table is reported on std::cout and nothing is written. A null row
// pointer is reported and skipped; the remaining rows are still filled.
void matrixFill(int** ptr, int rows, int cols){
    if (ptr == nullptr){
        std::cout << "matrixFill called with empty pointer.\n";
        return;
    }

    for (int i = 0; i < rows; i++){
        int* row = ptr[i];
        if (row == nullptr){
            // A row can be missing if its allocation failed; do not write through it.
            std::cout << "matrixFill encountered an empty row pointer.\n";
            continue;
        }
        for (int j = 0; j < cols; j++)
            row[j] = i + j;
    }
}
// Prints a host matrix to std::cout, one row per line, each element
// right-aligned in a field of width 4.
//
// ptr  - table of `rows` row pointers, each row holding at least `cols` ints.
// A null table is reported on std::cout, mirroring matrixFill, instead of
// being silently ignored. A null row pointer is reported on its own line and
// skipped.
void matrixPrint(int** ptr, int rows, int cols){
    if (ptr == nullptr){
        std::cout << "matrixPrint called with empty pointer.\n";
        return;
    }

    for (int i = 0; i < rows; i++){
        const int* row = ptr[i];
        if (row == nullptr){
            std::cout << "matrixPrint encountered an empty row pointer.\n";
            continue;
        }
        for (int j = 0; j < cols; j++)
            std::cout << std::setw(4) << row[j];
        std::cout << '\n';
    }
    // Flush once at the end rather than after every row.
    std::cout << std::flush;
}
- //
- // CUDA Kernel
- //
// Element-wise matrix addition: matrix_c[i][j] = matrix_a[i][j] + matrix_b[i][j]
// for 0 <= i < rows and 0 <= j < cols.
//
// Launch layout: the x dimension walks rows and the y dimension walks
// columns. Each thread starts at its global (x, y) index and advances by the
// total number of launched threads in that dimension (a 2D grid-stride loop),
// so any grid/block shape covers the whole matrix: a single rows x cols block,
// several smaller blocks, or even a single thread.
//
// Preconditions: the three arguments are row-pointer tables, and both the
// tables and the rows they point to must be dereferenceable from device code.
// Uses no shared memory and performs no synchronization.
__global__ void gpuMatrixAdd(int** matrix_a, int** matrix_b, int** matrix_c, int rows, int cols){
    const int firstRow  = blockIdx.x * blockDim.x + threadIdx.x;
    const int firstCol  = blockIdx.y * blockDim.y + threadIdx.y;
    const int rowStride = gridDim.x * blockDim.x;
    const int colStride = gridDim.y * blockDim.y;

    // The loop conditions double as the bounds check for threads that fall
    // outside the matrix.
    for (int i = firstRow; i < rows; i += rowStride)
        for (int j = firstCol; j < cols; j += colStride)
            matrix_c[i][j] = matrix_a[i][j] + matrix_b[i][j];
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement