Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Dawid Mocek (sekcja 15 )
- // Alina Litwiak (sekcja 17, chyba)
- // Łukasz Sałajczyk (sekcja 15)
- //
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <stdio.h>
- const int arraySize = 2;
- cudaError_t addWithCuda(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], size_t size);
// Element-wise addition of two square matrices: c[r][k] = a[r][k] + b[r][k].
//
// Launch layout: a 2D grid of 2D blocks. The combined x extent and the
// combined y extent of the launch must each provide at least `size` threads;
// surplus threads are masked out by the bounds check below. No shared memory
// is used.
//
// The x thread coordinate selects the column and the y coordinate selects the
// row, so threads that are adjacent in x touch adjacent ints within one row.
//
// Parameters:
//   c    - output matrix; dereferenced on the device.
//   a, b - input matrices; dereferenced on the device.
//   size - number of rows/columns to process. The row stride is fixed at
//          compile time by arraySize, so `size` must not exceed arraySize.
__global__ void addKernel(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], const int size)
{
    // Global 2D coordinates of this thread.
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid rarely divides the matrix evenly, so guard the tail threads.
    if (row < size && col < size) {
        c[row][col] = a[row][col] + b[row][col];
    }
}
// Helper function for using CUDA to add two square matrices in parallel.
//
// Selects device 0, allocates three device buffers, copies `a` and `b` to the
// device, launches addKernel, waits for it to finish and copies the result
// back into `c`. The device buffers are released on every exit path that
// allocated them.
//
// Parameters:
//   c    - host output matrix, receives a + b.
//   a, b - host input matrices.
//   size - number of rows/columns. The parameter types fix the storage at
//          arraySize x arraySize, so any other value is rejected.
//
// Returns cudaSuccess on success, cudaErrorInvalidValue when `size` does not
// match arraySize, otherwise the status of the first CUDA call that failed.
cudaError_t addWithCuda(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], size_t size)
{
    // The host arrays hold exactly arraySize * arraySize ints and the kernel
    // indexes with a compile-time row stride of arraySize; a different `size`
    // would make the copies below run past the host arrays.
    if (size != (size_t)arraySize) {
        fprintf(stderr, "addWithCuda: size must equal arraySize (%d)\n", arraySize);
        return cudaErrorInvalidValue;
    }

    // All locals are declared before the first `goto Error` so that no jump
    // skips over an initialization (that would be ill-formed C++).
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus = cudaSuccess;

    const size_t bytes = size * size * sizeof(int);

    // Block size: 16x16 = 256 threads, a multiple of the 32-thread warp.
    const unsigned int tile = 16;
    // Number of blocks per dimension, rounded up so the whole matrix is covered.
    const unsigned int blocks = (unsigned int)((size + tile - 1) / tile);
    const dim3 blockSize(tile, tile);
    const dim3 numberOfBlocks(blocks, blocks);

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for three matrices (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_a, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }
    cudaStatus = cudaMalloc((void**)&dev_b, bytes);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy the input matrices from host memory to the GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }
    cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch the kernel; the flat device buffers are viewed as 2D arrays with
    // a row length of arraySize.
    addKernel<<<numberOfBlocks, blockSize>>>((int(*)[arraySize])dev_c, (int(*)[arraySize])dev_a, (int(*)[arraySize])dev_b, (int)size);

    // A launch does not return a status itself; configuration errors are
    // reported through cudaGetLastError.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during its execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy the output matrix from the GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    // Reached on success as well; cudaFree accepts null pointers, so buffers
    // that were never allocated are harmless here.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}
// Prints `title` verbatim, then the matrix `m` one row per line with each
// element followed by a single space.
static void printMatrix(const char *title, const int m[arraySize][arraySize])
{
    printf("%s", title);
    for (int row = 0; row < arraySize; row++) {
        for (int col = 0; col < arraySize; col++) {
            printf("%d ", m[row][col]);
        }
        printf("\n");
    }
}

// Program entry point: adds two fixed matrices on the GPU, prints both
// operands and the result, resets the device and waits for a key press.
// Returns 0 on success and 1 when the addition or the device reset fails.
int main()
{
    // A
    int a[arraySize][arraySize] = {{4,5},{6,7}};
    // B
    int b[arraySize][arraySize] = {{9,9999999},{1,2}};
    // Results
    int c[arraySize][arraySize] = { 0 };

    cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    // Display the matrices.
    printMatrix("Matrix A:\n", a);
    printMatrix("Matrix B:\n", b);
    printMatrix("Rezults:\n", c);

    // cudaDeviceReset (the replacement for the deprecated cudaThreadExit) must
    // be called before exiting in order for profiling and tracing tools such
    // as Parallel Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    // Keep the console window open until the user presses a key.
    getchar();
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement