SHARE
TWEET

Untitled

a guest May 24th, 2019 89 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //Autorzy: Michał Gudyka, Wojciech Wichary
  2. //Laboratorium CUDA 24.05.2019
  3. //Trademark, All Rights Reserved
  4.  
  5. #include "cuda_runtime.h"
  6. #include "device_launch_parameters.h"
  7.  
  8. #include <stdio.h>
  9. #include <conio.h>
  10.  
  // Number of rows and of columns of the square matrices handled in this file.
  const int array_size = 5;

  // Host-side entry point for the GPU matrix addition; defined further down.
  cudaError_t addWithCuda(int c[array_size][array_size],
                          const int a[array_size][array_size],
                          const int b[array_size][array_size],
                          size_t size);
  14.  
  // Element-wise addition of two integer matrices: c[r][k] = a[r][k] + b[r][k].
  //
  // Launch layout: a 2D grid of 2D blocks that provides at least `size` threads
  // along both x and y. Threads outside the processed region do nothing. No
  // shared memory is used.
  //
  // c       - output matrix, row stride of array_size ints
  // a, b    - input matrices, row stride of array_size ints
  // size    - number of rows and of columns to process; values larger than
  //           array_size are clamped to array_size so no thread can index past
  //           the end of a row
  __global__ void addKernel(int c[array_size][array_size], const int a[array_size][array_size], const int b[array_size][array_size], const int size)
  {
      // x selects the column, which is the fastest-varying dimension in memory,
      // so threads with consecutive threadIdx.x access consecutive ints.
      const int col = threadIdx.x + blockIdx.x * blockDim.x;
      const int row = threadIdx.y + blockIdx.y * blockDim.y;

      const int limit = (size < array_size) ? size : array_size;

      if (row < limit && col < limit) {
          c[row][col] = a[row][col] + b[row][col];
      }
  }
  24.  
  // Adds two matrices on the GPU: c = a + b over the top-left size x size region.
  //
  // c    - host output matrix; only entries with row < size and column < size
  //        are written, all other entries are left untouched
  // a, b - host input matrices
  // size - number of rows and of columns to add; must not exceed array_size
  //
  // Returns cudaSuccess on success (including size == 0, for which there is
  // nothing to do), cudaErrorInvalidValue when size > array_size, and otherwise
  // the error code of the first CUDA call that failed. Device buffers are
  // released on every path.
  cudaError_t addWithCuda(int c[array_size][array_size], const int a[array_size][array_size], const int b[array_size][array_size], size_t size)
  {
      // Argument checks run before anything is allocated, so they can return
      // directly instead of going through the cleanup label.
      if (size == 0) {
          return cudaSuccess;
      }
      if (size > (size_t)array_size) {
          fprintf(stderr, "addWithCuda: size %lu exceeds array_size %d\n", (unsigned long)size, array_size);
          return cudaErrorInvalidValue;
      }

      // Every local below is declared before the first `goto Error`, so that no
      // jump to the label skips over an initialization.
      int *dev_a = 0;
      int *dev_b = 0;
      int *dev_c = 0;
      cudaError_t cudaStatus;

      // Device buffers hold complete array_size x array_size matrices so that
      // their row stride matches the indexing used by addKernel. Only the
      // size x size region is transferred in either direction.
      const size_t row_pitch = array_size * sizeof(int);     // bytes between row starts
      const size_t row_bytes = size * sizeof(int);           // bytes transferred per row
      const size_t matrix_bytes = array_size * row_pitch;    // bytes per device buffer

      // 2x2 threads per block; ceil(size / 2) blocks along each dimension so the
      // grid covers every element of the size x size region.
      const dim3 block_size(2, 2);
      const unsigned int blocks = (unsigned int)((size + block_size.x - 1) / block_size.x);
      const dim3 blocks_number(blocks, blocks);

      // Choose which GPU to run on, change this on a multi-GPU system.
      cudaStatus = cudaSetDevice(0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
          goto Error;
      }

      // Allocate GPU buffers for the three matrices (two inputs, one output).
      cudaStatus = cudaMalloc((void**)&dev_c, matrix_bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_a, matrix_bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_b, matrix_bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      // Copy the size x size region of each input from host memory to the GPU.
      cudaStatus = cudaMemcpy2D(dev_a, row_pitch, a, row_pitch, row_bytes, size, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy2D failed!");
          goto Error;
      }

      cudaStatus = cudaMemcpy2D(dev_b, row_pitch, b, row_pitch, row_bytes, size, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy2D failed!");
          goto Error;
      }

      // Launch the kernel on the GPU.
      addKernel<<<blocks_number, block_size>>>((int(*)[array_size])dev_c, (int(*)[array_size])dev_a, (int(*)[array_size])dev_b, (int)size);

      // A kernel launch returns nothing itself; an invalid launch configuration
      // is reported through cudaGetLastError.
      cudaStatus = cudaGetLastError();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Error;
      }

      // cudaDeviceSynchronize waits for the kernel to finish and returns any
      // error encountered while it was running.
      cudaStatus = cudaDeviceSynchronize();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
          goto Error;
      }

      // Copy the size x size result region from the GPU back to host memory.
      cudaStatus = cudaMemcpy2D(c, row_pitch, dev_c, row_pitch, row_bytes, size, cudaMemcpyDeviceToHost);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy2D failed!");
          goto Error;
      }

  Error:
      // Pointers that were never allocated are still null here, which cudaFree accepts.
      cudaFree(dev_c);
      cudaFree(dev_a);
      cudaFree(dev_b);

      return cudaStatus;
  }
  // Prints `title` verbatim, then the matrix with one row per line and a single
  // space after every element.
  static void printMatrix(const char *title, const int m[array_size][array_size])
  {
      printf("%s", title);
      for (int row = 0; row < array_size; row++) {
          for (int col = 0; col < array_size; col++) {
              printf("%d ", m[row][col]);
          }
          printf("\n");
      }
  }

  // Builds two 5x5 matrices, adds them on the GPU through addWithCuda, and prints
  // both inputs followed by the result.
  //
  // Returns 0 on success and 1 when addWithCuda or the device reset fails.
  int main()
  {
      // Input matrices of size 5x5.
      int a[array_size][array_size] =
        { {1, 2, 3, 4, 5},
          {1, 1, 1, 1, 1},
          {2, 2, 2, 2, 2},
          {1, 1, 1, 1, 1},
          {2, 2, 2, 2, 2}
      };
      int b[array_size][array_size] =
        { {6, 7, 8, 9, 10},
          {1, 1, 1, 1, 1},
          {2, 2, 2, 2, 2},
          {1, 1, 1, 1, 1},
          {2, 2, 2, 2, 2}
      };
      // Result matrix, zero-initialized.
      int c[array_size][array_size] = { 0 };

      cudaError_t cudaStatus = addWithCuda(c, a, b, array_size);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "addWithCuda failed!");
          return 1;
      }

      // Display the matrices, separated by blank lines.
      printMatrix("A: \n", a);
      printf("\n");
      printMatrix("B: \n", b);
      printf("\n");
      printMatrix("Wynik dodawania: \n", c);

      // cudaDeviceReset replaces the deprecated cudaThreadExit. It must be called
      // before exiting in order for profiling and tracing tools such as
      // Parallel Nsight and Visual Profiler to show complete traces.
      cudaStatus = cudaDeviceReset();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceReset failed!");
          return 1;
      }

      // Keep the console window open until a key is pressed.
      system("pause");
      return 0;
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top