Advertisement
m4ly

CUDA

Jun 15th, 2014
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 4.28 KB | None | 0 0
  1. // Dawid Mocek (sekcja 15 )
  2. // Alina Litwiak (sekcja 17, chyba)
  3. // Łukasz Sałajczyk (sekcja 15)
  4. //
  5. #include "cuda_runtime.h"
  6. #include "device_launch_parameters.h"
  7. #include <stdio.h>
  8.  
  9. const int arraySize = 2;
  10.  
  11. cudaError_t addWithCuda(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], size_t size);
  12.  
  13. __global__ void addKernel(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], const int size)
  14. {
  15.     // Places in the matrix
  16.     int i = threadIdx.x + blockIdx.x * blockDim.x; //
  17.     int j = threadIdx.y + blockIdx.y * blockDim.y; //
  18.    
  19.     if(i < size && j< size) {
  20.         // Matrix suming
  21.         c[i][j] = a[i][j] + b[i][j];
  22.     }
  23. }
  24.  
  25. // Helper function for using CUDA to add vectors in parallel.
  26. cudaError_t addWithCuda(int c[arraySize][arraySize], const int a[arraySize][arraySize], const int b[arraySize][arraySize], size_t size)
  27. {
  28.     int *dev_a = 0;
  29.     int *dev_b = 0;
  30.     int *dev_c = 0;
  31.     cudaError_t cudaStatus;
  32.     int blocks;
  33.  
  34.     // Choose which GPU to run on, change this on a multi-GPU system.
  35.     cudaStatus = cudaSetDevice(0);
  36.     if (cudaStatus != cudaSuccess) {
  37.         fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
  38.         goto Error;
  39.     }
  40.  
  41.     // Allocate GPU buffers for three vectors (two input, one output)  
  42.     cudaStatus = cudaMalloc((void**)&dev_c, size * size * sizeof(int));
  43.     if (cudaStatus != cudaSuccess) {
  44.         fprintf(stderr, "cudaMalloc failed!");
  45.         goto Error;
  46.     }
  47.  
  48.     cudaStatus = cudaMalloc((void**)&dev_a, size * size * sizeof(int));
  49.     if (cudaStatus != cudaSuccess) {
  50.         fprintf(stderr, "cudaMalloc failed!");
  51.         goto Error;
  52.     }
  53.  
  54.     cudaStatus = cudaMalloc((void**)&dev_b, size * size * sizeof(int));
  55.     if (cudaStatus != cudaSuccess) {
  56.         fprintf(stderr, "cudaMalloc failed!");
  57.         goto Error;
  58.     }
  59.  
  60.     // Copying arrays
  61.     cudaStatus = cudaMemcpy(dev_a, a, size * size * sizeof(int), cudaMemcpyHostToDevice);
  62.     if (cudaStatus != cudaSuccess) {
  63.         fprintf(stderr, "cudaMemcpy failed!");
  64.         goto Error;
  65.     }
  66.  
  67.     cudaStatus = cudaMemcpy(dev_b, b, size * size * sizeof(int), cudaMemcpyHostToDevice);
  68.     if (cudaStatus != cudaSuccess) {
  69.         fprintf(stderr, "cudaMemcpy failed!");
  70.         goto Error;
  71.     }
  72.  
  73.     //Block Size: 5x5
  74.     dim3 blockSize(5, 5);
  75.    
  76.     // Counting number of blocks
  77.     blocks = (size / 5);
  78.     if(size % 5 != 0) blocks++;
  79.  
  80.     dim3 numberOfBlocks(blocks,blocks);
  81.  
  82.     // Core fuctnion
  83.     addKernel<<<numberOfBlocks, blockSize>>>((int(*)[arraySize])dev_c, (int(*)[arraySize])dev_a, (int(*)[arraySize])dev_b,size);
  84.  
  85.     // cudaThreadSynchronize waits for the kernel to finish, and returns
  86.     // any errors encountered during the launch.
  87.     cudaStatus = cudaThreadSynchronize();
  88.     if (cudaStatus != cudaSuccess) {
  89.         fprintf(stderr, "cudaThreadSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
  90.         goto Error;
  91.     }
  92.  
  93.     // Copy output vector from GPU buffer to host memory.
  94.     cudaStatus = cudaMemcpy(c, dev_c, size * size * sizeof(int), cudaMemcpyDeviceToHost);
  95.     if (cudaStatus != cudaSuccess) {
  96.         fprintf(stderr, "cudaMemcpy failed!");
  97.         goto Error;
  98.     }
  99.  
  100. Error:
  101.     cudaFree(dev_c);
  102.     cudaFree(dev_a);
  103.     cudaFree(dev_b);
  104.  
  105.     return cudaStatus;
  106. }
  107.  
  108. int main()
  109. {
  110.  
  111.     //A
  112.     int a[arraySize][arraySize] = {{4,5},{6,7}};
  113.    
  114.     //B
  115.     int b[arraySize][arraySize] = {{9,9999999},{1,2}};
  116.    
  117.     //Results
  118.     int c[arraySize][arraySize] = { 0 };
  119.  
  120.     int i, j;
  121.  
  122.     cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
  123.     if (cudaStatus != cudaSuccess) {
  124.         fprintf(stderr, "addWithCuda failed!");
  125.         return 1;
  126.     }
  127.  
  128.     //Wyswietlanie macierzy
  129.     printf("Matrix A:\n");
  130.     for(i = 0; i < arraySize; i++){
  131.         for(j = 0; j < arraySize; j++){
  132.             printf("%d ",a[i][j]);
  133.         }
  134.         printf("\n");
  135.     }
  136.  
  137.     printf("Matrix B:\n");
  138.     for(i = 0; i < arraySize; i++){
  139.         for(j = 0; j < arraySize; j++){
  140.             printf("%d ",b[i][j]);
  141.         }
  142.         printf("\n");
  143.     }
  144.    
  145.     printf("Rezults:\n");
  146.     for(i = 0; i < arraySize; i++){
  147.         for(j = 0; j < arraySize; j++){
  148.             printf("%d ",c[i][j]);
  149.         }
  150.         printf("\n");
  151.     }
  152.  
  153.     // cudaThreadExit must be called before exiting in order for profiling and
  154.     // tracing tools such as Parallel Nsight and Visual Profiler to show complete traces.
  155.     cudaStatus = cudaThreadExit();
  156.     if (cudaStatus != cudaSuccess) {
  157.         fprintf(stderr, "cudaThreadExit failed!");
  158.         return 1;
  159.     }
  160.    
  161.  
  162.     getchar();
  163.     return 0;
  164. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement