SHARE
TWEET

Untitled

a guest Oct 21st, 2019 83 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6.  
  7. cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);
  8.  
  9.  
  // Element-wise integer vector addition on the device: c[i] = a[i] + b[i].
  //
  // Launch layout: 1D grid of 1D blocks, one thread per element. The element
  // index is the thread's global index (blockIdx.x * blockDim.x + threadIdx.x).
  // For a single-block launch blockIdx.x is 0, so the index reduces to
  // threadIdx.x; with several blocks each block now handles its own slice
  // instead of every block rewriting the first blockDim.x elements.
  //
  // Parameters:
  //   c - output buffer, dereferenced on the device
  //   a - first input buffer, dereferenced on the device
  //   b - second input buffer, dereferenced on the device
  //
  // Preconditions: the kernel takes no element count and therefore has no
  // bounds guard. The caller must launch no more threads in total than the
  // buffers have elements. No shared memory is used.
  __global__ void addKernel(int *c, const int *a, const int *b)
  {
      const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
      c[i] = a[i] + b[i];
  }
  15.  
  // Program entry point: adds two 10000-element integer vectors once through
  // addWithCuda and once with a plain host loop, prints the elapsed time of
  // each (in milliseconds, as reported by cudaEventElapsedTime), and checks
  // that both paths produced the same sums.
  //
  // Returns 0 on success; 1 if any CUDA call fails or the results differ.
  int main()
  {
      const int arraySize = 10000;

      // Static storage keeps four 40 KB buffers off the stack.
      static int a[arraySize];
      static int b[arraySize];
      static int gpuSum[arraySize];   // filled by addWithCuda
      static int cpuSum[arraySize];   // filled by the host reference loop

      cudaEvent_t start = 0;
      cudaEvent_t stop = 0;
      cudaError_t cudaStatus;
      float elapsedTime = 0.0f;       // time of the addWithCuda call
      float elapTime = 0.0f;          // time of the host loop
      int mismatches = 0;
      int exitCode = 1;               // pessimistic until everything succeeded
      int i;

      for (i = 0; i < arraySize; i++)
      {
          a[i] = i;
          b[i] = 300 + i;
      }

      cudaStatus = cudaEventCreate(&start);
      if (cudaStatus == cudaSuccess) {
          cudaStatus = cudaEventCreate(&stop);
      }
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaEventCreate failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }

      // --- GPU path -----------------------------------------------------------
      // The timed region is the whole addWithCuda call, so it includes device
      // allocation and both host<->device copies, not only the kernel.
      cudaStatus = cudaEventRecord(start, 0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaEventRecord failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }

      cudaStatus = addWithCuda(gpuSum, a, b, arraySize);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "addWithCuda failed!");
          goto Cleanup;
      }

      cudaStatus = cudaEventRecord(stop, 0);
      if (cudaStatus == cudaSuccess) {
          cudaStatus = cudaEventSynchronize(stop);
      }
      if (cudaStatus == cudaSuccess) {
          cudaStatus = cudaEventElapsedTime(&elapsedTime, start, stop);
      }
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "GPU timing failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }
      printf("Execution Time with GPU: %f \n", elapsedTime);

      // --- CPU path -----------------------------------------------------------
      // NOTE(review): CUDA events are timestamped on the GPU stream timeline, so
      // this only approximates the host loop's duration; a host-side timer
      // would measure it more directly.
      cudaStatus = cudaEventRecord(start, 0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaEventRecord failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }

      // Written to a separate buffer so the GPU result is not overwritten and
      // can be verified below.
      for (i = 0; i < arraySize; i++)
      {
          cpuSum[i] = a[i] + b[i];
      }

      cudaStatus = cudaEventRecord(stop, 0);
      if (cudaStatus == cudaSuccess) {
          cudaStatus = cudaEventSynchronize(stop);
      }
      if (cudaStatus == cudaSuccess) {
          cudaStatus = cudaEventElapsedTime(&elapTime, start, stop);
      }
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "CPU timing failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }
      printf("Execution Time with CPU: %f", elapTime);

      // --- Verification -------------------------------------------------------
      for (i = 0; i < arraySize; i++)
      {
          if (gpuSum[i] != cpuSum[i]) {
              mismatches++;
          }
      }
      if (mismatches != 0) {
          fprintf(stderr, "\nGPU and CPU results differ in %d of %d elements\n",
                  mismatches, arraySize);
          goto Cleanup;
      }

      exitCode = 0;

  Cleanup:
      // Release the events on every exit path; skip handles never created.
      if (start != 0) {
          cudaEventDestroy(start);
      }
      if (stop != 0) {
          cudaEventDestroy(stop);
      }

      // cudaDeviceReset must be called before exiting in order for profiling and
      // tracing tools such as Nsight and Visual Profiler to show complete traces.
      cudaStatus = cudaDeviceReset();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceReset failed!");
          exitCode = 1;
      }

      return exitCode;
  }
  84.  
  // Helper function for using CUDA to add vectors in parallel.
  //
  // Copies the host vectors a and b (size elements each) to device 0, computes
  // c[i] = a[i] + b[i] with addKernel, and copies the result back into the host
  // vector c.
  //
  // Parameters:
  //   c    - host output buffer, at least size ints
  //   a    - host input buffer, at least size ints
  //   b    - host input buffer, at least size ints
  //   size - number of elements; 0 is accepted and does nothing
  //
  // Returns cudaSuccess, or the status of the first CUDA call that failed (a
  // message is also written to stderr). Device buffers are freed on every path.
  //
  // The work is launched in single-block chunks of at most the device's
  // maximum threads per block. A single <<<1, size>>> launch is rejected as an
  // invalid configuration as soon as size exceeds that limit.
  cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
  {
      int *dev_a = 0;
      int *dev_b = 0;
      int *dev_c = 0;
      int maxThreadsPerBlock = 0;
      unsigned int offset = 0;
      unsigned int chunk = 0;
      const size_t bytes = size * sizeof(int);
      cudaError_t cudaStatus;

      // Nothing to add; avoids zero-sized allocations and a zero-thread launch.
      if (size == 0) {
          return cudaSuccess;
      }

      // Choose which GPU to run on, change this on a multi-GPU system.
      cudaStatus = cudaSetDevice(0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
          goto Error;
      }

      // Per-block thread limit of the selected device; bounds each launch below.
      cudaStatus = cudaDeviceGetAttribute(&maxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, 0);
      if (cudaStatus != cudaSuccess || maxThreadsPerBlock <= 0) {
          fprintf(stderr, "cudaDeviceGetAttribute failed!\n");
          if (cudaStatus == cudaSuccess) {
              cudaStatus = cudaErrorInvalidValue;
          }
          goto Error;
      }

      // Allocate GPU buffers for three vectors (two input, one output).
      cudaStatus = cudaMalloc((void**)&dev_c, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_a, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      cudaStatus = cudaMalloc((void**)&dev_b, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Error;
      }

      // Copy input vectors from host memory to GPU buffers.
      cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

      cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

      // Launch the kernel chunk by chunk, one thread per element. Each launch
      // uses a single block and pre-offset pointers, so the thread index inside
      // the block is the element index inside the chunk, and the thread count
      // equals the chunk length (the kernel has no bounds guard of its own).
      for (offset = 0; offset < size; offset += chunk) {
          chunk = size - offset;
          if (chunk > (unsigned int)maxThreadsPerBlock) {
              chunk = (unsigned int)maxThreadsPerBlock;
          }

          addKernel<<<1, chunk>>>(dev_c + offset, dev_a + offset, dev_b + offset);

          // Check for any errors launching the kernel
          cudaStatus = cudaGetLastError();
          if (cudaStatus != cudaSuccess) {
              fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
              goto Error;
          }
      }

      // cudaDeviceSynchronize waits for the kernels to finish, and returns
      // any errors encountered during execution.
      cudaStatus = cudaDeviceSynchronize();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
          goto Error;
      }

      // Copy output vector from GPU buffer to host memory.
      cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Error;
      }

  Error:
      // Reached on success as well; cudaFree on a null pointer is a no-op.
      cudaFree(dev_c);
      cudaFree(dev_a);
      cudaFree(dev_b);

      return cudaStatus;
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top