Guest User

waskithebest

a guest
Dec 16th, 2012
292
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.47 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6.  
  7. cudaError_t addWithCuda(int *c, const int *a, const int *b, size_t size);
  8.  
  // Element-wise vector addition: each thread writes c[t] = a[t] + b[t], where
  // t is the thread's x-index within its block.
  //
  // Only threadIdx.x is used for indexing and there is no bounds check, so the
  // caller must launch no more threads per block than the arrays have elements;
  // in a multi-block launch every block would touch the same elements.
  __global__ void addKernel(int *c, const int *a, const int *b)
  {
      const int tid = threadIdx.x;
      const int sum = a[tid] + b[tid];
      c[tid] = sum;
  }
  14.  
  // Adds two fixed five-element vectors with addWithCuda, prints the sums, and
  // resets the device. Returns 0 on success, 1 if either CUDA step fails.
  int main()
  {
      const int kCount = 5;
      const int lhs[kCount] = { 1, 2, 3, 4, 5 };
      const int rhs[kCount] = { 10, 20, 30, 40, 50 };
      int sum[kCount] = { 0 };

      // Run the element-wise addition on the GPU.
      if (addWithCuda(sum, lhs, rhs, kCount) != cudaSuccess) {
          fprintf(stderr, "addWithCuda failed!");
          return 1;
      }

      printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
          sum[0], sum[1], sum[2], sum[3], sum[4]);

      // Profiling and tracing tools such as Nsight and Visual Profiler only show
      // complete traces if cudaDeviceReset is called before the process exits.
      const cudaError_t resetStatus = cudaDeviceReset();
      if (resetStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceReset failed!");
          return 1;
      }

      return 0;
  }
  42.  
  // Adds two host vectors on the GPU: c[i] = a[i] + b[i] for i in [0, size).
  //
  // Selects device 0, allocates three device buffers, copies a and b to the
  // device, launches addKernel as a single block of `size` threads, and copies
  // the result back into c. All device buffers are freed before returning,
  // whether or not an error occurred.
  //
  // Parameters:
  //   c    - host output buffer with room for `size` ints.
  //   a, b - host input buffers, each holding `size` ints.
  //   size - element count. The launch uses exactly one block, so a value above
  //          the device's per-block thread limit is reported as a launch error.
  //          A size of 0 is a no-op that returns cudaSuccess once the device has
  //          been selected.
  //
  // Returns cudaSuccess, or the first CUDA error encountered; a diagnostic is
  // also written to stderr.
  cudaError_t addWithCuda(int *c, const int *a, const int *b, size_t size)
  {
      int *dev_a = 0;
      int *dev_b = 0;
      int *dev_c = 0;
      // Declared up front because a goto may not jump past an initialized
      // declaration in the same scope.
      const size_t bytes = size * sizeof(int);
      const unsigned int threadsPerBlock = static_cast<unsigned int>(size);
      cudaError_t cudaStatus;

      // Choose which GPU to run on, change this on a multi-GPU system.
      cudaStatus = cudaSetDevice(0);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
          goto Cleanup;
      }

      // A zero-thread launch is an invalid configuration, and there is nothing
      // to compute for an empty vector, so report success without touching c.
      if (size == 0) {
          goto Cleanup;
      }

      // The block dimension is an unsigned int; refuse sizes that would be
      // silently truncated when narrowed from size_t.
      if (threadsPerBlock != size) {
          cudaStatus = cudaErrorInvalidValue;
          fprintf(stderr, "addWithCuda: size %llu is too large for a single-block launch!\n",
              static_cast<unsigned long long>(size));
          goto Cleanup;
      }

      // Allocate GPU buffers for three vectors (two input, one output).
      cudaStatus = cudaMalloc((void**)&dev_c, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Cleanup;
      }

      cudaStatus = cudaMalloc((void**)&dev_a, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Cleanup;
      }

      cudaStatus = cudaMalloc((void**)&dev_b, bytes);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMalloc failed!");
          goto Cleanup;
      }

      // Copy input vectors from host memory to GPU buffers.
      cudaStatus = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Cleanup;
      }

      cudaStatus = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Cleanup;
      }

      // Launch a kernel on the GPU with one thread for each element.
      addKernel<<<1, threadsPerBlock>>>(dev_c, dev_a, dev_b);

      // A kernel launch returns nothing itself; configuration errors (for
      // example too many threads per block) are only visible through
      // cudaGetLastError.
      cudaStatus = cudaGetLastError();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
          goto Cleanup;
      }

      // cudaDeviceSynchronize waits for the kernel to finish and reports any
      // error raised while it was executing.
      cudaStatus = cudaDeviceSynchronize();
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
          goto Cleanup;
      }

      // Copy output vector from GPU buffer to host memory.
      cudaStatus = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);
      if (cudaStatus != cudaSuccess) {
          fprintf(stderr, "cudaMemcpy failed!");
          goto Cleanup;
      }

  Cleanup:
      // Reached on success and on failure alike. Pointers that were never
      // allocated are still null, which cudaFree accepts as a no-op.
      cudaFree(dev_c);
      cudaFree(dev_a);
      cudaFree(dev_b);

      return cudaStatus;
  }
Add Comment
Please, Sign In to add comment