Advertisement
Ludwiq

Untitled

Apr 13th, 2019
196
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.13 KB | None | 0 0
  1. #include <stdio.h>
  2. #define SIZE    1024
  3.  
  // Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, n).
  //
  // __global__ marks a function that executes on the GPU and is launched from
  // host code with the <<<blocks, threads>>> syntax.
  //
  // Launch layout: any 1D grid of 1D blocks. Each thread starts at its global
  // index (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total
  // number of launched threads, so all n elements are processed whether the
  // launch has fewer, exactly as many, or more threads than elements.
  // No shared memory is used.
  //
  // Parameters:
  //   a, b - input arrays, each readable by the device for at least n ints
  //   c    - output array, writable by the device for at least n ints
  //   n    - number of elements to add; values <= 0 make the kernel a no-op
  __global__ void VectorAdd(int *a, int *b, int *c, int n)
  {
      // 64-bit arithmetic keeps the index and the stride from wrapping around
      // when n is close to INT_MAX or the grid is very large.
      const long long stride = (long long)gridDim.x * blockDim.x;
      const long long first  = (long long)blockIdx.x * blockDim.x + threadIdx.x;

      // The loop condition doubles as the bounds check for surplus threads.
      for (long long i = first; i < n; i += stride)
          c[i] = a[i] + b[i];
  }
  17.  
  // Reports a failed CUDA runtime call on stderr.
  // Returns 1 when `status` is an error, 0 when it is cudaSuccess.
  static int cudaFailed(cudaError_t status, const char *what)
  {
      if (status == cudaSuccess)
          return 0;

      fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
      return 1;
  }

  // Adds two SIZE-element vectors on the GPU and prints the first results.
  //
  // Returns 0 on success and 1 if any CUDA call fails (the failure is reported
  // on stderr and nothing is printed to stdout in that case).
  int main()
  {
      // Start from NULL so the cudaFree calls at the end are safe even when an
      // allocation failed (cudaFree on a null pointer performs no operation).
      int *a = NULL, *b = NULL, *c = NULL;

      // Managed allocations are accessible from both the host and the device.
      // The || chain stops at the first failing allocation.
      int failed = cudaFailed(cudaMallocManaged(&a, SIZE * sizeof(int)), "cudaMallocManaged(a)")
                || cudaFailed(cudaMallocManaged(&b, SIZE * sizeof(int)), "cudaMallocManaged(b)")
                || cudaFailed(cudaMallocManaged(&c, SIZE * sizeof(int)), "cudaMallocManaged(c)");

      if (!failed)
      {
          for (int i = 0; i < SIZE; ++i)
          {
              a[i] = i;
              b[i] = i;
              c[i] = 0;
          }

          // Launch configuration: <<< number of thread blocks, threads per block >>>.
          // A single block of SIZE threads is used, so SIZE must not exceed the
          // per-block thread limit (1024 on compute capability 2.0 and later);
          // an invalid configuration is reported by cudaGetLastError() below.
          VectorAdd<<<1, SIZE>>>(a, b, c, SIZE);

          // A kernel launch returns no status itself: cudaGetLastError() reports
          // launch failures, and cudaDeviceSynchronize() makes the CPU wait for
          // the kernel to finish and reports errors raised while it was running.
          failed = cudaFailed(cudaGetLastError(), "VectorAdd launch")
                || cudaFailed(cudaDeviceSynchronize(), "VectorAdd execution");
      }

      if (!failed)
      {
          // Print at most the first 10 results without reading past the arrays.
          const int shown = SIZE < 10 ? SIZE : 10;
          for (int i = 0; i < shown; ++i)
              printf("c[%d] = %d\n", i, c[i]);
      }

      cudaFree(a);
      cudaFree(b);
      cudaFree(c);

      return failed ? 1 : 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement