Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.58 KB | None | 0 0
  1. #include <stdio.h>
  2. #define SIZE 1024
  3.  
  4. __global__ void VectorAdd(int* a, int* b, int* c, int n)
  5. {
  6. int i=threadIdx.x;
  7.  
  8. if (i < n) {
  9. c[i] = a[i] + b[i];
  10. }
  11. }
  12.  
  13. int main()
  14. {
  15. int* a, * b, * c;
  16.  
  17. cudaMallocManaged(&a, SIZE * sizeof(int));
  18. cudaMallocManaged(&b, SIZE * sizeof(int));
  19. cudaMallocManaged(&c, SIZE * sizeof(int));
  20.  
  21. for (int i = 0; i < SIZE; ++i)
  22. {
  23. a[i] = i;
  24. b[i] = i;
  25. c[i] = 0;
  26. }
  27.  
  28. VectorAdd <<<1,SIZE>>>(a, b, c, SIZE);
  29.  
  30. cudaDeviceSynchronize();
  31.  
  32.  
  33. for (int i = 0; i < 10; ++i)
  34. printf("c[%d] = %d\n", i, c[i]);
  35.  
  36. cudaFree(a);
  37. cudaFree(b);
  38. cudaFree(c);
  39.  
  40. return 0;
  41. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement