Advertisement
Guest User

Untitled

a guest
Jun 24th, 2017
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.03 KB | None | 0 0
  1. #include <stdio.h>
  2.  
  /*
   * Element-wise integer addition: pSum[i] = pA[i] + pB[i].
   *
   * The element index is threadIdx.x alone (blockIdx is not consulted), so every
   * block of a launch touches the same elements [0, blockDim.x).
   * There is no bounds check: all three buffers must hold at least blockDim.x ints.
   *
   * pA, pB : input operands (read only by this kernel)
   * pSum   : output buffer
   */
  __global__ void fnKern(int *pA, int *pB, int *pSum)
  {
      const unsigned int slot = threadIdx.x;

      // Load both operands before storing, matching the original read-then-write order.
      const int lhs = pA[slot];
      const int rhs = pB[slot];

      pSum[slot] = lhs + rhs;
  }
  7.  
  // Reports a failed CUDA runtime call on stderr.
  // Returns true when `status` is cudaSuccess, false otherwise.
  static bool cudaOk(cudaError_t status, const char *what)
  {
      if (status == cudaSuccess)
          return true;

      fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
      return false;
  }

  /*
   * Adds two 512-element int arrays on the GPU with fnKern and prints the first
   * element of each input (before and after the round trip) and of the sum.
   *
   * Every CUDA runtime call and the kernel launch are checked; on the first
   * failure the error is printed to stderr, the remaining steps are skipped,
   * device memory is released and the process exits with status 1.
   *
   * Returns 0 on success, 1 if any CUDA call failed.
   */
  int main()
  {
      const int N = 512;
      // The buffers hold ints, so size them with sizeof(int) rather than sizeof(float).
      const size_t SIZE = N * sizeof(int);

      // Zero the result buffers so nothing indeterminate is ever read from them.
      int a[N], b[N], sum[N] = {0}, aa[N] = {0}, bb[N] = {0};

      for (int i = 0; i < N; i++)
      {
          a[i] = b[i] = 100;
      }

      printf("A: %d\nB: %d\n", a[0], b[0]);

      // Start from NULL so the cleanup below is safe even if an allocation failed.
      int *pA = NULL, *pB = NULL, *pSum = NULL;

      // && short-circuits: after the first failure no further CUDA call is issued.
      bool ok = cudaOk(cudaMalloc((void**)&pA, SIZE), "cudaMalloc(pA)")
             && cudaOk(cudaMalloc((void**)&pB, SIZE), "cudaMalloc(pB)")
             && cudaOk(cudaMalloc((void**)&pSum, SIZE), "cudaMalloc(pSum)")
             && cudaOk(cudaMemcpy(pA, a, SIZE, cudaMemcpyHostToDevice), "cudaMemcpy(pA <- a)")
             && cudaOk(cudaMemcpy(pB, b, SIZE, cudaMemcpyHostToDevice), "cudaMemcpy(pB <- b)");

      if (ok)
      {
          fnKern<<<1, N>>>(pA, pB, pSum);
          // A launch does not return a status; launch-configuration errors are
          // picked up here, execution faults by the blocking copies below.
          ok = cudaOk(cudaGetLastError(), "fnKern launch");
      }

      ok = ok
        && cudaOk(cudaMemcpy(aa, pA, SIZE, cudaMemcpyDeviceToHost), "cudaMemcpy(aa <- pA)")
        && cudaOk(cudaMemcpy(bb, pB, SIZE, cudaMemcpyDeviceToHost), "cudaMemcpy(bb <- pB)")
        && cudaOk(cudaMemcpy(sum, pSum, SIZE, cudaMemcpyDeviceToHost), "cudaMemcpy(sum <- pSum)");

      if (ok)
      {
          printf("A: %d\nB: %d\nSum: %d\n", aa[0], bb[0], sum[0]);
      }

      // Release device memory on every path; cudaFree accepts a null pointer.
      cudaFree(pA);
      cudaFree(pB);
      cudaFree(pSum);

      return ok ? 0 : 1;
  }
  40.  
  41.  
  42. /* OUTPUT:
  43. A: 100
  44. B: 100
  45. A: 0
  46. B: 0
  47. Sum: 0
  48. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement