Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
/**
 * Element-wise integer addition: pSum[i] = pA[i] + pB[i].
 *
 * Each thread handles exactly one element, selected by its global thread
 * index (blockIdx.x * blockDim.x + threadIdx.x), so the kernel gives the
 * same result for single-block launches and also works for multi-block
 * 1D launches.
 *
 * Preconditions: 1D grid and 1D blocks. There is no length parameter to
 * bounds-check against, so all three device arrays must hold at least
 * gridDim.x * blockDim.x ints.
 *
 * @param pA   device pointer to the first addend array (read-only)
 * @param pB   device pointer to the second addend array (read-only)
 * @param pSum device pointer that receives the element-wise sums
 */
__global__ void fnKern(const int *pA, const int *pB, int *pSum)
{
    // With a single block, blockIdx.x is 0 and this reduces to threadIdx.x.
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    pSum[idx] = pA[idx] + pB[idx];
}
/*
 * Evaluates a CUDA runtime call from inside main(). On failure it prints the
 * source location and the runtime's error string to stderr and makes main()
 * return 1, so a failed call can no longer be mistaken for a zero result.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            return 1;                                                      \
        }                                                                  \
    } while (0)

/**
 * Demo driver: fills two host arrays with 100, uploads them, adds them on the
 * device with fnKern, copies all three device arrays back and prints the
 * first element of each.
 *
 * @return 0 on success, 1 if any CUDA call fails (the error is printed to
 *         stderr; the early return skips the cudaFree calls, which is
 *         acceptable for this short-lived demo process).
 */
int main()
{
    const int N = 512;
    // The buffers hold ints, so size them with sizeof(int), not sizeof(float).
    const size_t bytes = N * sizeof(int);
    int a[N], b[N], sum[N], aa[N], bb[N];

    for (int i = 0; i < N; i++)
    {
        a[i] = b[i] = 100;
    }
    printf("A: %d\nB: %d\n", a[0], b[0]);

    int *pA = nullptr, *pB = nullptr, *pSum = nullptr;
    CUDA_CHECK(cudaMalloc(&pA, bytes));
    CUDA_CHECK(cudaMalloc(&pB, bytes));
    CUDA_CHECK(cudaMalloc(&pSum, bytes));

    CUDA_CHECK(cudaMemcpy(pA, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(pB, b, bytes, cudaMemcpyHostToDevice));

    // One block of N threads: one thread per element.
    fnKern<<<1, N>>>(pA, pB, pSum);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    // aa/bb/sum are only read after these copies are known to have succeeded.
    CUDA_CHECK(cudaMemcpy(aa, pA, bytes, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(bb, pB, bytes, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(sum, pSum, bytes, cudaMemcpyDeviceToHost));
    printf("A: %d\nB: %d\nSum: %d\n", aa[0], bb[0], sum[0]);

    CUDA_CHECK(cudaFree(pA));
    CUDA_CHECK(cudaFree(pB));
    CUDA_CHECK(cudaFree(pSum));
    return 0;
}
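- /* OUTPUT (as observed; A and B read back as 0 instead of 100, so at least one of the unchecked CUDA calls evidently failed):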
- A: 100
- B: 100
- A: 0
- B: 0
- Sum: 0
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement