Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- /* vector elements */
- #define N 512
/*
 * Element-wise vector addition executed on the GPU: C[i] = A[i] + B[i].
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks. Each thread
 * handles the single element selected by its flat global index, so any
 * <<<blocks, threads>>> configuration with blocks * threads >= N covers the
 * whole vector. Threads whose index falls at or beyond N do nothing.
 *
 * A, B - input vectors; elements 0..N-1 are read
 * C    - output vector; elements 0..N-1 are written
 *
 * No shared memory is used.
 */
__global__ void vecAdd(const int *A, const int *B, int *C)
{
    /* flat global index: correct for multi-block launches as well */
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    /* guard the tail so surplus threads never touch memory past element N-1 */
    if (i < N)
    {
        C[i] = A[i] + B[i];
    }
}
/*
 * Writes the first `size` integers of A to stdout on a single line.
 * Every value is preceded by a tab character; the line is finished
 * with a newline. Nothing but the newline is written when size <= 0.
 */
void Print(const int *A, int size)
{
    int idx = 0;

    while (idx < size)
    {
        printf("\t%d", A[idx]);
        ++idx;
    }

    printf("\n");
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status - value returned by the CUDA runtime call
 * what   - short description of the call, used in the message
 *
 * Returns 1 when status is cudaSuccess, 0 otherwise.
 */
static int cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return 1;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return 0;
}

/*
 * Entry point: fills two host vectors of N integers, prints them, adds them
 * on the GPU with vecAdd and prints the result.
 *
 * Returns 0 on success and 1 when any CUDA call fails.
 */
int main(int argc, char *argv[])
{
    int A[N], B[N], C[N], i;
    /* start as NULL so the cleanup below is safe even after a failed allocation */
    int *devA = NULL, *devB = NULL, *devC = NULL;
    const size_t bytes = N * sizeof(int);
    int ok;

    /* setting initial values */
    for (i = 0; i < N; i++)
    {
        A[i] = i;
        B[i] = i;
        C[i] = 0;
    }

    printf("\nVector A is:\n");
    Print(A, N);
    printf("\nVector B is:\n");
    Print(B, N);

    /* device memory allocations, then host -> device copies;
       the && chain stops at the first call that fails */
    ok = cudaOk(cudaMalloc((void**)&devA, bytes), "cudaMalloc(devA)")
      && cudaOk(cudaMalloc((void**)&devB, bytes), "cudaMalloc(devB)")
      && cudaOk(cudaMalloc((void**)&devC, bytes), "cudaMalloc(devC)")
      && cudaOk(cudaMemcpy(devA, A, bytes, cudaMemcpyHostToDevice), "copy of A to device")
      && cudaOk(cudaMemcpy(devB, B, bytes, cudaMemcpyHostToDevice), "copy of B to device")
      && cudaOk(cudaMemcpy(devC, C, bytes, cudaMemcpyHostToDevice), "copy of C to device");

    if (ok)
    {
        /* execution on GPU: a single block of N threads, one thread per element */
        vecAdd<<<1, N>>>(devA, devB, devC);

        /* a launch does not return a status itself: query it explicitly.
           The blocking copy back to the host then surfaces any error
           raised while the kernel was running. */
        ok = cudaOk(cudaGetLastError(), "vecAdd launch")
          && cudaOk(cudaMemcpy(C, devC, bytes, cudaMemcpyDeviceToHost), "copy of C to host");
    }

    if (ok)
    {
        printf("\nVector A+B is\n\n");
        Print(C, N);
    }

    /* freeing device memory: cudaFree takes the device pointer itself,
       not the address of the host variable that holds it; freeing a
       NULL pointer is a no-op */
    if (!cudaOk(cudaFree(devA), "cudaFree(devA)")) ok = 0;
    if (!cudaOk(cudaFree(devB), "cudaFree(devB)")) ok = 0;
    if (!cudaOk(cudaFree(devC), "cudaFree(devC)")) ok = 0;

    return ok ? 0 : 1;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement