Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Element-wise vector addition kernel: stores A[i] + B[i] into C[i] for
// every i in [0, N).
//
// Each thread handles at most one element, selected by its flat 1D index
// (block offset plus the thread's offset inside the block). Threads whose
// index is N or greater do nothing, so the launch may contain more threads
// than there are elements.
//
// A, B and C are dereferenced inside the kernel and therefore must point to
// memory the device can access, each holding at least N floats.
__global__ void VecAdd(float* A, float* B, float* C, int N)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Surplus threads in the last block have no element to process.
    if (idx >= N)
        return;

    C[idx] = A[idx] + B[idx];
}
// Host driver for the VecAdd kernel.
//
// Allocates two input vectors and one result vector on the host, fills the
// inputs, copies them to the device, launches VecAdd with one thread per
// element, copies the result back, and releases every buffer.
//
// Returns 0 on success and 1 if any host allocation, CUDA API call, or the
// kernel launch fails. Cleanup runs on every path.
int main()
{
    // Example problem size; the original listing left N as a placeholder.
    const int N = 1 << 20;
    const size_t size = (size_t)N * sizeof(float);

    // Host buffers: two inputs and the result.
    float* h_A = (float*)malloc(size);
    float* h_B = (float*)malloc(size);
    float* h_C = (float*)malloc(size);

    // Device buffers start out null so the unconditional cleanup below is
    // safe even when an earlier step failed (cudaFree on a null pointer is a
    // no-op).
    float* d_A = 0;
    float* d_B = 0;
    float* d_C = 0;

    // Single status that gates every later step: once it is no longer
    // cudaSuccess the remaining work is skipped and only cleanup runs.
    cudaError_t err = cudaSuccess;

    if (!h_A || !h_B || !h_C) {
        // Host allocation failure is folded into the same status variable.
        err = cudaErrorMemoryAllocation;
    } else {
        // Fill the input vectors with data.
        for (int i = 0; i < N; ++i) {
            h_A[i] = (float)i;
            h_B[i] = 2.0f * (float)i;
        }
    }

    if (err == cudaSuccess) err = cudaMalloc(&d_A, size);
    if (err == cudaSuccess) err = cudaMalloc(&d_B, size);
    if (err == cudaSuccess) err = cudaMalloc(&d_C, size);

    if (err == cudaSuccess)
        err = cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
    if (err == cudaSuccess)
        err = cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        const int threadsPerBlock = 256;
        // Ceiling division so a partially filled last block covers the tail.
        const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
        VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
        // A launch does not return a status; launch errors are read here.
        err = cudaGetLastError();
    }

    // The blocking copy waits for the kernel to finish and also reports any
    // error raised while it was executing.
    if (err == cudaSuccess)
        err = cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);

    cudaFree(d_A);
    cudaFree(d_B);
    cudaFree(d_C);

    free(h_A);
    free(h_B);
    free(h_C);

    return err == cudaSuccess ? 0 : 1;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement