Advertisement
hejmus

Untitled

Apr 1st, 2012
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 0.90 KB | None | 0 0
  // Element-wise vector addition kernel: C[k] = A[k] + B[k].
  //
  // Each thread handles at most one element, selected by its global index
  // computed from the x components of blockIdx, blockDim and threadIdx (the y/z
  // components are not used). Threads whose index is >= N exit without touching
  // memory, so the launch may cover more threads than there are elements.
  //
  // A, B : input arrays, read at indices [0, N)
  // C    : output array, written at indices [0, N)
  // N    : number of elements to process
  __global__ void VecAdd(float* A, float* B, float* C, int N)
  {
      const int idx = threadIdx.x + blockIdx.x * blockDim.x;

      // Surplus threads in the last block have nothing to do.
      if (idx >= N)
          return;

      C[idx] = A[idx] + B[idx];
  }
  7.  
  // Host driver for the VecAdd kernel.
  //
  // Allocates two input vectors and one output vector on the host and on the
  // device, copies the inputs to the device, launches VecAdd over N elements,
  // copies the result back into h_C, and releases every buffer.
  //
  // Returns 0 on success (or when N is not positive, in which case there is
  // nothing to do) and 1 if any host allocation, CUDA API call, or the kernel
  // launch fails.
  int main()
  {
      int N = ...;  // placeholder kept from the listing: set the element count here
      if (N <= 0)
          return 0;  // nothing to add; also avoids a 0-block launch and a bogus size

      size_t size = N * sizeof(float);

      // Host buffers. h_C receives the result copied back from the device.
      float* h_A = (float*)malloc(size);
      float* h_B = (float*)malloc(size);
      float* h_C = (float*)malloc(size);
      bool ok = (h_A != NULL) && (h_B != NULL) && (h_C != NULL);

      // (fill the vectors with data)

      // Device buffers. Initialised to NULL so the cleanup below is safe even
      // when an earlier step failed and an allocation was skipped.
      float* d_A = NULL;
      float* d_B = NULL;
      float* d_C = NULL;
      ok = ok && (cudaMalloc(&d_A, size) == cudaSuccess);
      ok = ok && (cudaMalloc(&d_B, size) == cudaSuccess);
      ok = ok && (cudaMalloc(&d_C, size) == cudaSuccess);

      ok = ok && (cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice) == cudaSuccess);
      ok = ok && (cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice) == cudaSuccess);

      if (ok) {
          int threadsPerBlock = 256;
          // Ceiling division so a partial final block still covers the tail.
          int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

          VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);

          // A kernel launch returns no status itself; query it explicitly.
          ok = (cudaGetLastError() == cudaSuccess);
      }

      // Blocking copy: also reports errors raised while the kernel was running.
      ok = ok && (cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost) == cudaSuccess);

      cudaFree(d_A); cudaFree(d_B); cudaFree(d_C);
      free(h_A); free(h_B); free(h_C);

      return ok ? 0 : 1;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement