Advertisement
Guest User

Untitled

a guest
Oct 24th, 2016
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.50 KB | None | 0 0
  // Block-level sum reduction: each thread block sums the slice of `d_in` it
  // covers and writes one partial sum to `d_out[blockIdx.x]`.
  //
  // Launch requirements (as used by the code below):
  //   - 1D grid and 1D blocks; thread `threadIdx.x` of block `blockIdx.x`
  //     handles element `blockIdx.x * blockDim.x + threadIdx.x`.
  //   - Dynamic shared memory of at least `blockDim.x * sizeof(float)` bytes
  //     (third launch parameter).
  //   - `d_out` must hold at least `gridDim.x` floats.
  //
  // Parameters:
  //   d_in  - input array; the first `n` elements are read.
  //   n     - number of valid elements in `d_in`; indices >= n contribute 0.
  //   d_out - output array receiving one partial sum per block.
  //
  // Any block size is supported. For power-of-two block sizes the pairing
  // order matches the classic "halve the stride" tree reduction.
  __global__ void reduce_kernel(float* d_in, int n, float* d_out) {
      extern __shared__ float shared_mem[];

      const unsigned int tid = threadIdx.x;
      // 64-bit index so the product cannot wrap and then be mistaken for a
      // small (or negative) in-range index at the tail of a large grid.
      const long long gid =
          static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

      // Stage one element per thread; pad the out-of-range tail with zeros so
      // it does not affect the sum.
      shared_mem[tid] = (gid < n) ? d_in[gid] : 0.0f;
      __syncthreads();

      // Tree reduction in shared memory. `active` is the number of leading
      // slots that still hold partial sums. Each step folds the upper part
      // onto the lower part; when `active` is odd the middle slot has no
      // partner and is simply carried over to the next step.
      for (unsigned int active = blockDim.x; active > 1;) {
          const unsigned int half = (active + 1) / 2;
          if (tid + half < active) {
              shared_mem[tid] += shared_mem[tid + half];
          }
          // Barrier is outside the branch: every thread in the block reaches it.
          __syncthreads();
          active = half;
      }

      // Thread 0 publishes this block's partial sum.
      if (tid == 0) {
          d_out[blockIdx.x] = shared_mem[0];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement