Advertisement
Guest User

Untitled

a guest
Jan 21st, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 0.72 KB | None | 0 0
  /**
   * Per-block sum reduction using interleaved addressing with a strided index.
   *
   * Each block reduces its own window of 2 * blockDim.x consecutive floats of
   * `a` (starting at blockIdx.x * 2 * blockDim.x) and stores that window's sum
   * in c[blockIdx.x].
   *
   * Launch layout: 1D grid, 1D block. No shared memory is used.
   *
   * Preconditions (the kernel receives no element count, so it cannot check):
   *   - `a` holds at least gridDim.x * 2 * blockDim.x floats;
   *   - `c` holds at least gridDim.x floats.
   *
   * Side effect: the reduction is done in place, so `a` is overwritten with
   * intermediate partial sums.
   *
   * BLOCK_SIZE is not used by the body; it is kept so existing instantiations
   * keep compiling.
   *
   * @param c  output, one partial sum per block
   * @param a  input values, clobbered by the reduction
   */
  template <int BLOCK_SIZE> __global__ void sumKernelStr2(float *c, float *a) {
      // Number of elements this block is responsible for.
      const unsigned int span = 2 * blockDim.x;
      // Start of this block's window; size_t avoids 32-bit overflow on large grids.
      float *window = a + (size_t)blockIdx.x * span;

      for (unsigned int odstep = 1; odstep < span; odstep *= 2) {
          // Consecutive threads own consecutive pairs that are `odstep` apart.
          const unsigned int index = 2 * odstep * threadIdx.x;
          // Guarding on the partner keeps the read inside the window even when
          // blockDim.x is not a power of two.
          if (index + odstep < span) {
              window[index] += window[index + odstep];
          }
          // All partial sums of this step must be visible before the next step
          // reads them. Kept outside the branch so every thread reaches it.
          __syncthreads();
      }

      if (threadIdx.x == 0) c[blockIdx.x] = window[0];
  }
  15.  
  /**
   * Per-block sum reduction using sequential addressing (the active range is
   * folded in half on every step).
   *
   * Each block reduces its own window of 2 * blockDim.x consecutive floats of
   * `a` (starting at blockIdx.x * 2 * blockDim.x) and stores that window's sum
   * in c[blockIdx.x].
   *
   * Launch layout: 1D grid, 1D block. No shared memory is used.
   *
   * Preconditions (the kernel receives no element count, so it cannot check):
   *   - `a` holds at least gridDim.x * 2 * blockDim.x floats;
   *   - `c` holds at least gridDim.x floats.
   *
   * Side effect: the reduction is done in place, so `a` is overwritten with
   * intermediate partial sums.
   *
   * BLOCK_SIZE is not used by the body; it is kept so existing instantiations
   * keep compiling.
   *
   * @param c  output, one partial sum per block
   * @param a  input values, clobbered by the reduction
   */
  template <int BLOCK_SIZE> __global__ void sumKernelStr3(float *c, float *a) {
      const unsigned int tid = threadIdx.x;
      // Start of this block's window; size_t avoids 32-bit overflow on large grids.
      float *window = a + (size_t)blockIdx.x * 2 * blockDim.x;

      // `active` is the number of leading window elements that still hold
      // partial sums. Rounding the fold distance up keeps the middle element
      // when `active` is odd, so non-power-of-two block sizes lose no data.
      // For power-of-two block sizes the distances are blockDim.x,
      // blockDim.x / 2, ..., 1.
      for (unsigned int active = 2 * blockDim.x; active > 1; ) {
          const unsigned int odstep = (active + 1) / 2;
          // Reads touch [odstep, active), writes touch [0, active - odstep):
          // the ranges never overlap within a step.
          if (tid + odstep < active) window[tid] += window[tid + odstep];
          // Publish this step's partial sums before the next step reads them.
          // Kept outside the branch so every thread reaches it.
          __syncthreads();
          active = odstep;
      }

      if (tid == 0) c[blockIdx.x] = window[0];
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement