Advertisement
Guest User

Untitled

a guest
Feb 21st, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.01 KB | None | 0 0
  1.  
  /**
   * Per-block sum reduction using interleaved addressing with a strided index.
   *
   * Each block copies 2*blockDim.x consecutive floats of `a`, starting at element
   * blockIdx.x * 2 * blockDim.x, into shared memory (two adjacent elements per
   * thread), reduces them in shared memory, and thread 0 stores the block's
   * partial sum in c[blockIdx.x].
   *
   * Only the x components of the thread/block indices are used.
   *
   * Preconditions (not checked by the kernel):
   *   - blockDim.x <= BLOCK_SIZE: the shared buffer holds 2*BLOCK_SIZE floats.
   *   - `a` has at least gridDim.x * 2 * blockDim.x readable elements.
   *   - `c` has at least gridDim.x writable elements.
   *
   * blockDim.x does not have to be a power of two.
   *
   * @tparam BLOCK_SIZE  upper bound on blockDim.x, fixes the shared-memory size
   * @param  c           output: one partial sum per block
   * @param  a           input values
   */
  template <int BLOCK_SIZE> __global__ void sumKernelStr2(float *c, float *a) {
      __shared__ float sdata[BLOCK_SIZE * 2];

      // Number of elements this block reduces.
      const unsigned int count = 2 * blockDim.x;
      // Each thread owns the pair (tid, tid + 1) of the shared buffer.
      const unsigned int tid = 2 * threadIdx.x;
      const unsigned int i = blockIdx.x * count + tid;

      sdata[tid] = a[i];
      sdata[tid + 1] = a[i + 1];
      __syncthreads();

      for (unsigned int odstep = 1; odstep < count; odstep *= 2) {
          const unsigned int index = odstep * tid;
          // Guard the *source* element: for block sizes that are not a power of
          // two, index + odstep can fall beyond the loaded data even though
          // index itself is in range. When it does, sdata[index] already holds
          // the complete partial sum for its segment and nothing must be added.
          if (index + odstep < count) {
              sdata[index] += sdata[index + odstep];
          }
          // Kept outside the branch so every thread of the block reaches it.
          __syncthreads();
      }

      if (tid == 0) c[blockIdx.x] = sdata[0];
  }
  21.  
  /**
   * Per-block sum reduction using sequential addressing.
   *
   * Each block copies 2*blockDim.x consecutive floats of `a`, starting at element
   * blockIdx.x * 2 * blockDim.x, into shared memory (thread t loads elements t
   * and t + blockDim.x of the block's segment), folds the upper half of the
   * active range onto the lower half until one value remains, and thread 0
   * stores the block's partial sum in c[blockIdx.x].
   *
   * Only the x components of the thread/block indices are used.
   *
   * Preconditions (not checked by the kernel):
   *   - blockDim.x <= BLOCK_SIZE: the shared buffer holds 2*BLOCK_SIZE floats.
   *   - `a` has at least gridDim.x * 2 * blockDim.x readable elements.
   *   - `c` has at least gridDim.x writable elements.
   *
   * blockDim.x does not have to be a power of two.
   *
   * @tparam BLOCK_SIZE  upper bound on blockDim.x, fixes the shared-memory size
   * @param  c           output: one partial sum per block
   * @param  a           input values
   */
  template <int BLOCK_SIZE> __global__ void sumKernelStr3(float *c, float *a) {
      __shared__ float sdata[BLOCK_SIZE * 2];

      const unsigned int tid = threadIdx.x;
      const unsigned int i = blockIdx.x * 2 * blockDim.x + threadIdx.x;

      sdata[tid] = a[i];
      sdata[tid + blockDim.x] = a[i + blockDim.x];
      __syncthreads();

      // `active` is the number of leading elements of sdata that still carry
      // partial sums. Each pass folds the top floor(active/2) elements onto the
      // bottom ones; with an odd count the middle element is simply carried
      // over, so no value is dropped for block sizes that are not a power of
      // two. For power-of-two sizes the offsets are blockDim.x, blockDim.x/2,
      // ..., 1, exactly the classic halving sequence.
      for (unsigned int active = 2 * blockDim.x; active > 1;) {
          const unsigned int half = (active + 1) / 2;  // ceil(active / 2)
          // Writes land in [0, active - half), reads in [half, active); the two
          // ranges never overlap because active - half <= half.
          if (tid < active - half) sdata[tid] += sdata[tid + half];

          // Kept outside the branch so every thread of the block reaches it.
          __syncthreads();
          active = half;
      }

      if (tid == 0) c[blockIdx.x] = sdata[0];
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement