Advertisement
Guest User

Untitled

a guest
Oct 20th, 2014
192
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 0.86 KB | None | 0 0
  // NOTE(review): atomic_add() on a 32-bit int is a core built-in from
  // OpenCL 1.1 on; this 64-bit atomics extension does not appear to be
  // required by the kernel below. Kept in case the host build relies on it.
  #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

  /*
   * reduce - one pass of an integer sum reduction over in[0..n).
   *
   * in   global input array; on the large-input path element in[g] is
   *      overwritten with the partial sum produced by work-group g.
   * out  global output; on the small-input path every in-range element is
   *      atomically added to out[0]. The kernel never clears out[0], so it
   *      presumably has to be zeroed by the host beforehand (confirm).
   * buf  local scratch space, indexed by local id, so it must hold at least
   *      get_local_size(0) ints.
   * n    number of valid elements in `in`.
   *
   * n <= 512: each work-item with a global id below n adds its element to
   *           out[0]; nothing is written to `in`.
   * n  > 512: each work-group sums its slice in local memory and work-item 0
   *           stores the result to in[get_group_id(0)]. The host is
   *           presumably expected to launch the kernel again with n set to
   *           the number of work-groups (confirm against the caller).
   */
  __kernel void reduce(__global int *in, __global int *out,
                       __local int *buf, int n)
  {
    const size_t tid = get_local_id(0);    // index inside the work-group
    const size_t gid = get_group_id(0);    // index of the work-group
    const size_t idx = get_global_id(0);   // index into `in`
    const size_t dim = get_local_size(0);  // work-items per work-group

    // Work-items whose global id lies beyond the data must not touch `in`.
    // The explicit n > 0 test keeps a negative n from being converted to a
    // huge unsigned value in the comparison.
    const bool in_range = (n > 0) && (idx < (size_t)n);

    // Level 0: small input, accumulate straight into out[0].
    if (n <= 512) {
      if (in_range) {
        atomic_add(&out[0], in[idx]);
      }
      return;
    }

    // Level 1: stage one element per work-item in local memory. Out-of-range
    // work-items contribute 0, the identity of the sum, instead of reading
    // past the end of `in`.
    buf[tid] = in_range ? in[idx] : 0;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Pairwise tree reduction: at every step the work-items whose local id
    // is a multiple of 2*stride absorb the neighbour `stride` slots away.
    // The tid + stride < dim guard keeps the access inside buf when the
    // work-group size is not a power of two. The barrier is outside the
    // `if` so that every work-item in the group reaches it.
    for (size_t stride = 1; stride < dim; stride *= 2) {
      if ((tid % (2 * stride)) == 0 && (tid + stride) < dim) {
        buf[tid] += buf[tid + stride];
      }
      barrier(CLK_LOCAL_MEM_FENCE);
    }

    // A single work-item publishes the work-group's partial sum.
    // NOTE(review): for gid > 0, in[gid] is also loaded by a different
    // work-group (the one whose slice contains index gid) during this same
    // launch, and barrier() only orders work-items within one work-group,
    // so this store can race with that load. Writing partial sums to a
    // separate buffer would remove the hazard but changes the host-side
    // protocol, which is not visible here.
    if (tid == 0) {
      in[gid] = buf[0];
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement