Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * This kernel only performs 32-bit atomic_add on a __global int, and uses no
 * 64-bit atomics itself. The pragma is kept unchanged in case other code in
 * the same program relies on it.
 */
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable

/* Largest element count handled by the direct atomic-accumulate path. */
#define REDUCE_ATOMIC_MAX_N 512

/*
 * One pass of an integer sum reduction over in[0 .. n-1].
 *
 * Parameters:
 *   in  - values to sum. On the tree path it is also an output: work-group g
 *         stores its partial sum in in[g].
 *   out - on the atomic path every in-range work-item adds its element to
 *         out[0]. The kernel only adds to out[0]; it never clears it.
 *   buf - local scratch; indexed by local id, so it must hold at least
 *         get_local_size(0) ints.
 *   n   - number of valid elements in `in`. Values <= 0 make the kernel a
 *         no-op.
 *
 * Paths:
 *   n <= REDUCE_ATOMIC_MAX_N : atomic accumulation into out[0].
 *   otherwise                : per-work-group tree reduction in local memory,
 *                              partial sum written back to in[group id].
 *
 * NOTE(review): on the tree path a work-group writes in[gid] while a
 * different work-group may still be reading that same element as in[idx].
 * Nothing in this kernel orders those two accesses, so results for more than
 * one work-group depend on scheduling. Fixing it needs a separate destination
 * buffer for the partial sums, which changes what the host has to pass in, so
 * it is left as-is here - confirm against the host code.
 */
__kernel void reduce(__global int *in, __global int *out,
                     __local int *buf, int n)
{
    const size_t tid = get_local_id(0);
    const size_t gid = get_group_id(0);
    const size_t idx = get_global_id(0);
    const size_t dim = get_local_size(0);

    /*
     * Do all bounds checks in size_t. Comparing idx directly against a
     * negative int would convert it to a huge unsigned value and let every
     * work-item through.
     */
    const size_t count = (n > 0) ? (size_t)n : 0;

    if (n <= REDUCE_ATOMIC_MAX_N) {
        /* Small input: each in-range work-item adds its element directly. */
        if (idx < count) {
            atomic_add(&out[0], in[idx]);
        }
    } else {
        /*
         * Stage this work-item's element in local memory. Work-items past
         * the end of the input contribute 0 instead of reading out of
         * bounds when the global size is rounded up beyond n.
         */
        buf[tid] = (idx < count) ? in[idx] : 0;
        barrier(CLK_LOCAL_MEM_FENCE);

        /*
         * Pairwise tree reduction: at each step the work-items whose local
         * id is a multiple of 2*stride absorb the partner `stride` slots
         * above them. The `tid + stride < dim` guard keeps the access inside
         * buf and makes the sum correct when the local size is not a power
         * of two. The loop bounds depend only on dim, so every work-item in
         * the group reaches each barrier.
         */
        for (size_t stride = 1; stride < dim; stride *= 2) {
            if (tid % (2 * stride) == 0 && tid + stride < dim) {
                buf[tid] += buf[tid + stride];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        /* A single work-item publishes the group's partial sum. */
        if (tid == 0) {
            in[gid] = buf[0];
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement