Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Work-group inclusive prefix sum (Hillis-Steele style), double-buffered in
// local memory.
//
// This is a fragment of a kernel body: tid, idx, gid, n, dim, in, out, bout
// and buf are declared outside this excerpt. buf is used as two halves of dim
// elements each; pout selects the half written in the current step and pin
// the half read.
// NOTE(review): presumably tid is the local id, idx the global id, gid the
// group id and dim the work-group size -- confirm against the kernel header.
int pout = 0;
int pin = 1;

// Only the global-memory accesses are guarded. Every work-item must execute
// the barriers below, so they cannot sit inside this condition.
// NOTE(review): the bound is kept as `idx <= n`; if n is the element count
// this admits idx == n, one past the end -- confirm against the caller.
const bool in_range = (idx <= n);

// Load the input into local memory unshifted, which makes this an inclusive
// scan. Work-items outside the range contribute 0 so the loop below never
// reads uninitialised local memory.
if (in_range) {
    buf[pout * dim + tid] = in[idx];
} else {
    buf[pout * dim + tid] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);

for (int offset = 1; offset < dim; offset *= 2) {
    pout = 1 - pout; // swap double buffer indices
    pin = 1 - pout;
    if (tid >= offset) {
        // Both operands come from the input half. Accumulating into the
        // output half with += would mix in stale data from two steps ago.
        buf[pout * dim + tid] = buf[pin * dim + tid] + buf[pin * dim + tid - offset];
    } else {
        buf[pout * dim + tid] = buf[pin * dim + tid];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
}

// The loop's final barrier already made the result visible, and each
// work-item only reads its own slot, so no further barrier is needed.
if (in_range) {
    out[idx] = buf[pout * dim + tid]; // write output
    // NOTE(review): this compares the local id with n - 1, which selects the
    // last work-item only when n equals the work-group size; `dim - 1` may
    // be what is intended -- confirm.
    if (tid == n - 1) {
        bout[gid] = buf[pout * dim + tid]; // last scanned value of this group
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement