Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Work-group prefix sum over buf, used as two ping-pong halves of dim
- // elements each: every pass reads half `pin` and writes half `pout`.
- // NOTE(review): assumes idx is the global work-item id, tid the local id,
- // dim the work-group size and buf holds at least 2*dim elements -- none of
- // these declarations are visible in this fragment; confirm against the
- // kernel header.
- int pout = 0;
- int pin = 1;
- // Load input into shared memory.
- // This is exclusive scan, so shift right by one
- // and set first element to 0.
- // Work-items outside the valid range store 0 instead of skipping the load,
- // so that every work-item can take part in all passes below.
- // NOTE(review): the idx <= n bound is kept from the original code; together
- // with the store at the end it implies out has room for n+1 elements --
- // TODO confirm.
- buf[pout*dim+tid] = (idx > 0 && idx <= n) ? in[idx-1] : 0;
- // barrier() has to be reached by every work-item of the work-group, so the
- // range check guards only the load and the store, never a barrier.
- barrier(CLK_LOCAL_MEM_FENCE);
- for (int offset = 1; offset < dim; offset *= 2) {
-     pout = 1 - pout; // swap double buffer indices
-     pin = 1 - pout;
-     if (tid >= offset) {
-         // Both operands come from the input half; accumulating with += would
-         // add onto the stale value left in the output half two passes ago.
-         buf[pout*dim+tid] = buf[pin*dim+tid] + buf[pin*dim+tid - offset];
-     } else {
-         // No partner at this distance: carry the value over unchanged so the
-         // output half is fully written before the next swap.
-         buf[pout*dim+tid] = buf[pin*dim+tid];
-     }
-     barrier(CLK_LOCAL_MEM_FENCE);
- }
- if (idx <= n) {
-     // The load already applied the exclusive shift, so each work-item
-     // writes back the slot it owns; reading tid+1 would undo the shift and
-     // step past the end of the half for the last work-item.
-     out[idx] = buf[pout*dim+tid]; // write output
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement