Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ------------
- --------++--
- --------++--
- ------------
- ------------
- // We break the threads up into four flights:
- //
- // 0: even X and even Y
- // 1: odd X and even Y
- // 2: even X and odd Y
- // 3: odd X and odd Y
- // Flight index of this thread, in 0..3: bit 0 is the parity of threadIdx.x,
- // bit 1 is the parity of threadIdx.y.
- const int flight = static_cast<int>( ( threadIdx.x & 1u ) | ( ( threadIdx.y & 1u ) << 1 ) );
- // Run the four flights one after another, separated by block-wide barriers.
- // Every thread executes every iteration of this loop, so each __syncthreads()
- // is reached by the whole block in uniform control flow; only the doWork call
- // is predicated on the thread's flight. (Placing the barriers in loops whose
- // trip count depends on `flight` puts them in divergent code, which CUDA
- // leaves undefined and which may hang.)
- for (int phase = 0; phase < 4; ++phase) {
-     if (phase == flight) {
-         // Operates on this thread's pixel and its +x, +x+y and +y neighbours,
-         // using a row stride of blockDim.x.
-         // NOTE(review): for threads in the last column/row the "+ 1" indices
-         // step past the blockDim.x-wide row / last row - confirm `pixel` is
-         // sized or padded for that.
-         doWork( pixel[ threadIdx.x + threadIdx.y * blockDim.x ],
-                 pixel[ threadIdx.x + 1 + threadIdx.y * blockDim.x ],
-                 pixel[ threadIdx.x + 1 + (threadIdx.y + 1) * blockDim.x ],
-                 pixel[ threadIdx.x + (threadIdx.y + 1) * blockDim.x ]);
-     }
-     // Three barriers separate four flights; none is needed after the last.
-     // `phase` is identical for all threads, so this condition is uniform.
-     if (phase < 3) {
-         __syncthreads();
-     }
- }
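- // Resulting schedule as seen by each flight. The barrier labels are only
- // illustrative: the real __syncthreads() takes no argument.
- // Flight 0:
- doWork(...);
- __syncthreads("one");
- __syncthreads("two");
- __syncthreads("three");
- // Flight 1:
- __syncthreads("one");
- doWork(...);
- __syncthreads("two");
- __syncthreads("three");
- // Flight 2:
- __syncthreads("one");
- __syncthreads("two");
- doWork(...);
- __syncthreads("three");
- // Flight 3:
- __syncthreads("one");
- __syncthreads("two");
- __syncthreads("three");
- doWork(...);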
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement