Advertisement
Guest User

Untitled

a guest
Mar 19th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.04 KB | None | 0 0
  1. ------------
  2. --------++--
  3. --------++--
  4. ------------
  5. ------------
  6.  
  // We break the threads of a block up into four flights, selected by the
  // parity of the thread's X and Y index within the block:
  //
  // 0: even X and even Y
  // 1: odd X and even Y
  // 2: even X and odd Y
  // 3: odd X and odd Y
  //
  // The flights take turns: flight 0 calls doWork() first, then flight 1,
  // then 2, then 3, with a block-wide barrier between consecutive turns.
  // Presumably this keeps threads whose 2x2 pixel patches overlap from
  // running doWork() at the same time -- confirm against doWork().
  const int flight = (threadIdx.x & 1) | ((threadIdx.y & 1) << 1);

  // Every thread walks the same four phases and reaches each __syncthreads()
  // in the same loop iteration. The barrier must not sit inside a loop or
  // branch whose trip count / condition depends on the thread (as a
  // per-flight "wait N barriers" loop would): __syncthreads() in divergent
  // control flow is undefined behaviour and may hang. Only the doWork() call
  // is predicated on the thread's flight; the barrier condition below depends
  // solely on `phase`, which is identical for all threads in the block.
  for (int phase = 0; phase < 4; ++phase) {
    if (phase == flight) {
      // NOTE(review): the +1 neighbours are addressed with a row stride of
      // blockDim.x. For threadIdx.x == blockDim.x - 1 the "x + 1" index is
      // the first element of the next row, and for threadIdx.y ==
      // blockDim.y - 1 the "y + 1" row lies past blockDim.y rows -- confirm
      // that `pixel` is sized/padded for this.
      doWork( pixel[ threadIdx.x + threadIdx.y * blockDim.x ],
              pixel[ threadIdx.x + 1 + threadIdx.y * blockDim.x ],
              pixel[ threadIdx.x + 1 + (threadIdx.y + 1) * blockDim.x ],
              pixel[ threadIdx.x + (threadIdx.y + 1) * blockDim.x ]);
    }

    // Three barriers separate the four flights; none is needed after the
    // last flight to preserve the original barrier count per thread.
    if (phase < 3) {
      __syncthreads();
    }
  }
  27.  
  // Illustration only (not compilable): the sequence of calls that a thread
  // of each flight executes. Each thread passes three barriers in total and
  // a thread in flight N calls doWork() after N of them. The string
  // arguments appear to be labels so the three barriers can be lined up
  // across the four traces.
  //
  // Flight 0: work first, then all three barriers.
  28. doWork(...);
  29. __syncthreads("one");
  30. __syncthreads("two");
  31. __syncthreads("three");
  32.  
  // Flight 1: work after barrier "one".
  33. __syncthreads("one");
  34. doWork(...);
  35. __syncthreads("two");
  36. __syncthreads("three");
  37.  
  // Flight 2: work after barrier "two".
  38. __syncthreads("one");
  39. __syncthreads("two");
  40. doWork(...);
  41. __syncthreads("three");
  42.  
  // Flight 3: work after all three barriers.
  43. __syncthreads("one");
  44. __syncthreads("two");
  45. __syncthreads("three");
  46. doWork(...);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement