tomilov

StdDevs for 8x8 tiles of input image

Oct 12th, 2020 (edited)
1,469
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Per-tile standard deviation of an RGB (float3) image, computed with two
  // thrust::reduce_by_key passes over tileSize x tileSize tiles:
  //   pass 1: per-tile sum of pixels           -> sums
  //   pass 2: per-tile sum of (pixel - mean)^2 -> stdDevs (via an output transform)
  //
  // Requirements visible from the code below:
  //   * nvcc extended lambdas (the `[] __device__ (...)` syntax).
  //   * <thrust/iterator/transform_output_iterator.h> for
  //     thrust::make_transform_output_iterator.
  //   * NOTE(review): reduce_by_key is called without an explicit reduction
  //     functor, so it needs an operator+ for float3 to be declared elsewhere
  //     (CUDA itself does not provide one) - confirm it is in scope.
  constexpr size_t tileSize = 8;

  const size_t width = ...;
  const size_t height = ...;
  // Assumed to address width * height pixels in row-major order - TODO confirm.
  thrust::cuda::pointer<const float3> p = ...;

  // The image must be an exact grid of tiles.
  assert(width % tileSize == 0);
  assert(height % tileSize == 0);

  thrust::cuda::vector<float3> sums;
  sums.resize((width / tileSize) * (height / tileSize));
  auto c = thrust::make_counting_iterator(0u);

  // Maps a linear "tile-major" index i (tile number * tileSize^2 + offset inside
  // the tile) to the row-major pixel index of the image, so that consecutive
  // values of i walk one whole tile before moving on to the next one.
  auto getTileIndex = [width] __device__ (uint i) -> uint
  {
      const uint tilesPerRow = static_cast<uint>(width / tileSize);
      const uint tileNumber = i / (tileSize * tileSize);
      const uint offsetInTile = i % (tileSize * tileSize);
      // Pixel coordinates = tile origin + position inside the tile.
      const uint x = (tileNumber % tilesPerRow) * tileSize + offsetInTile % tileSize;
      const uint y = (tileNumber / tilesPerRow) * tileSize + offsetInTile / tileSize;
      return static_cast<uint>(y * width + x);
  };
  auto tile = thrust::make_transform_iterator(c, getTileIndex);
  // Reduction key: the tile number each tile-major index belongs to.
  auto block = thrust::make_transform_iterator(c, [] __device__ (uint i) -> uint { return i / (tileSize * tileSize); });
  // Pixels visited in tile-major order.
  auto input = thrust::make_permutation_iterator(p, tile);
  // Pass 1: one sum per tile; the keys output is not needed.
  thrust::reduce_by_key(block, thrust::next(block, width * height), input, thrust::make_discard_iterator(), sums.begin());

  thrust::cuda::vector<float> stdDevs;
  stdDevs.resize(sums.size());
  // Pairs every pixel with the sum of the tile it belongs to.
  auto sumAndInput = thrust::make_zip_iterator(thrust::make_permutation_iterator(sums.cbegin(), block), input);
  // Squared deviation of a pixel from its tile mean, per channel.
  auto sqr = thrust::make_transform_iterator(sumAndInput, [] __device__ (thrust::tuple<float3, float3> tileSumAndPixel) -> float3
  {
      const float3 tileSum = thrust::get<0>(tileSumAndPixel);
      const float3 pixel = thrust::get<1>(tileSumAndPixel);
      const float pixelsPerTile = static_cast<float>(tileSize * tileSize);
      const float dx = pixel.x - tileSum.x / pixelsPerTile;
      const float dy = pixel.y - tileSum.y / pixelsPerTile;
      const float dz = pixel.z - tileSum.z / pixelsPerTile;
      return make_float3(dx * dx, dy * dy, dz * dz);
  });
  // Turns a per-tile sum of squared deviations into a single standard deviation:
  // the variance is the sum divided by the number of samples, averaged over the
  // three channels.
  auto toStdDev = [] __device__ (float3 sumSqr) -> float
  {
      return sqrtf((sumSqr.x + sumSqr.y + sumSqr.z) / (3.0f * static_cast<float>(tileSize * tileSize)));
  };
  auto output = thrust::make_transform_output_iterator(stdDevs.begin(), toStdDev);
  // Pass 2: reduce the squared deviations per tile; toStdDev is applied on write.
  thrust::reduce_by_key(block, thrust::next(block, width * height), sqr, thrust::make_discard_iterator(), output);
RAW Paste Data