Advertisement
Zgragselus

Mipmap.hlsli - Multiple

May 27th, 2025
17
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.33 KB | None | 0 0
  1. #ifndef NON_POWER_OF_TWO
  2. #define NON_POWER_OF_TWO 0
  3. #endif
  4.  
  5. SamplerState srcSampler : register(s0);
  6. Texture2D<float4> srcLevel : register(t0);
  7. RWTexture2D<float4> mipLevel1 : register(u0);
  8. RWTexture2D<float4> mipLevel2 : register(u1);
  9. RWTexture2D<float4> mipLevel3 : register(u2);
  10. RWTexture2D<float4> mipLevel4 : register(u3);
  11.  
  12. cbuffer InputMiplevels : register(b0)
  13. {
  14. uint srcMiplevel;
  15. uint miplevels;
  16. float2 texelSize;
  17. }
  18.  
  19. groupshared float4 tmp[64];
  20.  
  21. void StoreColor(uint idx, float4 color)
  22. {
  23. tmp[idx] = color;
  24. }
  25.  
  26. float4 LoadColor(uint idx)
  27. {
  28. return tmp[idx];
  29. }
  30.  
  31. [numthreads(8, 8, 1)]
  32. void GenerateMipmaps(uint GI : SV_GroupIndex, uint3 DTid : SV_DispatchThreadID)
  33. {
  34. // One bilinear sample is insufficient when scaling down by more than 2x.
  35. // You will slightly undersample in the case where the source dimension
  36. // is odd. This is why it's a really good idea to only generate mips on
  37. // power-of-two sized textures. Trying to handle the undersampling case
  38. // will force this shader to be slower and more complicated as it will
  39. // have to take more source texture samples.
  40. #if NON_POWER_OF_TWO == 0
  41. float2 uv = (DTid.xy + 0.5f) * texelSize;
  42. float4 src1 = srcLevel.SampleLevel(srcSampler, uv, srcMiplevel);
  43. #elif NON_POWER_OF_TWO == 1
  44. // > 2:1 in X dimension
  45. // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
  46. // horizontally.
  47. float2 uv1 = (DTid.xy + float2(0.25, 0.5)) * texelSize;
  48. float2 offset = texelSize * float2(0.5, 0.0);
  49. float4 src1 = 0.5 * (srcLevel.SampleLevel(srcSampler, uv1, srcMiplevel) +
  50. srcLevel.SampleLevel(srcSampler, uv1 + offset, srcMiplevel));
  51. #elif NON_POWER_OF_TWO == 2
  52. // > 2:1 in Y dimension
  53. // Use 2 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
  54. // vertically.
  55. float2 uv1 = (DTid.xy + float2(0.5, 0.25)) * texelSize;
  56. float2 offset = texelSize * float2(0.0, 0.5);
  57. float4 src1 = 0.5 * (srcLevel.SampleLevel(srcSampler, uv1, srcMiplevel) +
  58. srcLevel.SampleLevel(srcSampler, uv1 + offset, srcMiplevel));
  59. #elif NON_POWER_OF_TWO == 3
  60. // > 2:1 in in both dimensions
  61. // Use 4 bilinear samples to guarantee we don't undersample when downsizing by more than 2x
  62. // in both directions.
  63. float2 uv1 = (DTid.xy + float2(0.25, 0.25)) * texelSize;
  64. float2 offset = texelSize * 0.5;
  65. float4 src1 = srcLevel.SampleLevel(srcSampler, uv1, srcMiplevel);
  66. src1 += srcLevel.SampleLevel(srcSampler, uv1 + float2(offset.x, 0.0), srcMiplevel);
  67. src1 += srcLevel.SampleLevel(srcSampler, uv1 + float2(0.0, offset.y), srcMiplevel);
  68. src1 += srcLevel.SampleLevel(srcSampler, uv1 + float2(offset.x, offset.y), srcMiplevel);
  69. src1 *= 0.25;
  70. #endif
  71.  
  72. mipLevel1[DTid.xy] = PackColor(Src1);
  73.  
  74. // A scalar (constant) branch can exit all threads coherently.
  75. if (miplevels == 1)
  76. return;
  77.  
  78. // Without lane swizzle operations, the only way to share data with other
  79. // threads is through LDS.
  80. StoreColor(GI, Src1);
  81.  
  82. // This guarantees all LDS writes are complete and that all threads have
  83. // executed all instructions so far (and therefore have issued their LDS
  84. // write instructions.)
  85. GroupMemoryBarrierWithGroupSync();
  86.  
  87. // With low three bits for X and high three bits for Y, this bit mask
  88. // (binary: 001001) checks that X and Y are even.
  89. if ((GI & 0x9) == 0)
  90. {
  91. float4 src2 = LoadColor(GI + 0x01);
  92. float4 src3 = LoadColor(GI + 0x08);
  93. float4 src4 = LoadColor(GI + 0x09);
  94. src1 = 0.25 * (src1 + src2 + src3 + src4);
  95.  
  96. mipLevel2[DTid.xy / 2] = PackColor(src1);
  97. StoreColor(GI, src1);
  98. }
  99.  
  100. if (miplevels == 2)
  101. return;
  102.  
  103. GroupMemoryBarrierWithGroupSync();
  104.  
  105. // This bit mask (binary: 011011) checks that X and Y are multiples of four.
  106. if ((GI & 0x1B) == 0)
  107. {
  108. float4 src2 = LoadColor(GI + 0x02);
  109. float4 src3 = LoadColor(GI + 0x10);
  110. float4 src4 = LoadColor(GI + 0x12);
  111. src1 = 0.25 * (src1 + src2 + src3 + src4);
  112.  
  113. mipLevel3[DTid.xy / 4] = PackColor(src1);
  114. StoreColor(GI, src1);
  115. }
  116.  
  117. if (miplevels == 3)
  118. return;
  119.  
  120. GroupMemoryBarrierWithGroupSync();
  121.  
  122. // This bit mask would be 111111 (X & Y multiples of 8), but only one
  123. // thread fits that criteria.
  124. if (GI == 0)
  125. {
  126. float4 src2 = LoadColor(GI + 0x04);
  127. float4 src3 = LoadColor(GI + 0x20);
  128. float4 src4 = LoadColor(GI + 0x24);
  129. src1 = 0.25 * (src1 + src2 + src3 + src4);
  130.  
  131. mipLevel4[DTid.xy / 8] = PackColor(src1);
  132. }
  133. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement