Advertisement
Guest User

Untitled

a guest
Dec 9th, 2016
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.16 KB | None | 0 0
  // Separable blur compute pass (HLSL).
  //
  // Each thread group handles THREAD_GROUP_WIDTH consecutive texels along the
  // blur axis. The group first stages the texels it needs in groupshared
  // memory, then every thread accumulates its weighted window from that cache.
  //
  // Dispatch layout, as implied by the coordinate maths in main():
  // SV_DispatchThreadID.x advances along `direction`, .y advances across it.

  #define MAX_KERNEL_WIDTH                    64      // number of weights available in tempKernel
  #define THREAD_GROUP_WIDTH                  128     // threads (and output texels) per group

  cbuffer BlurData                            : register(b5)
  {
      // Blur axis. It is truncated to int2 before use, so it is presumably
      // (1,0) or (0,1) -- NOTE(review): confirm against the host code.
      float2 direction                        : packoffset(c0);
      // Blur radius in texels; the window is 2 * (int)width + 1 taps wide.
      float width                             : packoffset(c0.z);
      // Not read by this shader; presumably padding for the 16-byte register.
      float buffer                            : packoffset(c0.w);
  };

  cbuffer Kernel                              : register(b6)
  {
      // Blur weights. Declared as float4 registers and reinterpreted as a flat
      // float array in main().
      float4 gKernel[ MAX_KERNEL_WIDTH ];
  };

  // Source image that gets blurred.
  Texture2D gInputTexture                     : register(t0);
  // NOTE(review): the two textures below are not referenced in this shader and
  // both claim register t1. Left unchanged because the host-side binding is not
  // visible here; give one of them its own slot before either is actually used.
  Texture2D<float4> gNormalTexture            : register(t1);
  Texture2D<float4> gDepthTexture             : register(t1);

  // Read-write output texture (UAV) that receives the blurred result.
  RWTexture2D<float4> gOutput                 : register(u0);

  // Per-group texel cache, sized for the worst case: one texel per thread plus
  // 2 * MAX_KERNEL_WIDTH + 1 extra slots.
  groupshared float4 sharedMem[ THREAD_GROUP_WIDTH + 2 * MAX_KERNEL_WIDTH + 1 ];

  // Entry point: one thread per output texel.
  //   groupThreadID.x   - index of this thread inside its group (cache slot)
  //   globalThreadID.xy - position along / across the blur axis
  [numthreads(THREAD_GROUP_WIDTH, 1, 1)]
  void main(uint3 groupThreadID : SV_GroupThreadID, uint3 globalThreadID : SV_DispatchThreadID)
  {
      // Flatten the float4 registers into consecutive scalar weights.
      static float tempKernel[ MAX_KERNEL_WIDTH ] = (float[ MAX_KERNEL_WIDTH ])(gKernel);

      // Convert the float constants to integers exactly once so that the window
      // origin, the number of staged texels and the number of taps all agree,
      // even if `width` arrives with a fractional part.
      const int2 axis     = (int2)(direction);
      const int  radius   = (int)(width);
      // Never read past the end of tempKernel. A non-positive tap count (negative
      // width) makes the loop below a no-op, so the output is (0,0,0,1).
      const int  tapCount = min(2 * radius + 1, MAX_KERNEL_WIDTH);

      // Texel this thread writes, and the slot it owns in the group cache.
      const int2 gpos = int2(globalThreadID.x * axis + globalThreadID.y * (1 - axis));
      const int  i    = groupThreadID.x;

      // First texel of this thread's window.
      const int2 basePos = gpos - axis * radius;

      // Every thread stages the first texel of its own window ...
      sharedMem[ i ] = gInputTexture.Load(int3(basePos, 0));

      // ... and the first (tapCount - 1) threads also stage the texels that lie
      // beyond the last thread's window start, so the whole group is covered.
      if (i < tapCount - 1)
      {
          sharedMem[ i + THREAD_GROUP_WIDTH ] = gInputTexture.Load(int3(basePos + THREAD_GROUP_WIDTH * axis, 0));
      }

      // All groupshared writes must be visible to, and all threads must have
      // reached, this point before any thread reads its neighbours' slots.
      // DeviceMemoryBarrier() covers neither groupshared memory nor thread sync.
      GroupMemoryBarrierWithGroupSync();

      // Weighted sum over this thread's window.
      float4 sum = float4(0, 0, 0, 0);
      for (int tap = 0; tap < tapCount; ++tap)
      {
          sum += sharedMem[ i + tap ] * tempKernel[ tap ];
      }

      // Force an opaque result.
      sum.w = 1;

      gOutput[ gpos ] = sum;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement