Advertisement
Guest User

Untitled

a guest
May 21st, 2019
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // GLSL Compute Shader "csmain"
  2. // Generated by XShaderCompiler
  3. // 20/05/2019 12:04:28
  4.  
  #version 450

  // Work group of 8 x 8 x 1 invocations (64 in total).
  layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  8.  
  // Work-group synchronization helper named after the HLSL intrinsic.
  // Issues a work-group memory barrier and then an execution barrier, so all
  // invocations of the work group rendezvous here before any of them proceeds.
  void GroupMemoryBarrierWithGroupSync()
  {
      // Memory barrier first, execution barrier second.
      groupMemoryBarrier();
      barrier();
  }
  14.  
  // Maps a cube face index and a 2D face coordinate to a direction vector.
  //
  //   cubeFace  0: +X, 1: -X, 2: +Y, 3: -Y, 4: +Z, any other value: -Z
  //   uv        coordinate on the face (main() in this file passes values
  //             derived from texel centers, spanning roughly [-1, 1])
  //
  // The returned vector is NOT normalized; its dominant axis component is +/-1.
  vec3 getDirFromCubeFace(uint cubeFace, vec2 uv)
  {
      vec3 faceDir;
      switch (cubeFace)
      {
      case 0u:
          faceDir = vec3(1.0f, -uv.y, -uv.x);
          break;
      case 1u:
          faceDir = vec3(-1.0f, -uv.y, uv.x);
          break;
      case 2u:
          faceDir = vec3(uv.x, 1.0f, uv.y);
          break;
      case 3u:
          faceDir = vec3(uv.x, -1.0f, -uv.y);
          break;
      case 4u:
          faceDir = vec3(uv.x, -uv.y, 1.0f);
          break;
      default:
          faceDir = vec3(-uv.x, -uv.y, -1.0f);
          break;
      }
      return faceDir;
  }
  32.  
  // Returns atan(u*v / sqrt(u^2 + v^2 + 1)) using the two-argument atan.
  // texelSolidAngle() evaluates this at the four corners of a texel and
  // combines the results into the texel's solid angle.
  float integrateProjectedCubeArea(float u, float v)
  {
      float numerator = u * v;
      float denominator = sqrt(u * u + v * v + 1.0f);
      return atan(numerator, denominator);
  }
  37.  
  // Solid-angle weight of the cube-face texel centered at (u, v).
  //
  //   u, v         texel center in face coordinates
  //   invFaceSize  half-extent of the texel in the same coordinates
  //                (main() passes 1 / face size)
  //
  // Evaluates integrateProjectedCubeArea() at the four texel corners and
  // combines them as F(max,max) - F(min,max) - F(max,min) + F(min,min).
  float texelSolidAngle(float u, float v, float invFaceSize)
  {
      float uMin = u - invFaceSize;
      float uMax = u + invFaceSize;
      float vMin = v - invFaceSize;
      float vMax = v + invFaceSize;

      float solidAngle = integrateProjectedCubeArea(uMax, vMax);
      solidAngle -= integrateProjectedCubeArea(uMin, vMax);
      solidAngle -= integrateProjectedCubeArea(uMax, vMin);
      solidAngle += integrateProjectedCubeArea(uMin, vMin);
      return solidAngle;
  }
  46.  
  // Spherical-harmonics coefficient vector: 5 * 5 = 25 scalar coefficients,
  // indexed 0..24 in the order written by SHBasis().
  struct SHVector
  {
      float v[5 * 5];
  };
  51.  
  // Three SHVector coefficient sets, one per color channel (red, green, blue).
  struct SHVectorRGB
  {
      SHVector R, G, B;
  };
  58.  
  // Sets every coefficient of `v` to zero.
  void SHZero(inout SHVector v)
  {
      for (int idx = 0; idx < v.v.length(); ++idx)
      {
          v.v[idx] = 0.0f;
      }
  }
  64.  
  // Accumulates a scaled vector: lhs[i] += rhs[i] * c for every coefficient.
  void SHMultiplyAdd(inout SHVector lhs, SHVector rhs, float c)
  {
      for (int idx = 0; idx < lhs.v.length(); ++idx)
      {
          float scaled = rhs.v[idx] * c;
          lhs.v[idx] += scaled;
      }
  }
  70.  
  // Component-wise accumulation: lhs[i] += rhs[i] for every coefficient.
  void SHAdd(inout SHVector lhs, SHVector rhs)
  {
      for (int idx = 0; idx < lhs.v.length(); ++idx)
      {
          lhs.v[idx] += rhs.v[idx];
      }
  }
  76.  
  // Evaluates the 25 real spherical-harmonics basis functions (bands 0..4)
  // for direction `dir` and returns them as an SHVector.
  //
  //   dir  direction to evaluate; the polynomial forms used here (e.g.
  //        3*z^2 - 1) are the unit-sphere forms, and main() normalizes the
  //        direction before calling.
  //
  // Coefficient layout: v[0] band 0, v[1..3] band 1, v[4..8] band 2,
  // v[9..15] band 3, v[16..24] band 4.
  SHVector SHBasis(vec3 dir)
  {
      float x = dir.x;
      float y = dir.y;
      float z = dir.z;
      float x2 = x * x;
      float y2 = y * y;
      float z2 = z * z;
      float z3 = z2 * z;
      float x4 = x2 * x2;
      float y4 = y2 * y2;
      float z4 = z2 * z2;

      SHVector o;

      // Band 0
      o.v[0] = 0.282095f;

      // Band 1
      o.v[1] = -0.488603f * y;
      o.v[2] = 0.488603f * z;
      o.v[3] = -0.488603f * x;

      // Band 2
      o.v[4] = 1.092548f * x * y;
      o.v[5] = -1.092548f * y * z;
      o.v[6] = 0.315392f * (3.0f * z2 - 1.0f);
      o.v[7] = -1.092548f * x * z;
      o.v[8] = 0.546274f * (x2 - y2);

      // Band 3
      o.v[9] = -0.590043f * y * (3.0f * x2 - y2);
      o.v[10] = 2.890611f * y * x * z;
      // v[11] and v[13] are the same polynomial with x and y exchanged, so
      // they must share one normalization constant, sqrt(21 / (32 * pi)).
      // NOTE(review): any other code that evaluates or reconstructs with a
      // different constant for this term must be kept in step with this one.
      o.v[11] = -0.457045f * y * (-1.0f + 5.0f * z2);
      o.v[12] = 0.373176f * (5.0f * z3 - 3.0f * z);
      o.v[13] = -0.457045f * x * (-1.0f + 5.0f * z2);
      o.v[14] = 1.445306f * (x2 - y2) * z;
      o.v[15] = -0.590043f * x * (x2 - 3.0f * y2);

      // Band 4
      o.v[16] = 2.503340f * x * y * (x2 - y2);
      o.v[17] = -1.770130f * y * z * (3.0f * x2 - y2);
      o.v[18] = 0.946175f * y * x * (-1.0f + 7.0f * z2);
      o.v[19] = -0.669046f * y * z * (-3.0f + 7.0f * z2);
      o.v[20] = (105.0f * z4 - 90.0f * z2 + 9.0f) / 28.359261f;
      o.v[21] = -0.669046f * x * z * (-3.0f + 7.0f * z2);
      o.v[22] = 0.473087f * (x2 - y2) * (-1.0f + 7.0f * z2);
      o.v[23] = -1.770130f * x * z * (x2 - 3.0f * y2);
      o.v[24] = 0.625836f * (x4 - 6.0f * y2 * x2 + y4);

      return o;
  }
  117.  
  // Partial projection result: weighted RGB coefficient sums together with
  // the sum of the weights that went into them.
  struct SHCoeffsAndWeight
  {
      SHVectorRGB coeffs; // per-channel sums of basis * radiance * weight
      float weight;       // sum of the per-texel weights
  };
  123.  
  // Cube map sampled by main() at mip level 0.
  layout(binding = 1) uniform samplerCube gInputTex;

  // Output buffer. main() writes one SHCoeffsAndWeight per work group, at
  // index (face * groupsX * groupsY + groupY * groupsX + groupX).
  layout(std430, binding = 2) buffer gOutput
  {
      SHCoeffsAndWeight xst_gOutput[];
  };

  // Per-dispatch parameters.
  layout(std140, binding = 3) uniform Params
  {
      uint gCubeFace;      // face index passed to getDirFromCubeFace()
      uint gFaceSize;      // face edge length in texels (used as loop bound)
      uvec2 gDispatchSize; // used as the work-group grid size when indexing output
  };

  // Reduction scratch space: one slot per invocation of the 8 x 8 work group,
  // indexed by gl_LocalInvocationIndex.
  shared SHCoeffsAndWeight sCoeffs[8 * 8];
  139.  
  // Entry point. Each invocation accumulates the weighted SH projection of a
  // 4 x 4 tile of texels from face gCubeFace of gInputTex. The 64 per-invocation
  // results are then summed through shared memory, and invocation 0 writes
  // the work-group total to xst_gOutput.
  void main()
  {
      // Per-invocation accumulator, cleared before use.
      SHCoeffsAndWeight accum;
      SHZero(accum.coeffs.R);
      SHZero(accum.coeffs.G);
      SHZero(accum.coeffs.B);
      accum.weight = 0.0f;

      float texelScale = 1.0f / float(gFaceSize);

      // Tile of texels owned by this invocation: [tileBegin, tileEnd).
      uvec2 tileBegin = gl_GlobalInvocationID.xy * 4u;
      uvec2 tileEnd = tileBegin + uvec2(4u, 4u);

      for (uint row = tileBegin.y; row < tileEnd.y; ++row)
      {
          for (uint col = tileBegin.x; col < tileEnd.x; ++col)
          {
              // Leave the current row as soon as a texel falls outside the face.
              bool insideFace = (col < gFaceSize) && (row < gFaceSize);
              if (!insideFace)
                  break;

              // Texel center mapped to face coordinates.
              float u = 2.0f * (float(col) + 0.5f) * texelScale - 1.0f;
              float v = 2.0f * (float(row) + 0.5f) * texelScale - 1.0f;

              vec3 dir = normalize(getDirFromCubeFace(gCubeFace, vec2(u, v)));

              float texelWeight = texelSolidAngle(u, v, texelScale);
              SHVector basis = SHBasis(dir);
              vec3 radiance = textureLod(gInputTex, dir, 0.0).rgb;

              SHMultiplyAdd(accum.coeffs.R, basis, radiance.r * texelWeight);
              SHMultiplyAdd(accum.coeffs.G, basis, radiance.g * texelWeight);
              SHMultiplyAdd(accum.coeffs.B, basis, radiance.b * texelWeight);
              accum.weight += texelWeight;
          }
      }

      // Publish this invocation's partial result to the work group.
      uint lane = gl_LocalInvocationIndex;
      sCoeffs[lane] = accum;
      GroupMemoryBarrierWithGroupSync();

      // Pairwise sum: each step halves the number of active invocations
      // (32, 16, ..., 1); slot 0 ends up holding the work-group total.
      for (uint stride = (8u * 8u) >> 1u; stride > 0u; stride >>= 1u)
      {
          if (lane < stride)
          {
              uint partner = lane + stride;
              SHAdd(sCoeffs[lane].coeffs.R, sCoeffs[partner].coeffs.R);
              SHAdd(sCoeffs[lane].coeffs.G, sCoeffs[partner].coeffs.G);
              SHAdd(sCoeffs[lane].coeffs.B, sCoeffs[partner].coeffs.B);
              sCoeffs[lane].weight += sCoeffs[partner].weight;
          }
          // Executed by every invocation, outside the conditional.
          GroupMemoryBarrierWithGroupSync();
      }

      // One invocation writes the work-group total to the output buffer.
      if (lane == 0u)
      {
          uint groupsPerFace = gDispatchSize.x * gDispatchSize.y;
          uint slot = groupsPerFace * gCubeFace + gl_WorkGroupID.y * gDispatchSize.x + gl_WorkGroupID.x;
          xst_gOutput[slot] = sCoeffs[0];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement