Advertisement
Guest User

Untitled

a guest
May 21st, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.10 KB | None | 0 0
  1. #pragma clang diagnostic ignored "-Wmissing-prototypes"
  2.  
  3. #include <metal_stdlib>
  4. #include <simd/simd.h>
  5.  
  6. using namespace metal;
  7.  
  8. struct SHVector
  9. {
  10.     float v[25];
  11. };
  12.  
  13. struct SHVector_1
  14. {
  15.     float v[25];
  16. };
  17.  
  18. struct SHVectorRGB
  19. {
  20.     SHVector R;
  21.     SHVector G;
  22.     SHVector B;
  23. };
  24.  
  25. struct SHCoeffsAndWeight
  26. {
  27.     SHVectorRGB coeffs;
  28.     float weight;
  29. };
  30.  
  31. struct SHCoeffsAndWeight_1
  32. {
  33.     SHVectorRGB_1 coeffs;
  34.     float weight;
  35. };
  36.  
  37. struct Params
  38. {
  39.     uint gCubeFace;
  40.     uint gFaceSize;
  41.     uint2 gDispatchSize;
  42. };
  43.  
  44. struct SHVectorRGB_1
  45. {
  46.     SHVector_1 R;
  47.     SHVector_1 G;
  48.     SHVector_1 B;
  49. };
  50.  
  51. struct gOutput
  52. {
  53.     SHCoeffsAndWeight xst_gOutput[1];
  54. };
  55.  
  56. constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u);
  57.  
  58. struct spvDescriptorSetBuffer0
  59. {
  60.     texturecube<float> gInputTex [[id(1)]];
  61.     device gOutput* m_807 [[id(2)]];
  62.     sampler gInputTexSmplr [[id(2)]];
  63.     constant Params* m_531 [[id(3)]];
  64. };
  65.  
  66. void SHZero(thread SHVector_1& v)
  67. {
  68.     for (int i = 0; i < 25; i++)
  69.     {
  70.         v.v[i] = 0.0;
  71.     }
  72. }
  73.  
  74. float3 getDirFromCubeFace(thread const uint& cubeFace, thread const float2& uv)
  75. {
  76.     float3 dir;
  77.     if (cubeFace == 0u)
  78.     {
  79.         dir = float3(1.0, -uv.y, -uv.x);
  80.     }
  81.     else
  82.     {
  83.         if (cubeFace == 1u)
  84.         {
  85.             dir = float3(-1.0, -uv.y, uv.x);
  86.         }
  87.         else
  88.         {
  89.             if (cubeFace == 2u)
  90.             {
  91.                 dir = float3(uv.x, 1.0, uv.y);
  92.             }
  93.             else
  94.             {
  95.                 if (cubeFace == 3u)
  96.                 {
  97.                     dir = float3(uv.x, -1.0, -uv.y);
  98.                 }
  99.                 else
  100.                 {
  101.                     if (cubeFace == 4u)
  102.                     {
  103.                         dir = float3(uv.x, -uv.y, 1.0);
  104.                     }
  105.                     else
  106.                     {
  107.                         dir = float3(-uv.x, -uv.y, -1.0);
  108.                     }
  109.                 }
  110.             }
  111.         }
  112.     }
  113.     return dir;
  114. }
  115.  
  116. float integrateProjectedCubeArea(thread const float& u, thread const float& v)
  117. {
  118.     return atan2(u * v, sqrt(((u * u) + (v * v)) + 1.0));
  119. }
  120.  
  121. float texelSolidAngle(thread const float& u, thread const float& v, thread const float& invFaceSize)
  122. {
  123.     float x0 = u - invFaceSize;
  124.     float x1 = u + invFaceSize;
  125.     float y0 = v - invFaceSize;
  126.     float y1 = v + invFaceSize;
  127.     float param = x1;
  128.     float param_1 = y1;
  129.     float param_2 = x0;
  130.     float param_3 = y1;
  131.     float param_4 = x1;
  132.     float param_5 = y0;
  133.     float param_6 = x0;
  134.     float param_7 = y0;
  135.     return ((integrateProjectedCubeArea(param, param_1) - integrateProjectedCubeArea(param_2, param_3)) - integrateProjectedCubeArea(param_4, param_5)) + integrateProjectedCubeArea(param_6, param_7);
  136. }
  137.  
  138. SHVector_1 SHBasis(thread const float3& dir)
  139. {
  140.     float x = dir.x;
  141.     float y = dir.y;
  142.     float z = dir.z;
  143.     float x2 = x * x;
  144.     float y2 = y * y;
  145.     float z2 = z * z;
  146.     float z3 = z2 * z;
  147.     float x4 = x2 * x2;
  148.     float y4 = y2 * y2;
  149.     float z4 = z2 * z2;
  150.     SHVector_1 o;
  151.     o.v[0] = 0.2820949852466583251953125;
  152.     o.v[1] = (-0.48860299587249755859375) * y;
  153.     o.v[2] = 0.48860299587249755859375 * z;
  154.     o.v[3] = (-0.48860299587249755859375) * x;
  155.     o.v[4] = (1.09254801273345947265625 * x) * y;
  156.     o.v[5] = ((-1.09254801273345947265625) * y) * z;
  157.     o.v[6] = 0.3153919875621795654296875 * ((3.0 * z2) - 1.0);
  158.     o.v[7] = ((-1.09254801273345947265625) * x) * z;
  159.     o.v[8] = 0.546274006366729736328125 * (x2 - y2);
  160.     o.v[9] = ((-0.590043008327484130859375) * y) * ((3.0 * x2) - y2);
  161.     o.v[10] = ((2.8906109333038330078125 * y) * x) * z;
  162.     o.v[11] = ((-0.646359980106353759765625) * y) * ((-1.0) + (5.0 * z2));
  163.     o.v[12] = 0.3731760084629058837890625 * ((5.0 * z3) - (3.0 * z));
  164.     o.v[13] = ((-0.4570449888706207275390625) * x) * ((-1.0) + (5.0 * z2));
  165.     o.v[14] = (1.44530594348907470703125 * (x2 - y2)) * z;
  166.     o.v[15] = ((-0.590043008327484130859375) * x) * (x2 - (3.0 * y2));
  167.     o.v[16] = ((2.5033400058746337890625 * x) * y) * (x2 - y2);
  168.     o.v[17] = (((-1.77013003826141357421875) * y) * z) * ((3.0 * x2) - y2);
  169.     o.v[18] = ((0.94617497920989990234375 * y) * x) * ((-1.0) + (7.0 * z2));
  170.     o.v[19] = (((-0.669045984745025634765625) * y) * z) * ((-3.0) + (7.0 * z2));
  171.     o.v[20] = (((105.0 * z4) - (90.0 * z2)) + 9.0) / 28.35926055908203125;
  172.     o.v[21] = (((-0.669045984745025634765625) * x) * z) * ((-3.0) + (7.0 * z2));
  173.     o.v[22] = (0.473087012767791748046875 * (x2 - y2)) * ((-1.0) + (7.0 * z2));
  174.     o.v[23] = (((-1.77013003826141357421875) * x) * z) * (x2 - (3.0 * y2));
  175.     o.v[24] = 0.62583601474761962890625 * ((x4 - ((6.0 * y2) * x2)) + y4);
  176.     return o;
  177. }
  178.  
  179. void SHMultiplyAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs, thread const float& c)
  180. {
  181.     for (int i = 0; i < 25; i++)
  182.     {
  183.         lhs.v[i] += (rhs.v[i] * c);
  184.     }
  185. }
  186.  
  187. void GroupMemoryBarrierWithGroupSync()
  188. {
  189.     threadgroup_barrier(mem_flags::mem_threadgroup);
  190. }
  191.  
  192. void SHAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs)
  193. {
  194.     for (int i = 0; i < 25; i++)
  195.     {
  196.         lhs.v[i] += rhs.v[i];
  197.     }
  198. }
  199.  
  200. kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
  201. {
  202.     threadgroup SHCoeffsAndWeight_1 sCoeffs[64];
  203.     SHCoeffsAndWeight_1 data;
  204.     data.weight = 0.0;
  205.     SHVector_1 param = data.coeffs.R;
  206.     SHZero(param);
  207.     data.coeffs.R = param;
  208.     SHVector_1 param_1 = data.coeffs.G;
  209.     SHZero(param_1);
  210.     data.coeffs.G = param_1;
  211.     SHVector_1 param_2 = data.coeffs.B;
  212.     SHZero(param_2);
  213.     data.coeffs.B = param_2;
  214.     float invFaceSize = 1.0 / float((*spvDescriptorSet0.m_531).gFaceSize);
  215.     uint2 pixelCoords = gl_GlobalInvocationID.xy * uint2(4u);
  216.     uint2 pixelCoordsEnd = pixelCoords + uint2(4u);
  217.     for (uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
  218.     {
  219.         for (uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
  220.         {
  221.             uint _574 = x;
  222.             bool _577 = _574 >= (*spvDescriptorSet0.m_531).gFaceSize;
  223.             bool _585;
  224.             if (!_577)
  225.             {
  226.                 _585 = y >= (*spvDescriptorSet0.m_531).gFaceSize;
  227.             }
  228.             else
  229.             {
  230.                 _585 = _577;
  231.             }
  232.             if (_585)
  233.             {
  234.                 break;
  235.             }
  236.             float u = ((2.0 * (float(x) + 0.5)) * invFaceSize) - 1.0;
  237.             float v = ((2.0 * (float(y) + 0.5)) * invFaceSize) - 1.0;
  238.             uint param_3 = (*spvDescriptorSet0.m_531).gCubeFace;
  239.             float2 param_4 = float2(u, v);
  240.             float3 dir = getDirFromCubeFace(param_3, param_4);
  241.             dir = normalize(dir);
  242.             float param_5 = u;
  243.             float param_6 = v;
  244.             float param_7 = invFaceSize;
  245.             float weight = texelSolidAngle(param_5, param_6, param_7);
  246.             float3 param_8 = dir;
  247.             SHVector_1 shBasis = SHBasis(param_8);
  248.             float3 radiance = spvDescriptorSet0.gInputTex.sample(spvDescriptorSet0.gInputTexSmplr, dir, level(0.0)).xyz;
  249.             SHVector_1 param_9 = data.coeffs.R;
  250.             SHVector_1 param_10 = shBasis;
  251.             float param_11 = radiance.x * weight;
  252.             SHMultiplyAdd(param_9, param_10, param_11);
  253.             data.coeffs.R = param_9;
  254.             SHVector_1 param_12 = data.coeffs.G;
  255.             SHVector_1 param_13 = shBasis;
  256.             float param_14 = radiance.y * weight;
  257.             SHMultiplyAdd(param_12, param_13, param_14);
  258.             data.coeffs.G = param_12;
  259.             SHVector_1 param_15 = data.coeffs.B;
  260.             SHVector_1 param_16 = shBasis;
  261.             float param_17 = radiance.z * weight;
  262.             SHMultiplyAdd(param_15, param_16, param_17);
  263.             data.coeffs.B = param_15;
  264.             data.weight += weight;
  265.         }
  266.     }
  267.     sCoeffs[gl_LocalInvocationIndex] = data;
  268.     GroupMemoryBarrierWithGroupSync();
  269.     int numThreads = 64;
  270.     for (int tc = numThreads / 2; tc > 0; tc = tc >> 1)
  271.     {
  272.         if (gl_LocalInvocationIndex < uint(tc))
  273.         {
  274.             SHVector_1 param_18 = sCoeffs[gl_LocalInvocationIndex].coeffs.R;
  275.             SHVector_1 param_19 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.R;
  276.             SHAdd(param_18, param_19);
  277.             sCoeffs[gl_LocalInvocationIndex].coeffs.R = param_18;
  278.             SHVector_1 param_20 = sCoeffs[gl_LocalInvocationIndex].coeffs.G;
  279.             SHVector_1 param_21 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.G;
  280.             SHAdd(param_20, param_21);
  281.             sCoeffs[gl_LocalInvocationIndex].coeffs.G = param_20;
  282.             SHVector_1 param_22 = sCoeffs[gl_LocalInvocationIndex].coeffs.B;
  283.             SHVector_1 param_23 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.B;
  284.             SHAdd(param_22, param_23);
  285.             sCoeffs[gl_LocalInvocationIndex].coeffs.B = param_22;
  286.             sCoeffs[gl_LocalInvocationIndex].weight += sCoeffs[gl_LocalInvocationIndex + uint(tc)].weight;
  287.         }
  288.         GroupMemoryBarrierWithGroupSync();
  289.     }
  290.     if (gl_LocalInvocationIndex == 0u)
  291.     {
  292.         uint faceOffset = ((*spvDescriptorSet0.m_531).gDispatchSize.x * (*spvDescriptorSet0.m_531).gDispatchSize.y) * (*spvDescriptorSet0.m_531).gCubeFace;
  293.         uint outputIdx = (faceOffset + (gl_WorkGroupID.y * (*spvDescriptorSet0.m_531).gDispatchSize.x)) + gl_WorkGroupID.x;
  294.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[0] = sCoeffs[0].coeffs.R.v[0];
  295.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[1] = sCoeffs[0].coeffs.R.v[1];
  296.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[2] = sCoeffs[0].coeffs.R.v[2];
  297.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[3] = sCoeffs[0].coeffs.R.v[3];
  298.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[4] = sCoeffs[0].coeffs.R.v[4];
  299.         (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[5] = sCoeffs[0].coeffs.R.v[5];
  300.         (*"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement