Advertisement
Guest User

Untitled

a guest
May 21st, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.12 KB | None | 0 0
  1. "\x12\x04\x96\x19#pragma clang diagnostic ignored \"-Wmissing-prototypes\"
  2.  
  3. #include <metal_stdlib>
  4. #include <simd/simd.h>
  5.  
  6. using namespace metal;
  7.  
  // Fixed-size vector of 25 float coefficients.
  // Used as the per-channel member type of SHVectorRGB, which in turn is the
  // payload of SHCoeffsAndWeight (the element type of the gOutput buffer).
  // NOTE(review): SHVector_1 has identical members; presumably the two exist
  // because the generator emits separate types per storage layout -- confirm.
  8. struct SHVector
  9. {
  10.    float v[25];
  11. };
  12.  
  // Fixed-size vector of 25 float coefficients, member-for-member the same as
  // SHVector. This is the variant the helper functions in this file
  // (SHZero, SHBasis, SHMultiplyAdd, SHAdd) take and return.
  13. struct SHVector_1
  14. {
  15.    float v[25];
  16. };
  17.  
  // Three SHVector coefficient sets, one each for the R, G and B channels.
  18. struct SHVectorRGB
  19. {
  20.    SHVector R;
  21.    SHVector G;
  22.    SHVector B;
  23. };
  24.  
  // One output record: RGB coefficient vectors plus a scalar weight.
  // This is the element type of gOutput::xst_gOutput.
  25. struct SHCoeffsAndWeight
  26. {
  27.    SHVectorRGB coeffs;
  28.    float weight;
  29. };
  30.  
  // Three SHVector_1 coefficient sets, one each for the R, G and B channels.
  // Declared before SHCoeffsAndWeight_1 because that struct embeds this type
  // by value and therefore needs it to be complete at that point.
  struct SHVectorRGB_1
  {
      SHVector_1 R;
      SHVector_1 G;
      SHVector_1 B;
  };

  // Working record used by the kernel: RGB coefficient vectors plus a scalar
  // weight. main0 uses it for its per-thread accumulator and for the
  // threadgroup array it reduces over.
  struct SHCoeffsAndWeight_1
  {
      SHVectorRGB_1 coeffs;
      float weight;
  };

  // Kernel parameters read through the descriptor set.
  struct Params
  {
      uint gCubeFace;      // face index handed to getDirFromCubeFace
      uint gFaceSize;      // face edge length in texels; bounds check and 1/size scale
      uint2 gDispatchSize; // used with the workgroup id to compute the output index
  };
  50.  
  // Layout of the output buffer reached through spvDescriptorSetBuffer0::m_807.
  // The array is declared with a single element, but main0 indexes it with a
  // computed outputIdx, so it appears to stand in for a runtime-sized array
  // (NOTE(review): the actual element count is decided by the host -- confirm).
  51. struct gOutput
  52. {
  53.    SHCoeffsAndWeight xst_gOutput[1];
  54. };
  55.  
  // Workgroup dimensions constant (8 x 8 x 1 = 64); marked [[maybe_unused]].
  // The product matches the 64-entry threadgroup array and numThreads in main0.
  56. constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u);
  57.  
  // Resource block passed to main0 at [[buffer(0)]]: the input cube texture and
  // its sampler, the output buffer, and the constant kernel parameters.
  // NOTE(review): m_807 and gInputTexSmplr both carry [[id(2)]]. Two members
  // sharing an id looks like a binding collision -- verify against the host-side
  // layout / shader generator settings before relying on this struct.
  58. struct spvDescriptorSetBuffer0
  59. {
  60.    texturecube<float> gInputTex [[id(1)]];
  61.    device gOutput* m_807 [[id(2)]];
  62.    sampler gInputTexSmplr [[id(2)]];
  63.    constant Params* m_531 [[id(3)]];
  64. };
  65.  
  // Clears every one of the 25 coefficients stored in `v`.
  void SHZero(thread SHVector_1& v)
  {
      int idx = 0;
      while (idx < 25)
      {
          v.v[idx] = 0.0;
          ++idx;
      }
  }
  73.  
  // Maps a cube face index and a 2D face coordinate to a 3D direction.
  //
  // Faces 0/1 pin x to +1/-1, faces 2/3 pin y to +1/-1, face 4 pins z to +1,
  // and every other index falls through to z = -1. The remaining two
  // components come from `uv` with per-face signs. The result is returned
  // as-is; no normalization happens here.
  float3 getDirFromCubeFace(thread const uint& cubeFace, thread const float2& uv)
  {
      switch (cubeFace)
      {
          case 0u:
              return float3(1.0, -uv.y, -uv.x);
          case 1u:
              return float3(-1.0, -uv.y, uv.x);
          case 2u:
              return float3(uv.x, 1.0, uv.y);
          case 3u:
              return float3(uv.x, -1.0, -uv.y);
          case 4u:
              return float3(uv.x, -uv.y, 1.0);
          default:
              return float3(-uv.x, -uv.y, -1.0);
      }
  }
  115.  
  // Returns atan2(u * v, sqrt(u^2 + v^2 + 1)).
  // texelSolidAngle combines four evaluations of this at a texel's corners.
  float integrateProjectedCubeArea(thread const float& u, thread const float& v)
  {
      float denom = sqrt(((u * u) + (v * v)) + 1.0);
      float numer = u * v;
      return atan2(numer, denom);
  }
  120.  
  // Combines integrateProjectedCubeArea evaluated at the four corners
  // (u +/- invFaceSize, v +/- invFaceSize) as
  //   f(hi, hi) - f(lo, hi) - f(hi, lo) + f(lo, lo).
  // main0 uses the result as the per-texel weight.
  float texelSolidAngle(thread const float& u, thread const float& v, thread const float& invFaceSize)
  {
      float uLo = u - invFaceSize;
      float uHi = u + invFaceSize;
      float vLo = v - invFaceSize;
      float vHi = v + invFaceSize;

      float areaHiHi = integrateProjectedCubeArea(uHi, vHi);
      float areaLoHi = integrateProjectedCubeArea(uLo, vHi);
      float areaHiLo = integrateProjectedCubeArea(uHi, vLo);
      float areaLoLo = integrateProjectedCubeArea(uLo, vLo);

      return ((areaHiHi - areaLoHi) - areaHiLo) + areaLoLo;
  }
  137.  
  // Evaluates 25 polynomial basis terms in the components of `dir` and returns
  // them as an SHVector_1. Entry 0 is a constant, 1-3 are linear, 4-8 quadratic,
  // 9-15 cubic and 16-24 quartic in x/y/z.
  // `dir` is used as given; the visible caller (main0) normalizes it first.
  // NOTE(review): the constants look like real spherical-harmonic normalization
  // factors printed at full float precision -- treat them as exact and do not
  // round them.
  138. SHVector_1 SHBasis(thread const float3& dir)
  139. {
  140.    float x = dir.x;
  141.    float y = dir.y;
  142.    float z = dir.z;
       // Powers of the components reused by the higher-degree terms below.
  143.    float x2 = x * x;
  144.    float y2 = y * y;
  145.    float z2 = z * z;
  146.    float z3 = z2 * z;
  147.    float x4 = x2 * x2;
  148.    float y4 = y2 * y2;
  149.    float z4 = z2 * z2;
  150.    SHVector_1 o;
       // Constant term.
  151.    o.v[0] = 0.2820949852466583251953125;
       // Linear terms.
  152.    o.v[1] = (-0.48860299587249755859375) * y;
  153.    o.v[2] = 0.48860299587249755859375 * z;
  154.    o.v[3] = (-0.48860299587249755859375) * x;
       // Quadratic terms.
  155.    o.v[4] = (1.09254801273345947265625 * x) * y;
  156.    o.v[5] = ((-1.09254801273345947265625) * y) * z;
  157.    o.v[6] = 0.3153919875621795654296875 * ((3.0 * z2) - 1.0);
  158.    o.v[7] = ((-1.09254801273345947265625) * x) * z;
  159.    o.v[8] = 0.546274006366729736328125 * (x2 - y2);
       // Cubic terms.
       // NOTE(review): v[11] and v[13] have the same form (in y and x
       // respectively) but different leading constants, whereas the other
       // mirrored pairs (v[1]/v[3], v[5]/v[7], v[9]/v[15], v[17]/v[23],
       // v[19]/v[21]) share one constant -- confirm this is intended.
  160.    o.v[9] = ((-0.590043008327484130859375) * y) * ((3.0 * x2) - y2);
  161.    o.v[10] = ((2.8906109333038330078125 * y) * x) * z;
  162.    o.v[11] = ((-0.646359980106353759765625) * y) * ((-1.0) + (5.0 * z2));
  163.    o.v[12] = 0.3731760084629058837890625 * ((5.0 * z3) - (3.0 * z));
  164.    o.v[13] = ((-0.4570449888706207275390625) * x) * ((-1.0) + (5.0 * z2));
  165.    o.v[14] = (1.44530594348907470703125 * (x2 - y2)) * z;
  166.    o.v[15] = ((-0.590043008327484130859375) * x) * (x2 - (3.0 * y2));
       // Quartic terms.
  167.    o.v[16] = ((2.5033400058746337890625 * x) * y) * (x2 - y2);
  168.    o.v[17] = (((-1.77013003826141357421875) * y) * z) * ((3.0 * x2) - y2);
  169.    o.v[18] = ((0.94617497920989990234375 * y) * x) * ((-1.0) + (7.0 * z2));
  170.    o.v[19] = (((-0.669045984745025634765625) * y) * z) * ((-3.0) + (7.0 * z2));
  171.    o.v[20] = (((105.0 * z4) - (90.0 * z2)) + 9.0) / 28.35926055908203125;
  172.    o.v[21] = (((-0.669045984745025634765625) * x) * z) * ((-3.0) + (7.0 * z2));
  173.    o.v[22] = (0.473087012767791748046875 * (x2 - y2)) * ((-1.0) + (7.0 * z2));
  174.    o.v[23] = (((-1.77013003826141357421875) * x) * z) * (x2 - (3.0 * y2));
  175.    o.v[24] = 0.62583601474761962890625 * ((x4 - ((6.0 * y2) * x2)) + y4);
  176.    return o;
  177. }
  178.  
  // Element-wise multiply-accumulate over all 25 coefficients:
  // lhs.v[k] += rhs.v[k] * c.
  void SHMultiplyAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs, thread const float& c)
  {
      int idx = 0;
      while (idx < 25)
      {
          lhs.v[idx] = lhs.v[idx] + (rhs.v[idx] * c);
          ++idx;
      }
  }
  186.  
  // Wrapper that issues a threadgroup barrier with the mem_threadgroup flag.
  // main0 calls it after writing the threadgroup array and after each
  // reduction step.
  void GroupMemoryBarrierWithGroupSync()
  {
      metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup);
  }
  191.  
  // Element-wise accumulate over all 25 coefficients: lhs.v[k] += rhs.v[k].
  void SHAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs)
  {
      for (int k = 0; k != 25; ++k)
      {
          lhs.v[k] = lhs.v[k] + rhs.v[k];
      }
  }
  199.  
  200. kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
  201. {
  202.    threadgroup SHCoeffsAndWeight_1 sCoeffs[64];
  203.    SHCoeffsAndWeight_1 data;
  204.    data.weight = 0.0;
  205.    SHVector_1 param = data.coeffs.R;
  206.    SHZero(param);
  207.    data.coeffs.R = param;
  208.    SHVector_1 param_1 = data.coeffs.G;
  209.    SHZero(param_1);
  210.    data.coeffs.G = param_1;
  211.    SHVector_1 param_2 = data.coeffs.B;
  212.    SHZero(param_2);
  213.    data.coeffs.B = param_2;
  214.    float invFaceSize = 1.0 / float((*spvDescriptorSet0.m_531).gFaceSize);
  215.    uint2 pixelCoords = gl_GlobalInvocationID.xy * uint2(4u);
  216.    uint2 pixelCoordsEnd = pixelCoords + uint2(4u);
  217.    for (uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
  218.    {
  219.        for (uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
  220.        {
  221.            uint _574 = x;
  222.            bool _577 = _574 >= (*spvDescriptorSet0.m_531).gFaceSize;
  223.            bool _585;
  224.            if (!_577)
  225.            {
  226.                _585 = y >= (*spvDescriptorSet0.m_531).gFaceSize;
  227.            }
  228.            else
  229.            {
  230.                _585 = _577;
  231.            }
  232.            if (_585)
  233.            {
  234.                break;
  235.            }
  236.            float u = ((2.0 * (float(x) + 0.5)) * invFaceSize) - 1.0;
  237.            float v = ((2.0 * (float(y) + 0.5)) * invFaceSize) - 1.0;
  238.            uint param_3 = (*spvDescriptorSet0.m_531).gCubeFace;
  239.            float2 param_4 = float2(u, v);
  240.            float3 dir = getDirFromCubeFace(param_3, param_4);
  241.            dir = normalize(dir);
  242.            float param_5 = u;
  243.            float param_6 = v;
  244.            float param_7 = invFaceSize;
  245.            float weight = texelSolidAngle(param_5, param_6, param_7);
  246.            float3 param_8 = dir;
  247.            SHVector_1 shBasis = SHBasis(param_8);
  248.            float3 radiance = spvDescriptorSet0.gInputTex.sample(spvDescriptorSet0.gInputTexSmplr, dir, level(0.0)).xyz;
  249.            SHVector_1 param_9 = data.coeffs.R;
  250.            SHVector_1 param_10 = shBasis;
  251.            float param_11 = radiance.x * weight;
  252.            SHMultiplyAdd(param_9, param_10, param_11);
  253.            data.coeffs.R = param_9;
  254.            SHVector_1 param_12 = data.coeffs.G;
  255.            SHVector_1 param_13 = shBasis;
  256.            float param_14 = radiance.y * weight;
  257.            SHMultiplyAdd(param_12, param_13, param_14);
  258.            data.coeffs.G = param_12;
  259.            SHVector_1 param_15 = data.coeffs.B;
  260.            SHVector_1 param_16 = shBasis;
  261.            float param_17 = radiance.z * weight;
  262.            SHMultiplyAdd(param_15, param_16, param_17);
  263.            data.coeffs.B = param_15;
  264.            data.weight += weight;
  265.        }
  266.    }
  267.    sCoeffs[gl_LocalInvocationIndex] = data;
  268.    GroupMemoryBarrierWithGroupSync();
  269.    int numThreads = 64;
  270.    for (int tc = numThreads / 2; tc > 0; tc = tc >> 1)
  271.    {
  272.        if (gl_LocalInvocationIndex < uint(tc))
  273.        {
  274.            SHVector_1 param_18 = sCoeffs[gl_LocalInvocationIndex].coeffs.R;
  275.            SHVector_1 param_19 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.R;
  276.            SHAdd(param_18, param_19);
  277.            sCoeffs[gl_LocalInvocationIndex].coeffs.R = param_18;
  278.            SHVector_1 param_20 = sCoeffs[gl_LocalInvocationIndex].coeffs.G;
  279.            SHVector_1 param_21 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.G;
  280.            SHAdd(param_20, param_21);
  281.            sCoeffs[gl_LocalInvocationIndex].coeffs.G = param_20;
  282.            SHVector_1 param_22 = sCoeffs[gl_LocalInvocationIndex].coeffs.B;
  283.            SHVector_1 param_23 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.B;
  284.            SHAdd(param_22, param_23);
  285.            sCoeffs[gl_LocalInvocationIndex].coeffs.B = param_22;
  286.            sCoeffs[gl_LocalInvocationIndex].weight += sCoeffs[gl_LocalInvocationIndex + uint(tc)].weight;
  287.        }
  288.        GroupMemoryBarrierWithGroupSync();
  289.    }
  290.    if (gl_LocalInvocationIndex == 0u)
  291.    {
  292.        uint faceOffset = ((*spvDescriptorSet0.m_531).gDispatchSize.x * (*spvDescriptorSet0.m_531).gDispatchSize.y) * (*spvDescriptorSet0.m_531).gCubeFace;
  293.        uint outputIdx = (faceOffset + (gl_WorkGroupID.y * (*spvDescriptorSet0.m_531).gDispatchSize.x)) + gl_WorkGroupID.x;
  294.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[0] = sCoeffs[0].coeffs.R.v[0];
  295.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[1] = sCoeffs[0].coeffs.R.v[1];
  296.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[2] = sCoeffs[0].coeffs.R.v[2];
  297.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[3] = sCoeffs[0].coeffs.R.v[3];
  298.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[4] = sCoeffs[0].coeffs.R.v[4];
  299.        (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[5] = sCoeffs[0].coeffs.R.v[5];
  300.        (*"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement