Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // The helper functions in this file are defined without prior declarations; silence the resulting warning.
- #pragma clang diagnostic ignored "-Wmissing-prototypes"
- #include <metal_stdlib>
- #include <simd/simd.h>
- using namespace metal;
- struct SHVector // Fixed-size set of 25 float coefficients; this variant is the element type used (via SHVectorRGB) in the gOutput buffer layout.
- {
- float v[25]; // One slot per basis function written by SHBasis (indices 0..24).
- };
- struct SHVector_1 // Same layout as SHVector; this variant is the one the helper functions and main0's local/threadgroup data operate on.
- {
- float v[25]; // One slot per basis function written by SHBasis (indices 0..24).
- };
- struct SHVectorRGB // Three SHVector coefficient sets, one per colour channel.
- {
- SHVector R; // Red channel coefficients.
- SHVector G; // Green channel coefficients.
- SHVector B; // Blue channel coefficients.
- };
- struct SHCoeffsAndWeight // Element type of gOutput::xst_gOutput: per-channel coefficients plus a scalar weight.
- {
- SHVectorRGB coeffs; // R/G/B coefficient sets.
- float weight; // Scalar weight stored next to the coefficients; main0 accumulates per-texel solid angles into the matching field of its working copy.
- };
- // Three SHVector_1 coefficient sets, one per colour channel.
- // Must be defined before SHCoeffsAndWeight_1, which embeds it by value and
- // therefore needs the complete type at its point of declaration.
- struct SHVectorRGB_1
- {
-     SHVector_1 R;
-     SHVector_1 G;
-     SHVector_1 B;
- };
- // Working accumulator used by main0 (per-thread local and threadgroup array):
- // per-channel coefficients plus the running sum of texel weights.
- struct SHCoeffsAndWeight_1
- {
-     SHVectorRGB_1 coeffs;
-     float weight;
- };
- // Per-dispatch parameters read by main0 through the descriptor set.
- struct Params
- {
-     uint gCubeFace;      // Face index forwarded to getDirFromCubeFace and used for the output offset.
-     uint gFaceSize;      // Face edge length in texels; used for bounds checks and as 1/size for UV mapping.
-     uint2 gDispatchSize; // Group counts in x/y; used to linearise the output index.
- };
- struct gOutput // Layout of the device buffer that main0 writes through spvDescriptorSetBuffer0::m_807.
- {
- SHCoeffsAndWeight xst_gOutput[1]; // Declared with one element but indexed by a computed outputIdx in main0 — presumably a stand-in for a runtime-sized array; confirm the host allocates one entry per group and face.
- };
- constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 8u, 1u); // 8 x 8 x 1 = 64, matching the sCoeffs[64] array and numThreads = 64 used by main0.
- struct spvDescriptorSetBuffer0 // Resource bundle that main0 receives at [[buffer(0)]].
- {
- texturecube<float> gInputTex [[id(1)]]; // Cube map that main0 samples at mip level 0.
- device gOutput* m_807 [[id(2)]]; // Output buffer. NOTE(review): shares id(2) with gInputTexSmplr on the next line — two members cannot occupy the same id; confirm the intended ids against the host-side binding code before changing either.
- sampler gInputTexSmplr [[id(2)]]; // Sampler paired with gInputTex.
- constant Params* m_531 [[id(3)]]; // Per-dispatch parameters (cube face, face size, dispatch size).
- };
- // Resets all 25 coefficients of `v` to zero.
- void SHZero(thread SHVector_1& v)
- {
-     int slot = 0;
-     while (slot < 25)
-     {
-         v.v[slot] = 0.0;
-         ++slot;
-     }
- }
- // Maps a face index and a 2D face coordinate to an (unnormalized) direction.
- // The component fixed at +/-1 identifies the face: 0 -> +X, 1 -> -X, 2 -> +Y,
- // 3 -> -Y, 4 -> +Z, and any other index -> -Z. The caller normalizes the result.
- float3 getDirFromCubeFace(thread const uint& cubeFace, thread const float2& uv)
- {
-     switch (cubeFace)
-     {
-         case 0u:
-             return float3(1.0, -uv.y, -uv.x);
-         case 1u:
-             return float3(-1.0, -uv.y, uv.x);
-         case 2u:
-             return float3(uv.x, 1.0, uv.y);
-         case 3u:
-             return float3(uv.x, -1.0, -uv.y);
-         case 4u:
-             return float3(uv.x, -uv.y, 1.0);
-         default:
-             // Face 5 and every out-of-range index share this branch.
-             return float3(-uv.x, -uv.y, -1.0);
-     }
- }
- // Evaluates atan2(u*v, sqrt(u^2 + v^2 + 1)); texelSolidAngle combines four
- // of these corner values into a single texel's solid angle.
- float integrateProjectedCubeArea(thread const float& u, thread const float& v)
- {
-     float product = u * v;
-     float radicand = ((u * u) + (v * v)) + 1.0;
-     return atan2(product, sqrt(radicand));
- }
- // Solid-angle term for the texel centred at (u, v) whose half-extent is
- // invFaceSize: the corner function is evaluated at the four corners and
- // combined as (hi,hi) - (lo,hi) - (hi,lo) + (lo,lo).
- float texelSolidAngle(thread const float& u, thread const float& v, thread const float& invFaceSize)
- {
-     float uLo = u - invFaceSize;
-     float uHi = u + invFaceSize;
-     float vLo = v - invFaceSize;
-     float vHi = v + invFaceSize;
-     float cornerHiHi = integrateProjectedCubeArea(uHi, vHi);
-     float cornerLoHi = integrateProjectedCubeArea(uLo, vHi);
-     float cornerHiLo = integrateProjectedCubeArea(uHi, vLo);
-     float cornerLoLo = integrateProjectedCubeArea(uLo, vLo);
-     // Same left-to-right association as the single-expression form.
-     return ((cornerHiHi - cornerLoHi) - cornerHiLo) + cornerLoLo;
- }
- // Evaluates the 25 real spherical-harmonic basis functions (bands 0..4) for
- // direction `dir` and returns them in band-major order. `dir` is expected to
- // be unit length; the caller normalizes before calling.
- SHVector_1 SHBasis(thread const float3& dir)
- {
-     float x = dir.x;
-     float y = dir.y;
-     float z = dir.z;
-     float x2 = x * x;
-     float y2 = y * y;
-     float z2 = z * z;
-     float z3 = z2 * z;
-     float x4 = x2 * x2;
-     float y4 = y2 * y2;
-     float z4 = z2 * z2;
-     SHVector_1 o;
-     // Band 0
-     o.v[0] = 0.2820949852466583251953125;
-     // Band 1
-     o.v[1] = (-0.48860299587249755859375) * y;
-     o.v[2] = 0.48860299587249755859375 * z;
-     o.v[3] = (-0.48860299587249755859375) * x;
-     // Band 2
-     o.v[4] = (1.09254801273345947265625 * x) * y;
-     o.v[5] = ((-1.09254801273345947265625) * y) * z;
-     o.v[6] = 0.3153919875621795654296875 * ((3.0 * z2) - 1.0);
-     o.v[7] = ((-1.09254801273345947265625) * x) * z;
-     o.v[8] = 0.546274006366729736328125 * (x2 - y2);
-     // Band 3
-     o.v[9] = ((-0.590043008327484130859375) * y) * ((3.0 * x2) - y2);
-     o.v[10] = ((2.8906109333038330078125 * y) * x) * z;
-     // The m = -1 and m = +1 terms (v[11], v[13]) are mirror images in y and x and
-     // share one normalization, (1/4) * sqrt(21 / (2 * pi)) ~= 0.457046. v[11]
-     // previously used 0.646360, which is larger by a factor of sqrt(2).
-     o.v[11] = ((-0.4570449888706207275390625) * y) * ((-1.0) + (5.0 * z2));
-     o.v[12] = 0.3731760084629058837890625 * ((5.0 * z3) - (3.0 * z));
-     o.v[13] = ((-0.4570449888706207275390625) * x) * ((-1.0) + (5.0 * z2));
-     o.v[14] = (1.44530594348907470703125 * (x2 - y2)) * z;
-     o.v[15] = ((-0.590043008327484130859375) * x) * (x2 - (3.0 * y2));
-     // Band 4
-     o.v[16] = ((2.5033400058746337890625 * x) * y) * (x2 - y2);
-     o.v[17] = (((-1.77013003826141357421875) * y) * z) * ((3.0 * x2) - y2);
-     o.v[18] = ((0.94617497920989990234375 * y) * x) * ((-1.0) + (7.0 * z2));
-     o.v[19] = (((-0.669045984745025634765625) * y) * z) * ((-3.0) + (7.0 * z2));
-     o.v[20] = (((105.0 * z4) - (90.0 * z2)) + 9.0) / 28.35926055908203125;
-     o.v[21] = (((-0.669045984745025634765625) * x) * z) * ((-3.0) + (7.0 * z2));
-     o.v[22] = (0.473087012767791748046875 * (x2 - y2)) * ((-1.0) + (7.0 * z2));
-     o.v[23] = (((-1.77013003826141357421875) * x) * z) * (x2 - (3.0 * y2));
-     o.v[24] = 0.62583601474761962890625 * ((x4 - ((6.0 * y2) * x2)) + y4);
-     return o;
- }
- // Element-wise lhs += rhs * c over all 25 coefficients.
- void SHMultiplyAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs, thread const float& c)
- {
-     int slot = 0;
-     while (slot < 25)
-     {
-         lhs.v[slot] = lhs.v[slot] + (rhs.v[slot] * c);
-         ++slot;
-     }
- }
- void GroupMemoryBarrierWithGroupSync() // Thin wrapper: issues threadgroup_barrier with the mem_threadgroup flag; main0 calls it after filling sCoeffs and after each reduction step.
- {
- threadgroup_barrier(mem_flags::mem_threadgroup);
- }
- // Element-wise lhs += rhs over all 25 coefficients.
- void SHAdd(thread SHVector_1& lhs, thread const SHVector_1& rhs)
- {
-     for (int slot = 0; slot != 25; ++slot)
-     {
-         lhs.v[slot] = lhs.v[slot] + rhs.v[slot];
-     }
- }
- kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
- {
- threadgroup SHCoeffsAndWeight_1 sCoeffs[64];
- SHCoeffsAndWeight_1 data;
- data.weight = 0.0;
- SHVector_1 param = data.coeffs.R;
- SHZero(param);
- data.coeffs.R = param;
- SHVector_1 param_1 = data.coeffs.G;
- SHZero(param_1);
- data.coeffs.G = param_1;
- SHVector_1 param_2 = data.coeffs.B;
- SHZero(param_2);
- data.coeffs.B = param_2;
- float invFaceSize = 1.0 / float((*spvDescriptorSet0.m_531).gFaceSize);
- uint2 pixelCoords = gl_GlobalInvocationID.xy * uint2(4u);
- uint2 pixelCoordsEnd = pixelCoords + uint2(4u);
- for (uint y = pixelCoords.y; y < pixelCoordsEnd.y; y++)
- {
- for (uint x = pixelCoords.x; x < pixelCoordsEnd.x; x++)
- {
- uint _574 = x;
- bool _577 = _574 >= (*spvDescriptorSet0.m_531).gFaceSize;
- bool _585;
- if (!_577)
- {
- _585 = y >= (*spvDescriptorSet0.m_531).gFaceSize;
- }
- else
- {
- _585 = _577;
- }
- if (_585)
- {
- break;
- }
- float u = ((2.0 * (float(x) + 0.5)) * invFaceSize) - 1.0;
- float v = ((2.0 * (float(y) + 0.5)) * invFaceSize) - 1.0;
- uint param_3 = (*spvDescriptorSet0.m_531).gCubeFace;
- float2 param_4 = float2(u, v);
- float3 dir = getDirFromCubeFace(param_3, param_4);
- dir = normalize(dir);
- float param_5 = u;
- float param_6 = v;
- float param_7 = invFaceSize;
- float weight = texelSolidAngle(param_5, param_6, param_7);
- float3 param_8 = dir;
- SHVector_1 shBasis = SHBasis(param_8);
- float3 radiance = spvDescriptorSet0.gInputTex.sample(spvDescriptorSet0.gInputTexSmplr, dir, level(0.0)).xyz;
- SHVector_1 param_9 = data.coeffs.R;
- SHVector_1 param_10 = shBasis;
- float param_11 = radiance.x * weight;
- SHMultiplyAdd(param_9, param_10, param_11);
- data.coeffs.R = param_9;
- SHVector_1 param_12 = data.coeffs.G;
- SHVector_1 param_13 = shBasis;
- float param_14 = radiance.y * weight;
- SHMultiplyAdd(param_12, param_13, param_14);
- data.coeffs.G = param_12;
- SHVector_1 param_15 = data.coeffs.B;
- SHVector_1 param_16 = shBasis;
- float param_17 = radiance.z * weight;
- SHMultiplyAdd(param_15, param_16, param_17);
- data.coeffs.B = param_15;
- data.weight += weight;
- }
- }
- sCoeffs[gl_LocalInvocationIndex] = data;
- GroupMemoryBarrierWithGroupSync();
- int numThreads = 64;
- for (int tc = numThreads / 2; tc > 0; tc = tc >> 1)
- {
- if (gl_LocalInvocationIndex < uint(tc))
- {
- SHVector_1 param_18 = sCoeffs[gl_LocalInvocationIndex].coeffs.R;
- SHVector_1 param_19 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.R;
- SHAdd(param_18, param_19);
- sCoeffs[gl_LocalInvocationIndex].coeffs.R = param_18;
- SHVector_1 param_20 = sCoeffs[gl_LocalInvocationIndex].coeffs.G;
- SHVector_1 param_21 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.G;
- SHAdd(param_20, param_21);
- sCoeffs[gl_LocalInvocationIndex].coeffs.G = param_20;
- SHVector_1 param_22 = sCoeffs[gl_LocalInvocationIndex].coeffs.B;
- SHVector_1 param_23 = sCoeffs[gl_LocalInvocationIndex + uint(tc)].coeffs.B;
- SHAdd(param_22, param_23);
- sCoeffs[gl_LocalInvocationIndex].coeffs.B = param_22;
- sCoeffs[gl_LocalInvocationIndex].weight += sCoeffs[gl_LocalInvocationIndex + uint(tc)].weight;
- }
- GroupMemoryBarrierWithGroupSync();
- }
- if (gl_LocalInvocationIndex == 0u)
- {
- uint faceOffset = ((*spvDescriptorSet0.m_531).gDispatchSize.x * (*spvDescriptorSet0.m_531).gDispatchSize.y) * (*spvDescriptorSet0.m_531).gCubeFace;
- uint outputIdx = (faceOffset + (gl_WorkGroupID.y * (*spvDescriptorSet0.m_531).gDispatchSize.x)) + gl_WorkGroupID.x;
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[0] = sCoeffs[0].coeffs.R.v[0];
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[1] = sCoeffs[0].coeffs.R.v[1];
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[2] = sCoeffs[0].coeffs.R.v[2];
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[3] = sCoeffs[0].coeffs.R.v[3];
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[4] = sCoeffs[0].coeffs.R.v[4];
- (*spvDescriptorSet0.m_807).xst_gOutput[outputIdx].coeffs.R.v[5] = sCoeffs[0].coeffs.R.v[5];
- (*"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement