Untitled

#version 430 core
layout(local_size_x = 16, local_size_y = 9, local_size_z = 4) in;

struct PointLight{
    vec4 position;
    vec4 color;
    uint enabled;
    float intensity;
    float range;
};

struct LightGrid{
    uint offset;
    uint count;
};

struct VolumeTileAABB{
    vec4 minPoint;
    vec4 maxPoint;
};

layout (std430, binding = 1) buffer clusterAABB{
    VolumeTileAABB cluster[ ];
};

layout (std430, binding = 2) buffer screenToView{
    mat4 inverseProjection;
    uvec4 tileSizes;
    uvec2 screenDimensions;
};

layout (std430, binding = 3) buffer lightSSBO{
    PointLight pointLight[];
};

layout (std430, binding = 4) buffer lightIndexSSBO{
    uint globalLightIndexList[];
};

layout (std430, binding = 5) buffer lightGridSSBO{
    LightGrid lightGrid[];
};

layout (std430, binding = 6) buffer globalIndexCountSSBO{
    uint globalIndexCount;
};

//Shared variables
shared PointLight sharedLights[16*9*4];

uniform mat4 viewMatrix;

bool testSphereAABB(uint light, uint tile);
float sqDistPointAABB(vec3 point, uint tile);

void main(){
    globalIndexCount = 0;
    uint threadCount = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;
    uint lightCount  = pointLight.length();
    uint numBatches = (lightCount + threadCount -1) / threadCount;

    uint tileIndex = gl_LocalInvocationIndex + gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z * gl_WorkGroupID.z;

    uint visibleLightCount = 0;
    uint visibleLightIndices[100];

    for( uint batch = 0; batch < numBatches; ++batch){
        uint lightIndex = batch * threadCount + gl_LocalInvocationIndex;

        //Prevent overflow by clamping to last light which is always null
        lightIndex = min(lightIndex, lightCount);

        //Populating shared light array
        sharedLights[gl_LocalInvocationIndex] = pointLight[lightIndex];
        barrier();

        //Iterating within the current batch of lights
        for( uint light = 0; light < threadCount; ++light){
            if( sharedLights[light].enabled  == 1){
                if( testSphereAABB(light, tileIndex) ){
                    visibleLightIndices[visibleLightCount] = batch * threadCount + light;
                    visibleLightCount += 1;
                }
            }
        }
    }

    //We want all thread groups to have completed the light tests before continuing
    barrier();

    uint offset = atomicAdd(globalIndexCount, visibleLightCount);

    for(uint i = 0; i < visibleLightCount; ++i){
        globalLightIndexList[offset + i] = visibleLightIndices[i];
    }

    lightGrid[tileIndex].offset = offset;
    lightGrid[tileIndex].count = visibleLightCount;
}

bool testSphereAABB(uint light, uint tile){
    float radius = sharedLights[light].range;
    vec3 center  = vec3(viewMatrix * sharedLights[light].position);
    float squaredDistance = sqDistPointAABB(center, tile);

    return squaredDistance <= (radius * radius);
}

float sqDistPointAABB(vec3 point, uint tile){
    float sqDist = 0.0;
    VolumeTileAABB currentCell = cluster[tile];
    cluster[tile].maxPoint[3] = tile;
    for(int i = 0; i < 3; ++i){
        float v = point[i];
        if(v < currentCell.minPoint[i]){
            sqDist += (currentCell.minPoint[i] - v) * (currentCell.minPoint[i] - v);
        }
        if(v > currentCell.maxPoint[i]){
            sqDist += (v - currentCell.maxPoint[i]) * (v - currentCell.maxPoint[i]);
        }
    }

    return sqDist;
}