Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- cbuffer cbPerFrame : register (b0) // Constants read by TestCS, bound to constant-buffer slot b0.
- {
- float4 cb0_v0; // xy - downscaled buffer size (TestCS uses x * y as the total pixel count); zw - start/end factors that TestCS multiplies by that count to get the first/last pixel rank included in the average
- }
- RWStructuredBuffer<uint> g_buffer : register (u0); // Input: TestCS copies elements [0-255] into shared_data and treats each one as the pixel count of a luma bin.
- RWTexture2D<float> g_avgLuminance : register (u1); // Output: TestCS writes the resulting average luminance to texel (0,0).
- groupshared uint shared_data[256]; // Group-shared copy of the 256 per-luma pixel counts; filled by all threads at the start of TestCS.
// Number of luma bins. Must match the size of the shared_data array (256).
static const uint NUM_LUMA_BINS = 256;
// Threads in the group. Must match the [numthreads] attribute on TestCS.
static const uint THREADS_PER_GROUP = 64;

// Converts a bin index [0-255] back to a linear luminance value.
//
// This inverts the encoding of the earlier (distribution) pass, which was:
//   outLuma = log(luma + 1.0) * 128   // +1 because log(0) is undefined and log(1) = 0
// Half a bin is added first so that bin 0 does not decode to exactly zero.
float DecodeBinLuminance(uint bin)
{
    return exp((float(bin) + 0.5) / 128.0) - 1.0;
}

// Computes the average luminance of a rank-selected range of pixels from a
// 256-bin luma histogram and writes it to g_avgLuminance[0,0].
//
// Intended to run as a single thread group of 64 threads:
//   1. All threads cooperatively copy g_buffer[0-255] into shared_data.
//   2. Thread 0 alone walks the bins from darkest to brightest:
//      - bins are skipped while the running pixel count stays at or below the
//        start threshold (totalPixels * cb0_v0.z);
//      - bins are then accumulated until the running count exceeds the end
//        threshold (totalPixels * cb0_v0.w).
//      The bin that contains the start threshold and the bin that contains the
//      end threshold are both included in full (no partial-bin weighting).
//   3. The accumulated luminance is divided by the number of accumulated pixels.
//
// Both walks are bounded by NUM_LUMA_BINS, so a histogram that holds fewer
// pixels than the thresholds expect can neither index past shared_data nor
// spin forever. If no pixel ends up inside the range, 0 is written.
//
// GTid - index of the thread within its group; only GTid.x ([0-63]) is used.
[numthreads(64, 1, 1)]
void TestCS(uint3 GTid : SV_GroupThreadID)
{
    const uint threadID = GTid.x;

    // Fill shared_data: 64 threads x 4 elements each, strided by the group size.
    [unroll] for (uint idx = 0; idx < NUM_LUMA_BINS / THREADS_PER_GROUP; idx++)
    {
        const uint offset = threadID + idx * THREADS_PER_GROUP;
        shared_data[offset] = g_buffer[offset];
    }

    // Wait until every thread in the group has finished writing shared_data.
    GroupMemoryBarrierWithGroupSync();

    // The remaining work is serial and is done by thread 0 only.
    [branch] if (threadID == 0)
    {
        // Total number of pixels in the downscaled buffer.
        uint totalPixels = cb0_v0.x * cb0_v0.y;

        // Pixel ranks bounding the range that contributes to the average.
        int pixelsToConsiderStart = totalPixels * cb0_v0.z;
        int pixelsToConsiderEnd = totalPixels * cb0_v0.w;

        // Clamp to >= 0 so that an empty buffer (totalPixels == 0) does not
        // produce an inverted clamp range below.
        int lastPixelIndex = max(int(totalPixels) - 1, 0);
        pixelsToConsiderStart = clamp(pixelsToConsiderStart, 0, lastPixelIndex);
        pixelsToConsiderEnd = clamp(pixelsToConsiderEnd, pixelsToConsiderStart, lastPixelIndex);

        // Running count of pixels in all bins visited so far.
        int numProcessedPixels = 0;
        // Current bin index.
        uint lumaValue = 0;
        bool bExitLoop = false;

        // Pass 1: skip whole bins while the running count does not exceed
        // pixelsToConsiderStart. On exit, lumaValue is the first bin to
        // accumulate and numProcessedPixels counts only the skipped bins.
        [loop]
        while (!bExitLoop && lumaValue < NUM_LUMA_BINS)
        {
            uint numPixels = shared_data[lumaValue];
            int tempSum = numProcessedPixels + numPixels;

            [flatten]
            if (tempSum > pixelsToConsiderStart)
            {
                bExitLoop = true;
            }
            else
            {
                numProcessedPixels = tempSum;
                lumaValue++;
            }
        }

        // Pixels skipped by pass 1; they are excluded from the divisor.
        int numSkippedPixels = numProcessedPixels;

        // Pass 2: accumulate (pixel count * decoded luminance) per bin until the
        // running count exceeds pixelsToConsiderEnd or the bins run out.
        float totalLuminance = 0.0f;
        bExitLoop = false;
        [loop]
        while (!bExitLoop && lumaValue < NUM_LUMA_BINS)
        {
            uint numPixels = shared_data[lumaValue];
            numProcessedPixels += numPixels;
            totalLuminance += float(numPixels) * DecodeBinLuminance(lumaValue);

            [flatten]
            if (numProcessedPixels > pixelsToConsiderEnd)
            {
                bExitLoop = true;
            }
            else
            {
                lumaValue++;
            }
        }

        // Average over the pixels that were actually accumulated. The divisor is
        // zero only when pass 1 ran out of bins, in which case 0 is written.
        int numAveragedPixels = numProcessedPixels - numSkippedPixels;
        float finalAvgLuminance = 0.0f;
        if (numAveragedPixels > 0)
        {
            finalAvgLuminance = totalLuminance / float(numAveragedPixels);
        }

        // Save average luminance.
        g_avgLuminance[uint2(0, 0)] = finalAvgLuminance;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement