// Provenance: copy from a fork of UnrealEngineUWP, mirror of https://github.com/izzy2lost/UnrealEngineUWP.git
// (synced 2026-03-26 18:15:20 -07:00).
// Last change: #jira UE-159321, #jira UE-168348 #rb Dmitriy.Dyomin #preflight 63e60e0b3d24a13d13a69060
// [CL 24116479 by Wei Liu in ue5-main branch]
// Original listing: 517 lines, 20 KiB, plain text.
/*=============================================================================
	LightGridInjection.usf
=============================================================================*/
|
|
|
|
#include "Common.ush"
|
|
#include "Definitions.usf"
|
|
#include "LargeWorldCoordinates.ush"
|
|
#include "ReflectionEnvironmentShared.ush"
|
|
#include "LightGridCommon.ush"
|
|
|
|
// Per-cell header, NUM_CULLED_LIGHTS_GRID_STRIDE words per cell:
// word +0 receives the number of culled entries, word +1 the start offset into RWCulledLightDataGrid.
RWStructuredBuffer<uint> RWNumCulledLightsGrid;

// Culled light / reflection capture indices. The storage type is picked at compile time;
// the rest of this file only refers to the RWCulledLightDataGrid alias.
#if LIGHT_GRID_USES_16BIT_BUFFERS
RWBuffer<uint> RWCulledLightDataGrid16Bit;
#define RWCulledLightDataGrid RWCulledLightDataGrid16Bit
#else
RWStructuredBuffer<uint> RWCulledLightDataGrid32Bit;
#define RWCulledLightDataGrid RWCulledLightDataGrid32Bit
#endif

// Linked-list build state (used by the USE_LINKED_CULL_LIST path).
// Element 0 is the counter from which new link slots are allocated.
RWStructuredBuffer<uint> RWNextCulledLightLink;
// Most recently added link of every cell (the head of its reverse linked list).
RWStructuredBuffer<uint> RWStartOffsetGrid;
// LIGHT_LINK_STRIDE words per link: word +0 is the primitive index, word +1 the previous link.
RWStructuredBuffer<uint> RWCulledLightLinks;
|
|
|
|
// Packed local light data, LOCAL_LIGHT_DATA_STRIDE float4s per light.
// Entry +0 is read below as position (xyz) and inverse radius (w).
StructuredBuffer<float4> ForwardLocalLightBuffer;

// Number of reflection captures / local lights tested against every cell.
uint NumReflectionCaptures;
uint NumLocalLights;
// Number of cells in the grid; reflection capture data is addressed at cell index NumGridCells + GridIndex.
uint NumGridCells;
// Capacity of a cell's slot range in the culled data buffer.
uint MaxCulledLightsPerCell;
// Grid dimensions in cells.
uint3 CulledGridSize;
// Coefficients of the exponential slice <-> depth mapping (see ComputeCellNearViewDepthFromZSlice).
float3 LightGridZParams;
// A cell covers (1 << LightGridPixelSizeShift) pixels along X and Y.
uint LightGridPixelSizeShift;

#if ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA
// Per light: view-space position (xyz) and radius (w).
StructuredBuffer<float4> LightViewSpacePositionAndRadius;
// Per light: view-space direction (xyz) and tangent of the cone angle (w); w is 0 for non-acute cones and non-spot lights.
StructuredBuffer<float4> LightViewSpaceDirAndPreprocAngle;
#endif // ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA
|
|
|
|
// Enables the additional cone vs. cell AABB rejection test for spot lights in LightGridInjectionCS.
#define REFINE_SPOTLIGHT_BOUNDS 1

// The compaction pass accumulates through groupshared memory first when this is non-zero.
// Defaults to off when the compile environment does not provide a value.
#ifndef USE_LOCALSTORE_FOR_ATOMICS
#define USE_LOCALSTORE_FOR_ATOMICS 0
#endif
|
|
|
|
/**
 * Returns the view-space depth at which grid slice ZSlice begins.
 * Slices follow an exponential distribution driven by LightGridZParams; the first and the
 * one-past-the-last slice boundaries are overridden as described below.
 */
float ComputeCellNearViewDepthFromZSlice(uint ZSlice)
{
	// The exponential distribution of z slices contains an offset, but some screen pixels may be nearer
	// to the camera than this offset. Starting the first slice at zero makes its AABB cover the
	// [0, offset] depth range and avoids false light rejection.
	if (ZSlice == 0)
	{
		return 0.0f;
	}

	// Far boundary of the last slice: push it out to world max.
	// This allows clamping the depth range to reasonable values, at the price of very poor culling for
	// lights falling into the last slice, since its view space AABB gets bloated in x and y.
	if (ZSlice == (uint)CulledGridSize.z)
	{
		return 2000000.0f;
	}

	return (exp2(ZSlice / LightGridZParams.z) - LightGridZParams.y) / LightGridZParams.x;
}
|
|
|
|
/** Transforms a clip-space XY position at the given device Z by View.ClipToView and returns the resulting XY after the divide by w. */
float2 LightGridClipToViewXY(float2 ClipXY, float DeviceZ)
{
	const float4 ViewPosition = mul(float4(ClipXY, DeviceZ, 1), View.ClipToView);
	return ViewPosition.xy / ViewPosition.w;
}

/** Grows the rectangle [BoundsMin, BoundsMax] so that it also contains Corner. */
void LightGridGrowBoundsXY(float2 Corner, inout float2 BoundsMin, inout float2 BoundsMax)
{
	BoundsMin = min(BoundsMin, Corner);
	BoundsMax = max(BoundsMax, Corner);
}

/**
 * Computes the view-space axis-aligned bounding box of one light grid cell.
 *
 * @param GridCoordinate	Cell coordinate; xy selects the screen tile, z the depth slice.
 * @param ViewTileMin		Receives the minimum corner; z is the depth at which the slice starts.
 * @param ViewTileMax		Receives the maximum corner; z is the depth at which the next slice starts.
 */
void ComputeCellViewAABB(uint3 GridCoordinate, out float3 ViewTileMin, out float3 ViewTileMax)
{
	// Compute extent of tiles in clip-space. Note that the last tile may extend a bit outside of view
	// if the view size is not evenly divisible by the tile size.
	const float2 InvCulledGridSizeF = (1u << LightGridPixelSizeShift) * View.ViewSizeAndInvSize.zw;
	const float2 ClipTileSize = float2(2.0f, -2.0f) * InvCulledGridSizeF.xy;
	const float2 ClipOrigin = float2(-1.0f, 1.0f);

	const float2 ClipLo = GridCoordinate.xy * ClipTileSize + ClipOrigin;
	const float2 ClipHi = (GridCoordinate.xy + 1) * ClipTileSize + ClipOrigin;

	// Depth range of the slice, in view space and as device Z.
	const float NearViewZ = ComputeCellNearViewDepthFromZSlice(GridCoordinate.z);
	const float FarViewZ = ComputeCellNearViewDepthFromZSlice(GridCoordinate.z + 1);
	const float NearDeviceZ = ConvertToDeviceZ(NearViewZ);
	const float FarDeviceZ = ConvertToDeviceZ(FarViewZ);

	//@todo - derive min and max from quadrant
	// Bound the xy footprint of all eight cell corners: four on the near depth, then four on the far depth.
	float2 BoundsMin = LightGridClipToViewXY(float2(ClipLo.x, ClipLo.y), NearDeviceZ);
	float2 BoundsMax = BoundsMin;
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipHi.x, ClipHi.y), NearDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipLo.x, ClipHi.y), NearDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipHi.x, ClipLo.y), NearDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipLo.x, ClipLo.y), FarDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipHi.x, ClipHi.y), FarDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipLo.x, ClipHi.y), FarDeviceZ), BoundsMin, BoundsMax);
	LightGridGrowBoundsXY(LightGridClipToViewXY(float2(ClipHi.x, ClipLo.y), FarDeviceZ), BoundsMin, BoundsMax);

	ViewTileMin = float3(BoundsMin, NearViewZ);
	ViewTileMax = float3(BoundsMax, FarViewZ);
}
|
|
|
|
/**
 * Cone vs. sphere overlap test built from three rejection checks.
 *
 * @param ConeVertex	Apex of the cone.
 * @param ConeAxis		Cone axis; the sphere centre is projected onto -ConeAxis.
 * @param ConeRadius	Extent of the cone along the projected axis, measured from the apex.
 * @param CosSinAngle	x and y scale the perpendicular and axial distances respectively (cosine and sine of the cone angle, going by the name).
 * @param SphereToTest	Sphere centre (xyz) and radius (w).
 * @return false when any of the checks rejects the sphere, true otherwise.
 */
bool IntersectConeWithSphere(float3 ConeVertex, float3 ConeAxis, float ConeRadius, float2 CosSinAngle, float4 SphereToTest)
{
	const float SphereRadius = SphereToTest.w;
	const float3 ApexToCenter = SphereToTest.xyz - ConeVertex;
	const float ApexToCenterLengthSq = dot(ApexToCenter, ApexToCenter);

	// Position of the sphere centre along the axis and its distance away from the axis.
	const float AxialDistance = dot(ApexToCenter, -ConeAxis);
	const float PerpendicularDistance = sqrt(ApexToCenterLengthSq - AxialDistance * AxialDistance);

	const float DistanceToClosestPoint = CosSinAngle.x * PerpendicularDistance - AxialDistance * CosSinAngle.y;

	// Each flag is the negation of one rejection test; the comparisons are kept in their original
	// direction so that NaN inputs are treated exactly as before.
	const bool bCloseEnoughToCone = !(DistanceToClosestPoint > SphereRadius);
	const bool bBeforeConeEnd = !(AxialDistance > SphereRadius + ConeRadius);
	const bool bNotBehindVertex = !(AxialDistance < -SphereRadius);

	return bCloseEnoughToCone && bBeforeConeEnd && bNotBehindVertex;
}
|
|
|
|
|
|
|
|
/**
 * Returns true when an axis-aligned box lies entirely on the positive side of a plane.
 *
 * @param center	Box centre.
 * @param extents	Box half-size along each axis.
 * @param plane		Plane equation: xyz is the normal, w the offset, so a point p evaluates to dot(float4(p, 1), plane).
 *					The normal does not have to be normalized, both sides of the comparison scale with its length.
 */
bool AabbOutsidePlane(float3 center, float3 extents, float4 plane)
{
	// Half-width of the box when projected onto the plane normal.
	const float ProjectedHalfWidth = dot(extents, abs(plane.xyz));
	// Plane equation evaluated at the box centre.
	const float CenterDistance = dot(float4(center, 1.0), plane);

	return CenterDistance > ProjectedHalfWidth;
}
|
|
|
|
|
|
/**
 * Approximate cone / aabb test that creates a single separating plane through the cone vertex, lying in the cone
 * surface on the side facing the centre of the Aabb.
 *
 * Returns true if the Aabb is outside (entirely on the positive side) of this plane, i.e. it can safely be culled.
 * Returns false otherwise. Only works for 'acute angled' cones, where the angle is < 90 degrees.
 *
 * The test is approximate in that it can yield false negatives: an Aabb may actually be outside the cone while the
 * test still returns false. Since the intended use is to cull light cones this is acceptable, whereas false positives
 * (reporting "outside" for an Aabb that touches the cone) would cause glitches.
 *
 * @param ConeVertex	Apex of the cone.
 * @param ConeAxis		Axis of the cone (presumably unit length, which the TanConeAngle weighting below relies on - confirm with callers).
 * @param TanConeAngle	Tangent of the cone angle.
 * @param AabbCentre	Centre of the box, in the same space as ConeVertex.
 * @param AabbExt		Half-size of the box.
 */
bool IsAabbOutsideInfiniteAcuteConeApprox(float3 ConeVertex, float3 ConeAxis, float TanConeAngle, float3 AabbCentre, float3 AabbExt)
{
	// 1. find plane (well, base) in which normal lies, and which is perpendicular to axis and centre of aabb.
	// D is the box centre relative to the cone vertex; working relative to the vertex lets the plane offset be zero.
	float3 D = AabbCentre - ConeVertex;

	// Unit vector perpendicular to the cone axis, in the plane of cone axis and aabb centre, pointing towards the aabb centre.
	float3 M = -normalize(cross(cross(D, ConeAxis), ConeAxis));

	// Tilt the perpendicular by the cone angle to get the normal of the plane that touches the cone on the aabb's side.
	float3 N = -TanConeAngle * ConeAxis + M;
	float4 Plane = float4(N, 0.0);

	return AabbOutsidePlane(D, AabbExt, Plane);
}
|
|
|
|
#if SHADER_LIGHT_GRID_INJECTION_CS
|
|
|
|
/**
 * Adds PrimitiveIndex to the culled list of cell CellIndex.
 * Shared by local lights (CellIndex = GridIndex) and reflection captures (CellIndex = NumGridCells + GridIndex).
 *
 * With USE_LINKED_CULL_LIST a link is allocated from the global pool and pushed in front of the cell's list;
 * primitives that do not get a link because the pool is exhausted are dropped.
 * Otherwise the primitive is written straight into the cell's fixed slot range, and dropped once the range is full.
 */
void LightGridAppendCulledPrimitive(uint CellIndex, uint PrimitiveIndex)
{
#if USE_LINKED_CULL_LIST
	// Size of the link pool shared by all cells and all primitive types.
	const uint NumAvailableLinks = NumGridCells * MaxCulledLightsPerCell * NUM_CULLED_GRID_PRIMITIVE_TYPES;

	uint NextLink;
	InterlockedAdd(RWNextCulledLightLink[0], 1U, NextLink);

	if (NextLink < NumAvailableLinks)
	{
		// Make the new link the head of the cell's list and chain the previous head behind it.
		uint PreviousLink;
		InterlockedExchange(RWStartOffsetGrid[CellIndex], NextLink, PreviousLink);
		RWCulledLightLinks[NextLink * LIGHT_LINK_STRIDE + 0] = PrimitiveIndex;
		RWCulledLightLinks[NextLink * LIGHT_LINK_STRIDE + 1] = PreviousLink;
	}
#else
	// NOTE(review): the per-cell counter keeps growing past MaxCulledLightsPerCell even though no further
	// indices are stored; presumably consumers clamp the count - confirm.
	uint SlotIndex;
	InterlockedAdd(RWNumCulledLightsGrid[CellIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 0], 1U, SlotIndex);
	RWNumCulledLightsGrid[CellIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 1] = CellIndex * MaxCulledLightsPerCell;

	if (SlotIndex < MaxCulledLightsPerCell)
	{
		RWCulledLightDataGrid[CellIndex * MaxCulledLightsPerCell + SlotIndex] = PrimitiveIndex;
	}
#endif
}

/**
 * One thread per light grid cell: tests every local light and every reflection capture against the
 * cell's view-space AABB and records the ones that may affect the cell.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void LightGridInjectionCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint3 GridCoordinate = DispatchThreadId;

	// The dispatch may be larger than the grid, skip threads that fall outside of it.
	if (all(GridCoordinate < (uint3)CulledGridSize))
	{
		uint GridIndex = (GridCoordinate.z * CulledGridSize.y + GridCoordinate.y) * CulledGridSize.x + GridCoordinate.x;

		// Reflection captures use a second range of cells placed after the light cells.
		const uint CaptureGridIndex = NumGridCells + GridIndex;

// Disable to pass all lights through for debugging, will hit limits quickly though
#define CULL_LIGHTS 1
#if CULL_LIGHTS
		float3 ViewTileMin;
		float3 ViewTileMax;
		ComputeCellViewAABB(GridCoordinate, ViewTileMin, ViewTileMax);

		float3 ViewTileCenter = .5f * (ViewTileMin + ViewTileMax);
		float3 ViewTileExtent = ViewTileMax - ViewTileCenter;

		LOOP
		for (uint LocalLightIndex = 0; LocalLightIndex < NumLocalLights; LocalLightIndex++)
		{
#if ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA
			float4 LightPositionAndRadius = LightViewSpacePositionAndRadius[LocalLightIndex];
			float3 ViewSpaceLightPosition = LightPositionAndRadius.xyz;
			float LightRadius = LightPositionAndRadius.w;
#else // !ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA
			float4 LightPositionAndInvRadius = ForwardLocalLightBuffer[LocalLightIndex * LOCAL_LIGHT_DATA_STRIDE + 0];
			float LightRadius = 1.0f / LightPositionAndInvRadius.w;

			float3 ViewSpaceLightPosition = mul(float4(LightPositionAndInvRadius.xyz, 1), View.TranslatedWorldToView).xyz;
#endif // ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA

			// Sphere vs. AABB: keep the light if its bounding sphere reaches the cell.
			float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(ViewTileCenter, ViewTileExtent, ViewSpaceLightPosition);

			if (BoxDistanceSq < LightRadius * LightRadius)
			{
				bool bPassSpotlightTest = true;

// The cone data only exists when the view space build data is provided, so the refinement requires both.
#if REFINE_SPOTLIGHT_BOUNDS && ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA
				{
					float4 ViewSpaceDirAndPreprocAngle = LightViewSpaceDirAndPreprocAngle[LocalLightIndex];
					float TanConeAngle = ViewSpaceDirAndPreprocAngle.w;

					// Set to 0 for non-acute cones, or non-spot lights.
					if (TanConeAngle > 0.0f)
					{
						float3 ViewSpaceLightDirection = -ViewSpaceDirAndPreprocAngle.xyz;
						bPassSpotlightTest = !IsAabbOutsideInfiniteAcuteConeApprox(ViewSpaceLightPosition, ViewSpaceLightDirection, TanConeAngle, ViewTileCenter, ViewTileExtent);
					}
				}
#endif // REFINE_SPOTLIGHT_BOUNDS && ENABLE_LIGHT_CULLING_VIEW_SPACE_BUILD_DATA

				if (bPassSpotlightTest)
				{
					LightGridAppendCulledPrimitive(GridIndex, LocalLightIndex);
				}
			}
		}

		LOOP
		for (uint ReflectionCaptureIndex = 0; ReflectionCaptureIndex < NumReflectionCaptures; ReflectionCaptureIndex++)
		{
			const float4 CapturePositionAndRadius = GetReflectionPositionAndRadius(ReflectionCaptureIndex);

			FLWCVector3 CaptureWorldPosition = MakeLWCVector3(GetReflectionTilePosition(ReflectionCaptureIndex).xyz, CapturePositionAndRadius.xyz);
			float3 CaptureTranslatedWorldPosition = LWCToFloat(LWCAdd(CaptureWorldPosition, PrimaryView.PreViewTranslation));
			float3 ViewSpaceCapturePosition = mul(float4(CaptureTranslatedWorldPosition, 1), View.TranslatedWorldToView).xyz;
			float CaptureRadius = CapturePositionAndRadius.w;

			float BoxDistanceSq = ComputeSquaredDistanceFromBoxToPoint(ViewTileCenter, ViewTileExtent, ViewSpaceCapturePosition);

			if (BoxDistanceSq < CaptureRadius * CaptureRadius)
			{
				LightGridAppendCulledPrimitive(CaptureGridIndex, ReflectionCaptureIndex);
			}
		}
#else
		// Debug path: every cell receives all primitives, limited to what fits into the cell's slot range.
		// The stored counts are clamped accordingly so that they never exceed the number of indices written.
		const uint NumDebugLights = min(NumLocalLights, MaxCulledLightsPerCell);

		LOOP
		for (uint LocalLightIndex = 0; LocalLightIndex < NumDebugLights; LocalLightIndex++)
		{
			RWCulledLightDataGrid[GridIndex * MaxCulledLightsPerCell + LocalLightIndex] = LocalLightIndex;
		}

		RWNumCulledLightsGrid[GridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 0] = NumDebugLights;
		RWNumCulledLightsGrid[GridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 1] = GridIndex * MaxCulledLightsPerCell;

		const uint NumDebugCaptures = min(NumReflectionCaptures, MaxCulledLightsPerCell);

		LOOP
		for (uint ReflectionCaptureIndex = 0; ReflectionCaptureIndex < NumDebugCaptures; ReflectionCaptureIndex++)
		{
			RWCulledLightDataGrid[CaptureGridIndex * MaxCulledLightsPerCell + ReflectionCaptureIndex] = ReflectionCaptureIndex;
		}

		RWNumCulledLightsGrid[CaptureGridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 0] = NumDebugCaptures;
		RWNumCulledLightsGrid[CaptureGridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 1] = CaptureGridIndex * MaxCulledLightsPerCell;
#endif
	}
}
|
|
|
|
#endif // SHADER_LIGHT_GRID_INJECTION_CS
|
|
|
|
|
|
|
|
#if SHADER_LIGHT_GRID_COMPACT_CS
|
|
|
|
// Element 0 is the counter from which compacted ranges of RWCulledLightDataGrid are allocated.
RWStructuredBuffer<uint> RWNextCulledLightData;
// Read-only views of the linked lists consumed by the compaction pass:
// head link per cell, and LIGHT_LINK_STRIDE words per link (+0 primitive index, +1 next link to visit).
StructuredBuffer<uint> StartOffsetGrid;
StructuredBuffer<uint> CulledLightLinks;

#if USE_LOCALSTORE_FOR_ATOMICS
// Number of entries requested by the whole thread group.
groupshared uint LDSNextCulledLightData;
// Start of the range the thread group obtained from RWNextCulledLightData.
groupshared uint LDSBaseStart;
#endif
|
|
|
|
|
|
/**
 * Converts the reverse linked list of one cell into a contiguous range of RWCulledLightDataGrid and
 * writes the cell's count and start offset to RWNumCulledLightsGrid.
 *
 * Has to be called by every thread of the group, including threads outside of the grid, because the
 * USE_LOCALSTORE_FOR_ATOMICS path contains group barriers.
 *
 * @param GridIndex		Cell whose list is compacted; only used when bThreadValid is true.
 * @param SceneMax		Upper bound for the number of list entries, which also bounds the list traversal.
 * @param FirstThread	Must be true for exactly one thread of the group; that thread allocates the group's output range.
 * @param bThreadValid	False for threads outside of the grid; they contribute zero entries and write nothing.
 */
void CompactReverseLinkedList(uint GridIndex, uint SceneMax, bool FirstThread, bool bThreadValid)
{
	uint NumCulledLights = 0;
	uint StartLinkOffset = 0;
	uint LinkOffset = 0;
	uint CulledLightDataStart = 0;

	if (bThreadValid)
	{
		StartLinkOffset = StartOffsetGrid[GridIndex];
		LinkOffset = StartLinkOffset;

		// Traverse the linked list to count how many culled indices we have
		while (LinkOffset != 0xFFFFFFFF && NumCulledLights < SceneMax)
		{
			NumCulledLights++;
			LinkOffset = CulledLightLinks[LinkOffset * LIGHT_LINK_STRIDE + 1];
		}
	}

#if USE_LOCALSTORE_FOR_ATOMICS
	// Only the group total is reset here. LDSBaseStart is deliberately left alone: it is always overwritten
	// by the first thread before the last barrier below, while clearing it at this point would race with
	// threads that are still reading it at the end of a preceding call of this function (this function is
	// called back to back and no barrier separates the calls).
	LDSNextCulledLightData = 0;

	GroupMemoryBarrierWithGroupSync();

	// Firstly do an interlocked add with LDS, which yields the offset of this thread within the group's range
	InterlockedAdd(LDSNextCulledLightData, NumCulledLights, CulledLightDataStart);
	GroupMemoryBarrierWithGroupSync();

	// Then the first thread can do an interlocked add with the buffer to allocate the range of the whole group
	if (FirstThread)
	{
		InterlockedAdd(RWNextCulledLightData[0], LDSNextCulledLightData, LDSBaseStart);
	}
	GroupMemoryBarrierWithGroupSync();

	CulledLightDataStart += LDSBaseStart;
#else
	InterlockedAdd(RWNextCulledLightData[0], NumCulledLights, CulledLightDataStart);
#endif

	if (bThreadValid)
	{
		RWNumCulledLightsGrid[GridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 0] = NumCulledLights;
		RWNumCulledLightsGrid[GridIndex * NUM_CULLED_LIGHTS_GRID_STRIDE + 1] = CulledLightDataStart;

		LinkOffset = StartLinkOffset;
		uint CulledLightIndex = 0;

		while (LinkOffset != 0xFFFFFFFF && CulledLightIndex < NumCulledLights)
		{
			// Reverse the order as we write them out, which restores the original order before the reverse linked list was built
			// Reflection captures are order dependent
			RWCulledLightDataGrid[CulledLightDataStart + NumCulledLights - CulledLightIndex - 1] = CulledLightLinks[LinkOffset * LIGHT_LINK_STRIDE + 0];
			CulledLightIndex++;
			LinkOffset = CulledLightLinks[LinkOffset * LIGHT_LINK_STRIDE + 1];
		}
	}
}
|
|
|
|
/**
 * One thread per light grid cell: compacts the cell's linked lists of local lights and of
 * reflection captures into contiguous ranges.
 */
[numthreads(THREADGROUP_SIZE, THREADGROUP_SIZE, THREADGROUP_SIZE)]
void LightGridCompactCS(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID,
	uint GroupIndex : SV_GroupIndex)
{
	const uint3 CellCoordinate = DispatchThreadId;

	// Linear cell index with x running fastest, then y, then z.
	const uint LightCellIndex = CellCoordinate.x + CulledGridSize.x * (CellCoordinate.y + CulledGridSize.y * CellCoordinate.z);

	// Threads outside of the grid are not returned early, they still take part in the compaction calls
	// with bThreadValid = false.
	const bool bInsideGrid = all(CellCoordinate < CulledGridSize);
	const bool bGroupLeader = (GroupIndex == 0);

	// Compact lights
	CompactReverseLinkedList(LightCellIndex, NumLocalLights, bGroupLeader, bInsideGrid);

	// Compact reflection captures, which use the cells following the light cells
	CompactReverseLinkedList(NumGridCells + LightCellIndex, NumReflectionCaptures, bGroupLeader, bInsideGrid);
}
|
|
|
|
#endif // SHADER_LIGHT_GRID_COMPACT_CS
|
|
|
|
|
|
|
|
#ifdef SHADER_DEBUG_LIGHT_GRID_PS
|
|
|
|
#define SUPPORT_CONTACT_SHADOWS 0
|
|
#include "/Engine/Private/ShaderPrint.ush"
|
|
#include "ColorMap.ush"
|
|
|
|
// Depth buffer; the r channel is converted with ConvertFromDeviceZ to find the cell under a pixel.
Texture2D<float4> DepthTexture;
// Visualization mode: 1 shows the cell at the pixel's scene depth, other values show the maximum over
// the depth slices (2 leaves out the last slice).
uint DebugMode;
|
|
|
|
/**
 * Debug visualization of the light grid: colors every tile by its number of culled local lights,
 * prints that number in the tile and draws the tile borders.
 * Alpha is written as 0 where an overlay color is output and stays 1 elsewhere.
 */
void DebugLightGridPS(
	in float4 SVPos : SV_POSITION,
	out float4 OutLuminanceTransmittance : SV_Target0)
{
	OutLuminanceTransmittance = float4(0, 0, 0, 1);

	ResolvedView = ResolveView();

	const uint EyeIndex = 0;
	const uint2 PixelPos = SVPos.xy;
	const uint2 LocalPixelPos = (SVPos.xy - ResolvedView.ViewRectMin.xy);

	// Using View.ViewResolutionFraction to maintain a good look under any dynamic resolution factor.
	const uint2 ScaledPixelPos = float2(PixelPos) * View.ViewResolutionFraction;
	const uint2 ScaledLocalPos = float2(LocalPixelPos) * View.ViewResolutionFraction;
	// Same for the next pixel along x and y, used to detect tile borders.
	const uint2 ScaledNextLocalPos = float2(LocalPixelPos+1) * View.ViewResolutionFraction;

	if (any(ScaledLocalPos >= uint2(ResolvedView.ViewSizeAndInvSize.xy)))
	{
		return;
	}

	const FLightGridData GridData = GetLightGridData(EyeIndex);

	// Debug widgets: a checkbox that enables the slice selection and a slider that selects the slice.
	uint2 StartPos = uint2(50, 100);
	FShaderPrintContext Context = InitShaderPrintContext(all(PixelPos == StartPos), StartPos);
	bool SpecifyDebugSlice = AddCheckbox(Context, TEXT("Specify Slice to debug"), false, GetDefaultFontColor());
	Newline(Context);

	// The slider label is greyed out while the checkbox is off.
	FFontColor SliderFontColor = FontDarkGrey;
	if (SpecifyDebugSlice)
	{
		SliderFontColor = FontWhite;
	}
	const float DebugSlice = AddSlider(Context, TEXT("Slice to debug"), 0.0f, SliderFontColor, 0.0f, GridData.CulledGridSize.z-1);
	Newline(Context);

	uint DebugNumLocalLights = 0;

	const bool bUseCellAtSceneDepth = (DebugMode == 1) && !SpecifyDebugSlice;
	if (bUseCellAtSceneDepth)
	{
		// Light count of the single cell that contains the visible surface.
		const float SceneDepth = ConvertFromDeviceZ(DepthTexture.Load(int3(ScaledPixelPos, 0)).r);

		const uint GridIndex = ComputeLightGridCellIndex(ScaledLocalPos, SceneDepth, EyeIndex);
		const FCulledLightsGridData CulledLightsGrid = GetCulledLightsGrid(GridIndex, EyeIndex);

		DebugNumLocalLights = CulledLightsGrid.NumLocalLights;
	}
	else
	{
		// Maximum light count over a range of slices: the selected slice only, or all slices.
		// For DebugMode 2 we do not want to fetch light data from the last slice since in this case the culling
		// will be pretty bad (super large depth turned into a bounding sphere...)
		// See ComputeCellNearViewDepthFromZSlice.
		int FirstSlice = 0;
		int EndSlice = GridData.CulledGridSize.z;
		if (SpecifyDebugSlice)
		{
			FirstSlice = uint(DebugSlice);
			EndSlice = FirstSlice + 1;
		}
		else if (DebugMode == 2)
		{
			EndSlice = GridData.CulledGridSize.z - 1;
		}

		LOOP
		for (int SliceIt = FirstSlice; SliceIt < EndSlice; SliceIt++)
		{
			const uint GridIndex = ComputeLightGridCellIndex(uint3(ScaledLocalPos >> GridData.LightGridPixelSizeShift, SliceIt), EyeIndex);
			const FCulledLightsGridData CulledLightsGrid = GetCulledLightsGrid(GridIndex, EyeIndex);

			DebugNumLocalLights = max(DebugNumLocalLights, CulledLightsGrid.NumLocalLights);
		}
	}

	// Top left pixel of the tile containing this pixel, and of the tile containing the next pixel.
	const uint2 TileOrigin = (ScaledLocalPos >> GridData.LightGridPixelSizeShift) << GridData.LightGridPixelSizeShift;
	const uint2 NextTileOrigin = (ScaledNextLocalPos >> GridData.LightGridPixelSizeShift) << GridData.LightGridPixelSizeShift;
	const uint TileSize = (0x1u << GridData.LightGridPixelSizeShift) - 1;

	if (DebugNumLocalLights > 0)
	{
		OutLuminanceTransmittance.a = 0.0f;
		OutLuminanceTransmittance.rgb = GetHSVDebugColor(float(DebugNumLocalLights) / 8.0f);
		PrintSmallUint(PixelPos, OutLuminanceTransmittance.rgb, float3(0.1, 0.1, 0.1), (TileOrigin + TileSize/2) / View.ViewResolutionFraction, float(DebugNumLocalLights));
	}

	// Tile borders are drawn on top of the light count color.
	const bool bOnColumnBorder = TileOrigin.x <= ScaledLocalPos.x && NextTileOrigin.x >= ScaledLocalPos.x;
	const bool bOnRowBorder = TileOrigin.y <= ScaledLocalPos.y && NextTileOrigin.y >= ScaledLocalPos.y;
	if (bOnColumnBorder || bOnRowBorder)
	{
		OutLuminanceTransmittance.rgba = float4(0.25, 0.25, 0.25, 0.0);
	}
}
|
|
|
|
#endif // SHADER_DEBUG_LIGHT_GRID_PS
|
|
|