Files
UnrealEngineUWP/Engine/Shaders/Private/TiledDeferredLightShaders.usf
Guillaume Abadie 43ccc8111b Nukes USE_PREEXPOSURE
#rb uriel.doyon, jack.porter

[CL 16070774 by Guillaume Abadie in ue5-main branch]
2021-04-21 04:53:11 -04:00

371 lines
16 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
TiledDeferredLightShaders.usf: Implementation of tiled deferred shading
=============================================================================*/
#define SUPPORT_CONTACT_SHADOWS 0
#define USE_SOURCE_TEXTURE 0
#include "Common.ush"
#include "DeferredLightingCommon.ush"
/**
* Hoist shading model selection out to the main function, then repeat light loop with fixed light model and let early outs deal with it.
* This should let the compiler defer loading values from the GBuffer until they are needed and thus reduce the catastrophic
* register pressure.
* If the compiler refuses, then move the GBuffer data load into the per-shading model branches also.
* Also use an outermost early out check to skip the whole pass.
*/
// When non-zero, TiledDeferredLightingMain runs the light loops once per supported shading model behind a branch
// (via EXECUTE_SHADING_LOOPS_SINGLE_SM) instead of loading the GBuffer once and running a single generic loop.
#define USE_PASS_PER_SHADING_MODEL 1
/** Number of threads in one thread group, i.e. one thread per pixel of a tile. */
#define THREADGROUP_TOTALSIZE (THREADGROUP_SIZEX * THREADGROUP_SIZEY)
/** View rect min in xy, max in zw. */
uint4 ViewDimensions;
/** Number of lights to consider for culling; the culling loop additionally clamps the index to MAX_LIGHTS. */
uint NumLights;
// Workaround performance issue with shared memory bank collisions in GLSL
// ATOMIC_REDUCTION == 1: tile depth bounds are built with InterlockedMin/InterlockedMax on groupshared memory.
// ATOMIC_REDUCTION == 0: tile depth bounds are built with a manual multi-step reduction through TileZ.
#if SWITCH_PROFILE || SWITCH_PROFILE_FORWARD
#define ATOMIC_REDUCTION 0
#else
#define ATOMIC_REDUCTION 1
#endif
#if !ATOMIC_REDUCTION
/** Per-thread scene depth scratch space used by the manual (non-atomic) min/max reduction. */
groupshared float TileZ[THREADGROUP_TOTALSIZE];
#endif
/** Min and Max depth for this tile, stored as the raw bit pattern of a float (written with asuint, read with asfloat). */
groupshared uint IntegerTileMinZ;
groupshared uint IntegerTileMaxZ;
/** Number of lights affecting the tile, after culling. */
groupshared uint TileNumLights;
/**
* Indices of lights that survived culling for the tile.
* The order of lights will be non-deterministic, so the final result must not be dependent on this order.
*/
groupshared uint TileLightIndices[MAX_LIGHTS];
/** Number of simple lights affecting the tile after culling. */
groupshared uint TileNumSimpleLights;
/** Same as TileLightIndices, except for simple lights */
groupshared uint TileSimpleLightIndices[MAX_LIGHTS];
/** Input texture; TiledDeferredLightingMain reads it and adds the tile's accumulated lighting on top. */
Texture2D<float4> InTexture;
/** Output HDR target; receives InTexture plus the accumulated lighting for every pixel inside the view. */
RWTexture2D<float4> RWOutTexture;
/**
 * Adds the contribution of every light in the tile's two culled lists to one pixel.
 *
 * Lights listed in TileLightIndices are evaluated with GetDynamicLighting using the shading model stored in
 * ScreenSpaceData.GBuffer.ShadingModelID. Lights listed in TileSimpleLightIndices are evaluated with
 * GetSimpleDynamicLighting and only touch the rgb channels of the accumulator.
 *
 * @param CompositedLighting			Lighting accumulator; contributions are added to whatever it already holds.
 * @param ScreenSpaceData				GBuffer contents and ambient occlusion for the pixel being shaded.
 * @param NumLightsAffectingTile		How many leading entries of TileLightIndices to process.
 * @param NumSimpleLightsAffectingTile	How many leading entries of TileSimpleLightIndices to process.
 * @param CameraVector					Forwarded unchanged to the lighting functions.
 * @param WorldPosition					Forwarded unchanged to the lighting functions.
 */
void ExecuteShadingLoops(inout float4 CompositedLighting, FScreenSpaceData ScreenSpaceData, uint NumLightsAffectingTile, uint NumSimpleLightsAffectingTile, float3 CameraVector, float3 WorldPosition)
{
	// ---- Pass 1: lights that use the full shading path ----
	LOOP
	for (uint FullSlot = 0; FullSlot < NumLightsAffectingTile; ++FullSlot)
	{
		uint FullLightId = TileLightIndices[FullSlot];

		// Fetch each packed per-light vector once and unpack from the local copy
		float4 PositionAndInvRadius = TiledDeferred.LightPositionAndInvRadius[FullLightId];
		float4 ColorAndFalloff = TiledDeferred.LightColorAndFalloffExponent[FullLightId];
		float4 SpotAndSource = TiledDeferred2.SpotAnglesAndSourceRadiusAndSimpleLighting[FullLightId];
		float4 DirectionAndSpecular = TiledDeferred2.LightDirectionAndSpotlightMaskAndSpecularScale[FullLightId];

		FDeferredLightData FullLight = (FDeferredLightData)0;

		FullLight.Position = PositionAndInvRadius.xyz;
		FullLight.InvRadius = PositionAndInvRadius.w;

		FullLight.Color = ColorAndFalloff.xyz;
		FullLight.FalloffExponent = ColorAndFalloff.w;
		// A falloff exponent of exactly zero selects inverse squared falloff
		FullLight.bInverseSquared = FullLight.FalloffExponent == 0;

		FullLight.SpotAngles = SpotAndSource.xy;
		FullLight.SourceRadius = SpotAndSource.z;
		// SourceLength is deliberately left at its zero initialization: lights with a non-zero length are not
		// supported by the tiled deferred path.

		FullLight.Direction = DirectionAndSpecular.xyz;
		FullLight.SpecularScale = DirectionAndSpecular.w;
		// Point lights store ( -2.0f, 1.0f ) as their (cosine of) spot angles, see PointLightComponent.cpp,
		// so anything above -2 in x is a spot light.
		FullLight.bSpotLight = FullLight.SpotAngles.x > -2.0f;

		// Tiled deferred only handles radial lights
		FullLight.bRadialLight = true;

		// Static shadowing is the only shadowing available here, so the light computes shadowing only when its
		// shadow map channel mask is non-empty.
		FullLight.ShadowMapChannelMask = TiledDeferred2.ShadowMapChannelMask[FullLightId];
		FullLight.ShadowedBits = dot(FullLight.ShadowMapChannelMask, float4(1, 1, 1, 1));

		// Lights that need light attenuation are not supported by the tiled path for now.
		// WARNING: DeferredLightUniforms.SourceTexture is not set here, but compiles out because of bRectLight being false and USE_SOURCE_TEXTURE == 0.
		FRectTexture UnusedRectTexture = InitRectTexture(DeferredLightUniforms.SourceTexture);
		float SurfaceShadowResult = 1.0f;

		CompositedLighting += GetDynamicLighting(
			WorldPosition,
			CameraVector,
			ScreenSpaceData.GBuffer,
			ScreenSpaceData.AmbientOcclusion,
			ScreenSpaceData.GBuffer.ShadingModelID,
			FullLight,
			float4(1, 1, 1, 1),
			0.5,
			uint2(0,0),
			UnusedRectTexture,
			SurfaceShadowResult);
	}

	// ---- Pass 2: simple lights ----
	// The simple shading model ignores ShadingModelID, so it is used for any lit material.
	LOOP
	for (uint SimpleSlot = 0; SimpleSlot < NumSimpleLightsAffectingTile; ++SimpleSlot)
	{
		uint SimpleLightId = TileSimpleLightIndices[SimpleSlot];

		float4 SimplePositionAndInvRadius = TiledDeferred.LightPositionAndInvRadius[SimpleLightId];
		float4 SimpleColorAndFalloff = TiledDeferred.LightColorAndFalloffExponent[SimpleLightId];

		FSimpleDeferredLightData SimpleLight = (FSimpleDeferredLightData)0;
		SimpleLight.Position = SimplePositionAndInvRadius.xyz;
		SimpleLight.InvRadius = SimplePositionAndInvRadius.w;
		SimpleLight.Color = SimpleColorAndFalloff.xyz;
		SimpleLight.FalloffExponent = SimpleColorAndFalloff.w;
		SimpleLight.bInverseSquared = SimpleLight.FalloffExponent == 0;

		// todo: doesn't support ScreenSpaceSubsurfaceScattering yet (not using alpha)
		CompositedLighting.rgb += GetSimpleDynamicLighting(
			WorldPosition,
			CameraVector,
			ScreenSpaceData.GBuffer.WorldNormal,
			ScreenSpaceData.AmbientOcclusion,
			ScreenSpaceData.GBuffer.DiffuseColor,
			ScreenSpaceData.GBuffer.SpecularColor,
			ScreenSpaceData.GBuffer.Roughness,
			SimpleLight);
	}
}
/**
 * Runs ExecuteShadingLoops for one specific shading model, but only when the pixel actually uses that model.
 * The GBuffer is loaded inside the branch and its ShadingModelID is overwritten with the model being tested
 * before the light loops are executed.
 * Arguments, in order: model to test for, the pixel's model, lighting accumulator (inout float4), pixel position,
 * number of culled lights, number of culled simple lights, camera vector, world position.
 */
#define EXECUTE_SHADING_LOOPS_SINGLE_SM(TargetModel, PixelModel, /*inout float4*/ LightingAccum, PixelCoord, LitCount, SimpleCount, ViewVector, SurfacePosition) \
	{ \
		BRANCH \
		if ((PixelModel) == (TargetModel)) \
		{ \
			FScreenSpaceData PerModelData = GetScreenSpaceDataUint(PixelCoord); \
			PerModelData.GBuffer.ShadingModelID = TargetModel; \
			ExecuteShadingLoops(LightingAccum, PerModelData, LitCount, SimpleCount, ViewVector, SurfacePosition); \
		} \
	}
/**
 * Tiled deferred lighting compute entry point. One thread group processes one screen tile, one thread per pixel.
 *
 * Steps, in order:
 *  1. Reduce the scene depth of all pixels in the tile to a min/max range in groupshared memory.
 *  2. Build the tile's frustum planes from that range and the tile's position on screen.
 *  3. Cull lights against those planes (threads stride over the light list) and append survivors to
 *     TileLightIndices / TileSimpleLightIndices.
 *  4. Shade the thread's pixel with every light in both lists.
 *  5. Write InTexture + accumulated lighting to RWOutTexture for pixels inside the view rect.
 *
 * No thread returns early: every thread of the group has to reach each GroupMemoryBarrierWithGroupSync,
 * so pixels outside the view are only filtered out at the final write.
 *
 * @param GroupId			Tile coordinate, used to position the tile frustum.
 * @param DispatchThreadId	Pixel coordinate relative to the view rect min.
 * @param GroupThreadId		Thread coordinate inside the tile.
 */
[numthreads(THREADGROUP_SIZEX, THREADGROUP_SIZEY, 1)]
void TiledDeferredLightingMain(
	uint3 GroupId : SV_GroupID,
	uint3 DispatchThreadId : SV_DispatchThreadID,
	uint3 GroupThreadId : SV_GroupThreadID)
{
	uint ThreadIndex = GroupThreadId.y * THREADGROUP_SIZEX + GroupThreadId.x;
	// DispatchThreadId is relative to the view; PixelPos is the absolute texel in the render target
	uint2 PixelPos = DispatchThreadId.xy + ViewDimensions.xy;
	float2 ScreenUV = (float2(DispatchThreadId.xy) + .5f) / (ViewDimensions.zw - ViewDimensions.xy);
	float2 ScreenPosition = float2(2.0f, -2.0f) * ScreenUV + float2(-1.0f, 1.0f);
	float SceneDepth = CalcSceneDepth(PixelPos);

#if ATOMIC_REDUCTION
	// Initialize per-tile variables
	if (ThreadIndex == 0)
	{
		// 0x7F7FFFFF is the bit pattern of the largest finite float, 0 is the bit pattern of 0.0f
		IntegerTileMinZ = 0x7F7FFFFF;
		IntegerTileMaxZ = 0;
		TileNumLights = 0;
		TileNumSimpleLights = 0;
	}
	GroupMemoryBarrierWithGroupSync();

	// Use shared memory atomics to build the depth bounds for this tile
	// Each thread is assigned to a pixel at this point
	// NOTE(review): comparing the raw uint bit patterns assumes SceneDepth is never negative - confirm.
	//@todo - move depth range computation to a central point where it can be reused by all the frame's tiled deferred passes!
	InterlockedMin(IntegerTileMinZ, asuint(SceneDepth));
	InterlockedMax(IntegerTileMaxZ, asuint(SceneDepth));
	GroupMemoryBarrierWithGroupSync();
#else
	// Manual reduction through TileZ, used where shared memory atomics are slow.
	// The hard-coded offsets below touch TileZ[0..103], so THREADGROUP_TOTALSIZE has to be at least 104.
	TileZ[ThreadIndex] = SceneDepth;
	GroupMemoryBarrierWithGroupSync();

	// Step 1: the first 32 threads each fold every 32nd entry; mins land in [0, 32), maxes in [32, 64).
	// A thread only reads and writes entries congruent to its own index modulo 32, so no barrier is needed inside.
	if (ThreadIndex < 32)
	{
		float Min = SceneDepth;
		float Max = SceneDepth;
		for (int i = ThreadIndex + 32; i< THREADGROUP_TOTALSIZE; i += 32)
		{
			Min = min(Min, TileZ[i]);
			Max = max(Max, TileZ[i]);
		}
		TileZ[ThreadIndex] = Min;
		TileZ[ThreadIndex + 32] = Max;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 2: 8 threads fold the 32 mins and 32 maxes down to 8 each, stored at [64, 72) and [96, 104).
	if (ThreadIndex < 8)
	{
		float Min = TileZ[ThreadIndex];
		float Max = TileZ[ThreadIndex + 32];
		Min = min(Min, TileZ[ThreadIndex + 8]);
		Max = max(Max, TileZ[ThreadIndex + 40]);
		Min = min(Min, TileZ[ThreadIndex + 16]);
		Max = max(Max, TileZ[ThreadIndex + 48]);
		Min = min(Min, TileZ[ThreadIndex + 24]);
		Max = max(Max, TileZ[ThreadIndex + 56]);
		TileZ[ThreadIndex + 64] = Min;
		TileZ[ThreadIndex + 96] = Max;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 3: thread 0 folds the last 8 + 8 values and publishes the result plus the zeroed light counters.
	if (ThreadIndex == 0)
	{
		float Min = TileZ[64];
		float Max = TileZ[96];
		for (int i = 1; i< 8; i++)
		{
			Min = min(Min, TileZ[i + 64]);
			Max = max(Max, TileZ[i + 96]);
		}
		IntegerTileMinZ = asuint(Min);
		IntegerTileMaxZ = asuint(Max);
		TileNumLights = 0;
		TileNumSimpleLights = 0;
	}
	GroupMemoryBarrierWithGroupSync();
#endif

	float MinTileZ = asfloat(IntegerTileMinZ);
	float MaxTileZ = asfloat(IntegerTileMaxZ);

	// Setup tile frustum planes
	float2 TileScale = float2(ViewDimensions.zw - ViewDimensions.xy) * rcp(2 * float2(THREADGROUP_SIZEX, THREADGROUP_SIZEY));
	float2 TileBias = TileScale - GroupId.xy;
	float4 C1 = float4(View.ViewToClip._11 * TileScale.x, 0.0f, View.ViewToClip._31 * TileScale.x + TileBias.x, 0.0f);
	float4 C2 = float4(0.0f, -View.ViewToClip._22 * TileScale.y, View.ViewToClip._32 * TileScale.y + TileBias.y, 0.0f);
	float4 C4 = float4(0.0f, 0.0f, 1.0f, 0.0f);
	float4 frustumPlanes[6];
	frustumPlanes[0] = C4 - C1;
	frustumPlanes[1] = C4 + C1;
	frustumPlanes[2] = C4 - C2;
	frustumPlanes[3] = C4 + C2;
	// Planes 4 and 5 bound the tile in depth and are already unit length
	frustumPlanes[4] = float4(0.0f, 0.0f, 1.0f, -MinTileZ);
	frustumPlanes[5] = float4(0.0f, 0.0f, -1.0f, MaxTileZ);

	// Normalize tile frustum planes
	UNROLL
	for (uint i = 0; i < 4; ++i)
	{
		frustumPlanes[i] *= rcp(length(frustumPlanes[i].xyz));
	}

	// With a perspective projection, the clip space position is NDC * Clip.w
	// With an orthographic projection, clip space is the same as NDC
	float2 ClipPosition = ScreenPosition * (View.ViewToClip[3][3] < 1.0f ? SceneDepth : 1.0f);
	float3 WorldPosition = mul(float4(ClipPosition, SceneDepth, 1), View.ScreenToWorld).xyz;

	// RGB accumulated RGB HDR color, A: luminance for screenspace subsurface scattering
	// Declared ahead of the DO_CULLING_AND_SHADING section because the final write below always uses it.
	float4 CompositedLighting = 0;

#define DO_CULLING_AND_SHADING 1
#if DO_CULLING_AND_SHADING
	// Compute per-tile lists of affecting lights through bounds culling
	// Each thread now operates on a light instead of a pixel: thread N tests lights N, N + THREADGROUP_TOTALSIZE, ...
	LOOP
	for (uint LightIndex = ThreadIndex; LightIndex < NumLights && LightIndex < MAX_LIGHTS; LightIndex += THREADGROUP_TOTALSIZE)
	{
		float4 LightPositionAndInvRadius = TiledDeferred.LightPositionAndInvRadius[LightIndex];
		float LightRadius = 1.0f / LightPositionAndInvRadius.w;
		float3 BoundsViewPosition = mul(float4(LightPositionAndInvRadius.xyz + View.PreViewTranslation.xyz, 1), View.TranslatedWorldToView).xyz;

		// Cull the light against the tile's frustum planes
		// Note: this has some false positives, a light that is intersecting three different axis frustum planes yet not intersecting the volume of the tile will be treated as intersecting
		bool bInTile = true;

		// Test against the screen x and y oriented planes first
		UNROLL
		for (uint i = 0; i < 4; ++i)
		{
			float PlaneDistance = dot(frustumPlanes[i], float4(BoundsViewPosition, 1.0f));
			bInTile = bInTile && (PlaneDistance >= -LightRadius);
		}

		BRANCH
		if (bInTile)
		{
			bool bInDepthRange = true;

			// Test against the depth range
			UNROLL
			for (uint i = 4; i < 6; ++i)
			{
				float PlaneDistance = dot(frustumPlanes[i], float4(BoundsViewPosition, 1.0f));
				bInDepthRange = bInDepthRange && (PlaneDistance >= -LightRadius);
			}

			// Add this light to the list of indices if it intersects
			BRANCH
			if (bInDepthRange)
			{
				// Separate lights into different index lists depending on whether they will use simple shading or not
				// (designated by a positive w component of SpotAnglesAndSourceRadiusAndSimpleLighting)
				// This prevents the need for an inner loop branch when shading
				// Each light lands in exactly one list and LightIndex < MAX_LIGHTS, so neither list can overflow.
				if (TiledDeferred2.SpotAnglesAndSourceRadiusAndSimpleLighting[LightIndex].w > 0.0f)
				{
					uint ListIndex;
					InterlockedAdd(TileNumSimpleLights, 1U, ListIndex);
					TileSimpleLightIndices[ListIndex] = LightIndex;
				}
				else
				{
					uint ListIndex;
					InterlockedAdd(TileNumLights, 1U, ListIndex);
					TileLightIndices[ListIndex] = LightIndex;
				}
			}
		}
	}

	// Make the finished light lists visible to every thread before shading starts
	GroupMemoryBarrierWithGroupSync();

	uint NumLightsAffectingTile = TileNumLights;
	uint NumSimpleLightsAffectingTile = TileNumSimpleLights;

#if USE_PASS_PER_SHADING_MODEL
	// Now we could trick the compiler to put this in a branch by setting an always 1 int variable and decorate with BRANCH
	// this might force it to not load the whole thing, but you never know with shader compilers.
	uint PixelShadingModelID = GetScreenSpaceDataUint(PixelPos).GBuffer.ShadingModelID;
#else // !USE_PASS_PER_SHADING_MODEL
	// Lookup GBuffer properties once per pixel
	FScreenSpaceData ScreenSpaceData = GetScreenSpaceDataUint(PixelPos);
	uint PixelShadingModelID = ScreenSpaceData.GBuffer.ShadingModelID;
#endif // USE_PASS_PER_SHADING_MODEL

	// NOTE: This definition of 'CameraVector' appears to be the reverse of what
	// is in forward shading parts e.g, as defined in 'FMaterialPixelParameters'.
	float3 CameraVector = normalize(WorldPosition - View.WorldCameraOrigin);

	//BRANCH
	if (PixelShadingModelID != SHADINGMODELID_UNLIT)
	{
#if USE_PASS_PER_SHADING_MODEL
		// At most one of these branches is taken for a given pixel
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_DEFAULT_LIT, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_SUBSURFACE, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_PREINTEGRATED_SKIN, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_CLEAR_COAT, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_SUBSURFACE_PROFILE, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_TWOSIDED_FOLIAGE, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_HAIR, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_CLOTH, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_EYE, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		EXECUTE_SHADING_LOOPS_SINGLE_SM(SHADINGMODELID_SINGLELAYERWATER, PixelShadingModelID, CompositedLighting, PixelPos, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
		// SHADINGMODELID_THIN_TRANSLUCENT - skipping because it can not be opaque
#else // !USE_PASS_PER_SHADING_MODEL
		ExecuteShadingLoops(CompositedLighting, ScreenSpaceData, NumLightsAffectingTile, NumSimpleLightsAffectingTile, CameraVector, WorldPosition);
#endif // USE_PASS_PER_SHADING_MODEL
	}

#if !VISUALIZE_LIGHT_CULLING
	CompositedLighting *= View.PreExposure;
#endif // !VISUALIZE_LIGHT_CULLING
#endif

	// Only write to the buffer for threads inside the view
	// The test uses the absolute PixelPos because ViewDimensions.zw is the absolute view max; testing the
	// view-relative DispatchThreadId would let threads write past the view when the view rect min is not zero.
	BRANCH
	if (all(PixelPos < ViewDimensions.zw))
	{
		// On some hardware we can read and write from the same UAV with a 32 bit format. We don't do that yet.
		RWOutTexture[PixelPos.xy] = InTexture[PixelPos.xy] + CompositedLighting;
	}
}