Files
UnrealEngineUWP/Engine/Shaders/Private/VirtualShadowMaps/VirtualShadowMapProjectionSpot.ush
andrew lauritzen bf84b62791 Improve texel dither to be more continuous and avoid artifacts at the edges of pages (which are now dynamically picked)
Add a world dither. Not necessarily useful but can be somewhat analogous to PCF radius.
Add permutation to SMRT samples = 0 (and possibly more in the future).

#preflight 628eab6fc826bd5a7f1135ec
#rb ola.olsson

#ROBOMERGE-AUTHOR: andrew.lauritzen
#ROBOMERGE-SOURCE: CL 20372897 via CL 20372913 via CL 20372922
#ROBOMERGE-BOT: UE5 (Release-Engine-Staging -> Main) (v949-20362246)

[CL 20373677 by andrew lauritzen in ue5-main branch]
2022-05-25 19:50:23 -04:00

468 lines
16 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VirtualShadowMapProjectionSpot.ush:
=============================================================================*/
#pragma once
#include "../DeferredShadingCommon.ush"
#include "../SceneTexturesCommon.ush"
#include "../LightShaderParameters.ush"
#include "../PathTracing/Utilities/PathTracingRandomSequence.ush"
#include "VirtualShadowMapPageAccessCommon.ush"
#include "VirtualShadowMapProjectionCommon.ush"
#include "VirtualShadowMapSMRTCommon.ush"
// Per-ray state for an SMRT ray that is tested against a single virtual shadow map.
// Created by SMRTSingleRayInitialize and updated by SMRTFindSample.
struct FSMRTSingleRayState
{
// Virtual shadow map that this ray samples
int VirtualShadowMapId;
// Ray start in shadow UV + depth space (after the perspective divide)
float3 RayStartUVz;
// Ray end minus ray start in the same space; samples are taken at RayStartUVz + RayStepUVz * SampleTime
float3 RayStepUVz;
// Debug output: taken from the most recent valid shadow map sample.
// Initialized to mip 0 and all-ones texel addresses until a valid sample is found.
int MipLevel;
uint2 VirtualTexelAddress;
uint2 PhysicalTexelAddress;
};
// Sets up the state for a ray traced against one virtual shadow map.
// Both endpoints are projected with the map's TranslatedWorldToShadowUVMatrix and divided by w;
// the state stores the projected start and the start->end delta.
// Debug fields are reset to mip 0 and all-ones texel addresses.
FSMRTSingleRayState SMRTSingleRayInitialize(
	int VirtualShadowMapId,
	// Shadow PreViewTranslation already added in
	float3 RayStartShadowTranslatedWorld,
	float3 RayEndShadowTranslatedWorld)
{
	FVirtualShadowMapProjectionShaderData ShadowProjection = GetVirtualShadowMapProjectionData(VirtualShadowMapId);

	// Homogeneous shadow-space positions of the two ray endpoints
	float4 StartHomogeneous = mul(float4(RayStartShadowTranslatedWorld, 1), ShadowProjection.TranslatedWorldToShadowUVMatrix);
	float4 EndHomogeneous = mul(float4(RayEndShadowTranslatedWorld, 1), ShadowProjection.TranslatedWorldToShadowUVMatrix);

	// NOTE: We assume by construction that ray ends are not behind the light near plane as the warping
	// due to SMRT ray tests gets severe long before the rays would otherwise clip at that plane.
	// If clipping is ever necessary, it must be done to the ray endpoints in world space so that lights that
	// need to walk multiple faces/clipmap levels in lock step can do it consistently.
	float3 StartUVz = StartHomogeneous.xyz / StartHomogeneous.w;
	float3 EndUVz = EndHomogeneous.xyz / EndHomogeneous.w;

	FSMRTSingleRayState RayState;
	RayState.VirtualShadowMapId = VirtualShadowMapId;
	RayState.RayStartUVz = StartUVz;
	RayState.RayStepUVz = EndUVz - StartUVz;

	// No sample has been taken yet: debug outputs start at their "nothing found" values
	RayState.MipLevel = 0;
	RayState.VirtualTexelAddress = uint2(0xFFFFFFFF, 0xFFFFFFFF);
	RayState.PhysicalTexelAddress = uint2(0xFFFFFFFF, 0xFFFFFFFF);
	return RayState;
}
// Looks up the shadow map at parametric position SampleTime along the ray
// (RayStartUVz + RayStepUVz * SampleTime).
// Returns a sample with bValid == false when the position lies outside the [0,1] UV range
// or when the virtual shadow map has no valid data there.
// On a valid sample the debug fields of RayState are overwritten with that sample's addresses.
FSMRTSample SMRTFindSample(inout FSMRTSingleRayState RayState, float SampleTime)
{
	FSMRTSample Found = (FSMRTSample)0;
	Found.bValid = false;

	float3 SampleUVz = RayState.RayStartUVz.xyz + RayState.RayStepUVz.xyz * SampleTime;

	// Off the edge of this shadow map: nothing to sample
	if (!all(SampleUVz.xy == saturate(SampleUVz.xy)))
	{
		return Found;
	}

	FVirtualShadowMapSample PageSample = SampleVirtualShadowMap(RayState.VirtualShadowMapId, SampleUVz.xy);
	if (!PageSample.bValid)
	{
		return Found;
	}

	Found.bValid = true;
	Found.SampleDepth = PageSample.Depth;
	Found.ReferenceDepth = SampleUVz.z;
	Found.CompareToleranceScale = RayState.RayStepUVz.z;

	// Debug output
	RayState.MipLevel = PageSample.MipLevel;
	RayState.PhysicalTexelAddress = PageSample.PhysicalTexelAddress;
	RayState.VirtualTexelAddress = PageSample.VirtualTexelAddress;

	return Found;
}
// Returns the view-space Z difference between the ray start (receiver) and an occluder at
// shadow depth SampleDepth, clamped to be no smaller than VIRTUAL_SHADOW_MAP_MIN_OCCLUDER_DISTANCE.
float SMRTComputeOccluderDistance(FSMRTSingleRayState RayState, float SampleDepth)
{
	// Fetched here on demand rather than carried in the ray state, to avoid keeping around the registers
	float4x4 ShadowViewToClip = GetVirtualShadowMapProjectionData(RayState.VirtualShadowMapId).ShadowViewToClipMatrix;

	// The two projection terms needed to map a shadow depth back to view-space Z
	const float ClipZTranslate = ShadowViewToClip._43;
	const float ClipZScale = ShadowViewToClip._33;

	// View space Z delta is a good enough stand-in for euclidean distance for now given our bent ray tests and so on
	const float ReceiverViewZ = ClipZTranslate / (RayState.RayStartUVz.z - ClipZScale);
	const float OccluderViewZ = ClipZTranslate / (SampleDepth - ClipZScale);

	return max(VIRTUAL_SHADOW_MAP_MIN_OCCLUDER_DISTANCE, ReceiverViewZ - OccluderViewZ);
}
// Instantiate SMRTRayCast for FSMRTSingleRayState.
// The template header is parameterized through SMRT_TEMPLATE_RAY_STRUCT, which is defined
// only for the duration of the include so the header can be instantiated again for another struct.
// NOTE(review): presumably the template calls the SMRTFindSample / SMRTComputeOccluderDistance
// overloads for this struct - confirm in VirtualShadowMapSMRTTemplate.ush.
#define SMRT_TEMPLATE_RAY_STRUCT FSMRTSingleRayState
#include "VirtualShadowMapSMRTTemplate.ush"
#undef SMRT_TEMPLATE_RAY_STRUCT
// State for a ray that is tested against two virtual shadow maps (two cube map faces).
// The same world-space ray is set up once per face.
struct FSMRTTwoCubeFaceRayState
{
// Ray state for the first face; tried first by SMRTFindSample
FSMRTSingleRayState Face0;
// Ray state for the second face; used when Face0 yields no valid sample
FSMRTSingleRayState Face1;
// True when the most recent SMRTFindSample call got its valid sample from Face0
bool bSampleInFace0;
};
// Sets up the same world-space ray against two virtual shadow maps (one per cube face).
// bSampleInFace0 starts out false until a sample lookup says otherwise.
FSMRTTwoCubeFaceRayState SMRTTwoCubeFaceRayInitialize(
	int VirtualShadowMapId0,
	int VirtualShadowMapId1,
	float3 RayStartShadowTranslatedWorld,
	float3 RayEndShadowTranslatedWorld)
{
	FSMRTTwoCubeFaceRayState TwoFaceState;
	TwoFaceState.bSampleInFace0 = false;
	TwoFaceState.Face0 = SMRTSingleRayInitialize(VirtualShadowMapId0, RayStartShadowTranslatedWorld, RayEndShadowTranslatedWorld);
	TwoFaceState.Face1 = SMRTSingleRayInitialize(VirtualShadowMapId1, RayStartShadowTranslatedWorld, RayEndShadowTranslatedWorld);
	return TwoFaceState;
}
// Two-face lookup: Face0 is tried first and Face1 is only consulted when Face0 has no valid sample.
// bSampleInFace0 records which face the returned sample came from.
FSMRTSample SMRTFindSample(inout FSMRTTwoCubeFaceRayState RayState, float SampleTime)
{
	FSMRTSample Face0Sample = SMRTFindSample(RayState.Face0, SampleTime);
	RayState.bSampleInFace0 = Face0Sample.bValid;
	if (RayState.bSampleInFace0)
	{
		return Face0Sample;
	}

	// Nothing valid on the first face; whatever the second face reports is the answer
	return SMRTFindSample(RayState.Face1, SampleTime);
}
// Occluder distance for the two-face ray: evaluated with the ray state of the face
// that produced the most recent sample (Face0 when bSampleInFace0 is set, otherwise Face1).
float SMRTComputeOccluderDistance(FSMRTTwoCubeFaceRayState RayState, float SampleDepth)
{
	if (!RayState.bSampleInFace0)
	{
		return SMRTComputeOccluderDistance(RayState.Face1, SampleDepth);
	}

	return SMRTComputeOccluderDistance(RayState.Face0, SampleDepth);
}
// Instantiate SMRTRayCast for FSMRTTwoCubeFaceRayState.
// Same template header as for the single-ray struct; SMRT_TEMPLATE_RAY_STRUCT selects the
// ray state type for this inclusion and is undefined again afterwards.
#define SMRT_TEMPLATE_RAY_STRUCT FSMRTTwoCubeFaceRayState
#include "VirtualShadowMapSMRTTemplate.ush"
#undef SMRT_TEMPLATE_RAY_STRUCT
// Traces one SMRT ray against a single virtual shadow map.
// NumSteps and StepOffset are forwarded unchanged to SMRTRayCast.
// OutRayState receives the ray state after the trace.
FSMRTResult ShadowRayCastSpotLight(
	int VirtualShadowMapId,
	float3 RayStartShadowTranslatedWorld,
	float3 RayEndShadowTranslatedWorld,
	int NumSteps,
	float StepOffset,
	// To get debug data out
	inout FSMRTSingleRayState OutRayState)
{
	FSMRTSingleRayState SpotRay = SMRTSingleRayInitialize(VirtualShadowMapId, RayStartShadowTranslatedWorld, RayEndShadowTranslatedWorld);
	FSMRTResult CastResult = SMRTRayCast(SpotRay, NumSteps, StepOffset);

	// Hand the traced state back to the caller
	OutRayState = SpotRay;
	return CastResult;
}
// Traces one SMRT ray against the cube map faces of a point light.
// VirtualShadowMapId is the base ID; the cube face index of each ray endpoint is added to it.
// When both endpoints fall on the same face a single-map trace is used, otherwise the ray
// is traced against both faces together.
// OutRayState receives the single-face ray state that the trace ended on.
FSMRTResult ShadowRayCastPointLight(
	int VirtualShadowMapId,
	float3 RayStartShadowTranslatedWorld,
	float3 RayEndShadowTranslatedWorld,
	int NumSteps,
	float StepOffset,
	// To get debug data out
	inout FSMRTSingleRayState OutRayState)
{
	const uint StartFace = VirtualShadowMapGetCubeFace(RayStartShadowTranslatedWorld);
	const uint EndFace = VirtualShadowMapGetCubeFace(RayEndShadowTranslatedWorld);

	if (StartFace == EndFace)
	{
		// Fast path: ray stays on a single cube map face (effectively a spot light)
		OutRayState = SMRTSingleRayInitialize(VirtualShadowMapId + EndFace, RayStartShadowTranslatedWorld, RayEndShadowTranslatedWorld);
		return SMRTRayCast(OutRayState, NumSteps, StepOffset);
	}

	// Ray crosses from one face to another: trace against both
	FSMRTTwoCubeFaceRayState TwoFaceRay = SMRTTwoCubeFaceRayInitialize(
		VirtualShadowMapId + StartFace,
		VirtualShadowMapId + EndFace,
		RayStartShadowTranslatedWorld, RayEndShadowTranslatedWorld);
	FSMRTResult CastResult = SMRTRayCast(TwoFaceRay, NumSteps, StepOffset);

	// Report the state of whichever face the last sample lookup landed on
	if (TwoFaceRay.bSampleInFace0)
	{
		OutRayState = TwoFaceRay.Face0;
	}
	else
	{
		OutRayState = TwoFaceRay.Face1;
	}
	return CastResult;
}
// MaxBits version reduces unrolled iterations
// Only the low MaxBits bits of the scrambled sample index are used to build the point.
// Seed is evolved five times (once for the index, once per output component).
// NOTE(review): the mask expression assumes MaxBits < 32 - callers in this file pass 8.
float4 SobolSampler(uint SampleIndex, inout uint Seed, uint MaxBits)
{
	// first scramble the index to decorrelate from other 4-tuples
	const uint IndexMask = (1U << MaxBits) - 1U;
	const uint ScrambledIndex = FastOwenScrambling(SampleIndex, EvolveSobolSeed(Seed)) & IndexMask;

	// now get Sobol' point from this index
	uint4 Point = uint4(ScrambledIndex, 0, 0, 0);
	uint ColumnY = 1;
	UNROLL for (uint Bit = 0; Bit < MaxBits; Bit++)
	{
		const uint BitValue = (ScrambledIndex >> Bit) & 1;
		Point.y ^= BitValue * ColumnY;
		Point.zw ^= BitValue * SobolMatrices[Bit];
		ColumnY ^= ColumnY << 1;
	}

	// finally scramble the points to avoid structured artifacts
	// (component order matters: each call advances Seed)
	Point.x = FastOwenScrambling(Point.x, EvolveSobolSeed(Seed));
	Point.y = FastOwenScrambling(Point.y, EvolveSobolSeed(Seed));
	Point.z = FastOwenScrambling(Point.z, EvolveSobolSeed(Seed));
	Point.w = FastOwenScrambling(Point.w, EvolveSobolSeed(Seed));

	// output as float in [0,1) taking care not to skew the distribution
	// due to the non-uniform spacing of floats in this range
	return (Point >> 8) * 5.96046447754e-08; // * 2^-24
}
// Normal should be normalized
// Returns true when the surface faces away from the whole light source, i.e. when
// dot(Normal, direction to light) < -sqrt(saturate(Radius^2 / (Distance^2 + 1))).
// Outputs:
//   OutConeAxis    - ToLight scaled by the reciprocal of its length
//   OutConeSin     - LightSourceRadius divided by the distance to the light
//   OutDistToLight - length of ToLight
bool IsBackfaceToLocalLight(
	float3 ToLight,
	float3 Normal,
	float LightSourceRadius,
	out float3 OutConeAxis,
	out float OutConeSin,
	out float OutDistToLight)
{
	const float DistanceSqr = dot(ToLight, ToLight);
	const float InvDistance = rsqrt(DistanceSqr);

	OutConeAxis = ToLight * InvDistance;
	OutConeSin = LightSourceRadius * InvDistance;
	OutDistToLight = DistanceSqr * InvDistance;

	// The +1 in the denominator keeps the ratio finite at zero distance; saturate keeps it a valid sine
	const float SinAlpha = sqrt(saturate(Pow2(LightSourceRadius) * rcp(DistanceSqr + 1)));

	return dot(Normal, OutConeAxis) < -SinAlpha;
}
// Version without outputs
// Same backface test as the full overload; the cone axis / sine / distance it computes are discarded.
bool IsBackfaceToLocalLight(float3 ToLight, float3 Normal, float LightSourceRadius)
{
	float3 UnusedConeAxis;
	float UnusedConeSin;
	float UnusedDistToLight;
	const bool bBackface = IsBackfaceToLocalLight(ToLight, Normal, LightSourceRadius, UnusedConeAxis, UnusedConeSin, UnusedDistToLight);
	return bBackface;
}
// Traces up to MaxRayCount SMRT rays from a receiver toward a local (spot or point) light and
// returns the resulting shadow factor plus debug data from the last ray traced.
//
//   VirtualShadowMapId      - shadow map ID of the light (point lights add the cube face index to it)
//   Light                   - light parameters; SpotAngles.x > -2 selects the spot light path
//   PixelPos                - pixel position, Morton-encoded into the random sample index
//   SceneDepth              - only referenced by the disabled (#if 0) penumbra heuristic
//   TranslatedWorldPosition - receiver position in PrimaryView translated world space
//   RayStartOffset          - distance along the ray direction at which the ray starts
//   MaxRayCount             - upper bound on the number of rays traced
//   SamplesPerRay           - number of steps per ray passed to the ray cast
//   CotMaxRayAngleFromLight - cotangent term used to limit the ray length for off-axis rays
//   Noise                   - step offset of the first ray; also passed to SMRTComputeRayMissFactor
//   bCullBackface           - when set, receivers facing away from the light skip tracing entirely
//   WorldNormal             - receiver normal (should be normalized, see IsBackfaceToLocalLight)
//   bUseAdaptiveRayCount    - enables the wave-wide early-outs after the first and second ray
//
// The result always has bValid = true. When tracing is skipped ShadowFactor stays 0.
FVirtualShadowMapSampleResult TraceLocalLight(
int VirtualShadowMapId,
FLightShaderParameters Light,
uint2 PixelPos,
const float SceneDepth,
float3 TranslatedWorldPosition,
float RayStartOffset,
uint MaxRayCount,
uint SamplesPerRay,
float CotMaxRayAngleFromLight,
const float Noise,
const bool bCullBackface,
float3 WorldNormal,
bool bUseAdaptiveRayCount)
{
const uint LightType = Light.SpotAngles.x > -2.0f ? LIGHT_TYPE_SPOT : LIGHT_TYPE_POINT;
const float3 ToLight = Light.TranslatedWorldPosition - TranslatedWorldPosition;
float3 ConeAxis;
float ConeSin;
float DistToLight;
bool bBackface = IsBackfaceToLocalLight(ToLight, WorldNormal, Light.SourceRadius, ConeAxis, ConeSin, DistToLight);
FVirtualShadowMapSampleResult Result = InitVirtualShadowMapSampleResult();
Result.bValid = true;
Result.ShadowFactor = 0.0f;
// Backfacing receivers are only skipped when backface culling was requested
if (!bBackface || !bCullBackface)
{
float StepOffset = Noise;
uint TileID = MortonEncode( PixelPos );
// Move from PrimaryView translated world to Shadow translated world
// NOTE: Cube map faces all share the same PreViewTranslation
FVirtualShadowMapProjectionShaderData BaseProjectionData = GetVirtualShadowMapProjectionData(VirtualShadowMapId);
float3 PrimaryToShadowTranslation = LWCToFloat(LWCSubtract(BaseProjectionData.PreViewTranslation, PrimaryView.PreViewTranslation));
float3 ShadowTranslatedWorld = TranslatedWorldPosition + PrimaryToShadowTranslation;
// i is declared outside the loop because it is read afterwards to compute the ray count
uint i = 0;
uint RayMissCount = 0;
for( ; i < MaxRayCount; i++ )
{
RandomSequence RandSequence;
#if 0
RandomSequence_Initialize( RandSequence, PixelPos, i, View.StateFrameIndex, MaxRayCount );
float4 RandSample = RandomSequence_GenerateSample4D( RandSequence );
#else
// Combine pixel-level and sample-level bits into the sample index (visible structure will be hidden by owen scrambling of the index)
RandSequence.SampleIndex = reversebits((65536 * View.StateFrameIndex + TileID) * MaxRayCount + i);
RandSequence.SampleSeed = 0; // always use the same sequence
float4 RandSample = SobolSampler( RandSequence.SampleIndex, RandSequence.SampleSeed, 8 );
#endif
// Pick a point on a disk scaled by the light's cone sine and turn it into a direction around ConeAxis
float2 LightUV = UniformSampleDiskConcentricApprox( RandSample.xy ).xy;
LightUV *= ConeSin;
float SinTheta2 = dot( LightUV, LightUV );
float SinTheta = sqrt( SinTheta2 );
float CosTheta = sqrt( 1 - SinTheta2 );
float3 Dir = TangentToWorld( float3( LightUV, CosTheta ), ConeAxis );
// The further afield our rays go the poorer our approximation is as the "bend" due to our testing
// against a shadow map instead of along the ray increases. Thus we avoid going all the way to the light
// where the projection becomes extreme.
// Clip ray to max angle to limit penumbra sizes
// (the saturate caps the ray end at 75% of the distance to the light)
float ClampedEnd = DistToLight * 0.75 * saturate( 1.5 / ( CosTheta + CotMaxRayAngleFromLight * SinTheta ) );
// Keep the start strictly before the end even when RayStartOffset exceeds the clamped length
float Start = min(RayStartOffset, ClampedEnd - 1e-6f);
float3 RayStart = ShadowTranslatedWorld + Dir * Start;
float3 RayEnd = ShadowTranslatedWorld + Dir * ClampedEnd;
FSMRTResult SMRTResult;
FSMRTSingleRayState RayState; // For debug output
if ( LightType == LIGHT_TYPE_SPOT )
{
SMRTResult = ShadowRayCastSpotLight(
VirtualShadowMapId,
RayStart,
RayEnd,
SamplesPerRay,
StepOffset,
RayState);
}
else
{
SMRTResult = ShadowRayCastPointLight(
VirtualShadowMapId,
RayStart,
RayEnd,
SamplesPerRay,
StepOffset,
RayState);
}
RayMissCount += SMRTResult.bValidHit ? 0 : 1;
// Debug output (DCE'd if not used)
// Overwritten every iteration, so the values of the last traced ray are what is returned
Result.ClipmapIndexOrMipLevel = RayState.MipLevel;
Result.VirtualTexelAddress = RayState.VirtualTexelAddress;
Result.PhysicalTexelAddress = RayState.PhysicalTexelAddress;
Result.OccluderDistance = SMRTResult.OccluderDistance;
if (bUseAdaptiveRayCount)
{
if( i == 0 )
{
bool bHit = SMRTResult.bValidHit;
#if 0 // Disabled heuristic penumbra size path for now as it creates some speckle artifacts near the edges of sharp shadows
if( bHit )
{
#if 0
//float WorldBluringRadius = ComputeLightSampleWorldBluringRadius( TranslatedWorldPosition, LIGHT_TYPE_SPOT, Light, SMRTResult.OccluderDistance );
//float PixelBluringRadius = ProjectWorldDistanceToPixelDistance( SceneDepth, WorldBluringRadius );
float WorldBluringRadius = Light.SourceRadius * SMRTResult.OccluderDistance / max( DistToLight - SMRTResult.OccluderDistance, 0.001 );
float PixelBluringRadius = WorldBluringRadius * View.ViewSizeAndInvSize.x * 0.5 / ( SceneDepth * View.ClipToView[0][0] );
bHit = PixelBluringRadius > 2;
#else
bHit = SMRTResult.OccluderDistance * Light.SourceRadius * ( 0.25 * View.ViewSizeAndInvSize.y / ( SceneDepth * View.ClipToView[1][1] ) ) > max( DistToLight - Result.OccluderDistance, 0.001 );
#endif
}
#endif
// All lanes miss or hit something close enough to not be noisy.
// (with the heuristic above disabled, bHit is simply whether the first ray hit)
bool bAllLanesMiss = WaveActiveAllTrue( !bHit );
if( bAllLanesMiss )
{
break;
}
}
else if( i == 1 )
{
// After 2 iterations and all have hit, must be in umbra.
// TODO: Modify this heuristic based on ray counts to avoid blocky artifacts in near-umbra
bool bAllLanesHit = WaveActiveAllTrue( RayMissCount == 0 );
if( bAllLanesHit )
{
break;
}
}
}
// Derive the next ray's step offset by hashing the bits of the current one;
// the top 24 bits of the hash are mapped to a float in [0,1)
uint Seed = asuint( StepOffset );
StepOffset = ( EvolveSobolSeed( Seed ) >> 8 ) * 5.96046447754e-08; // * 2^-24;
}
// After a break i is the index of the last traced ray (count = i + 1);
// after a normal loop exit i already equals MaxRayCount, hence the min
uint RayCount = min(i + 1, MaxRayCount); // break vs regular for loop exit
Result.ShadowFactor = SMRTComputeRayMissFactor(RayMissCount, RayCount, MaxRayCount, Noise);
Result.RayCount = RayCount;
}
return Result;
}
// Generate ray based on light source geometry
// Picks a direction inside the cone subtended by the light source (using a Sobol sample derived
// from the pixel position, frame index and ray index) and writes the resulting ray endpoints.
// Returns false, leaving OutRayStart / OutRayEnd untouched, when the surface faces away from the light.
bool GenerateRayLocalLight(
	FLightShaderParameters Light,
	uint2 PixelPos,
	float3 TranslatedWorldPosition,
	float3 WorldNormal,
	float CotMaxRayAngleFromLight,
	uint RayIndex,
	uint RayCount,
	inout float3 OutRayStart,
	inout float3 OutRayEnd)
{
	float3 LightConeAxis;
	float LightConeSin;
	float LightDistance;
	const bool bFacesAway = IsBackfaceToLocalLight(
		Light.TranslatedWorldPosition - TranslatedWorldPosition,
		WorldNormal,
		Light.SourceRadius,
		LightConeAxis,
		LightConeSin,
		LightDistance);
	if (bFacesAway)
	{
		return false;
	}

	const uint PixelMortonCode = MortonEncode(PixelPos);

	RandomSequence RandSequence;
	// Combine pixel-level and sample-level bits into the sample index (visible structure will be hidden by owen scrambling of the index)
	RandSequence.SampleIndex = reversebits((65536 * View.StateFrameIndex + PixelMortonCode) * RayCount + RayIndex);
	RandSequence.SampleSeed = 0; // always use the same sequence
	const float2 RandomUV = SobolSampler(RandSequence.SampleIndex, RandSequence.SampleSeed, 8).xy;

	// Disk sample scaled by the cone sine, turned into a direction around the cone axis
	const float2 ConeUV = UniformSampleDiskConcentricApprox(RandomUV).xy * LightConeSin;
	const float SinThetaSqr = dot(ConeUV, ConeUV);
	const float SinTheta = sqrt(SinThetaSqr);
	const float CosTheta = sqrt(1 - SinThetaSqr);
	const float3 RayDir = TangentToWorld(float3(ConeUV, CosTheta), LightConeAxis);

	// The further afield our rays go the poorer our approximation is as the "bend" due to our testing
	// against a shadow map instead of along the ray increases. Thus we avoid going all the way to the light
	// where the projection becomes extreme.
	// Clip ray to max angle to limit penumbra sizes
	const float RayLength = LightDistance * 0.75 * saturate(1.5 / (CosTheta + CotMaxRayAngleFromLight * SinTheta));

	OutRayStart = TranslatedWorldPosition;
	OutRayEnd = TranslatedWorldPosition + RayDir * RayLength;
	return true;
}