// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	ShadowFilteringCommon.usf: Contains functions to filter a shadowmap, shared between forward/deferred shading.
=============================================================================*/

/** Everything the manual PCF filters below need in order to sample and compare the shadow depth map. */
struct FPCFSamplerSettings
{
	Texture2D		ShadowDepthTexture;
	SamplerState	ShadowDepthTextureSampler;

	// XY - size of the shadowmap in texels (shadow UV * XY yields a texel position)
	// ZW - inverse size of the shadowmap (texel position * ZW yields a shadow UV)
	float4			ShadowBufferSize;

	// SceneDepth in lightspace.
	float			SceneDepth;

	// Slope of the soft depth comparison used by CalculateOcclusion() when bSubsurface is false.
	float			TransitionScale;

	// set by the caller, constant for the code so only one code branch should be compiled
	bool			bSubsurface;

	// only used if bSubsurface is true
	float			DensityMulConstant;

	// only used if bSubsurface is true
	float2			ProjectionDepthBiasParameters;
};

/**
 * Linear PCF over a 3x3 block of occlusion values: averages the four overlapping 2x2 bilinear taps.
 * Note: the "VerticalLerpXY" temporaries hold the two rows after the horizontal (Fraction.x) blend,
 * they are then blended vertically with Fraction.y.
 *
 * @param Fraction sub-texel position used as bilinear weights
 * @param Values0 row 0 from left to right: x,y,z
 * @param Values1 row 1 from left to right: x,y,z
 * @param Values2 row 2 from left to right: x,y,z
 * @return filtered occlusion in 0..1
 */
// can be optimized
float PCF2x2(float2 Fraction, float3 Values0, float3 Values1, float3 Values2)
{
	float2 VerticalLerp00 = lerp(float2(Values0.x, Values1.x), float2(Values0.y, Values1.y), Fraction.xx);
	float PCFResult00 = lerp(VerticalLerp00.x, VerticalLerp00.y, Fraction.y);
	float2 VerticalLerp10 = lerp(float2(Values0.y, Values1.y), float2(Values0.z, Values1.z), Fraction.xx);
	float PCFResult10 = lerp(VerticalLerp10.x, VerticalLerp10.y, Fraction.y);
	float2 VerticalLerp01 = lerp(float2(Values1.x, Values2.x), float2(Values1.y, Values2.y), Fraction.xx);
	float PCFResult01 = lerp(VerticalLerp01.x, VerticalLerp01.y, Fraction.y);
	float2 VerticalLerp11 = lerp(float2(Values1.y, Values2.y), float2(Values1.z, Values2.z), Fraction.xx);
	float PCFResult11 = lerp(VerticalLerp11.x, VerticalLerp11.y, Fraction.y);

	return saturate((PCFResult00 + PCFResult10 + PCFResult01 + PCFResult11) * 0.25f);
}

/**
 * Linear PCF over a 4x4 block of occlusion values: averages the nine overlapping 2x2 bilinear taps.
 *
 * @param Fraction sub-texel position used as bilinear weights
 * @param Values0 row 0 from left to right: x,y,z,w
 * @param Values1 row 1 from left to right: x,y,z,w
 * @param Values2 row 2 from left to right: x,y,z,w
 * @param Values3 row 3 from left to right: x,y,z,w
 * @return filtered occlusion in 0..1
 */
// can be optimized
float PCF3x3(float2 Fraction, float4 Values0, float4 Values1, float4 Values2, float4 Values3)
{
	float2 VerticalLerp00 = lerp(float2(Values0.x, Values1.x), float2(Values0.y, Values1.y), Fraction.xx);
	float PCFResult00 = lerp(VerticalLerp00.x, VerticalLerp00.y, Fraction.y);
	float2 VerticalLerp10 = lerp(float2(Values0.y, Values1.y), float2(Values0.z, Values1.z), Fraction.xx);
	float PCFResult10 = lerp(VerticalLerp10.x, VerticalLerp10.y, Fraction.y);
	float2 VerticalLerp20 = lerp(float2(Values0.z, Values1.z), float2(Values0.w, Values1.w), Fraction.xx);
	float PCFResult20 = lerp(VerticalLerp20.x, VerticalLerp20.y, Fraction.y);

	float2 VerticalLerp01 = lerp(float2(Values1.x, Values2.x), float2(Values1.y, Values2.y), Fraction.xx);
	float PCFResult01 = lerp(VerticalLerp01.x, VerticalLerp01.y, Fraction.y);
	float2 VerticalLerp11 = lerp(float2(Values1.y, Values2.y), float2(Values1.z, Values2.z), Fraction.xx);
	float PCFResult11 = lerp(VerticalLerp11.x, VerticalLerp11.y, Fraction.y);
	float2 VerticalLerp21 = lerp(float2(Values1.z, Values2.z), float2(Values1.w, Values2.w), Fraction.xx);
	float PCFResult21 = lerp(VerticalLerp21.x, VerticalLerp21.y, Fraction.y);

	float2 VerticalLerp02 = lerp(float2(Values2.x, Values3.x), float2(Values2.y, Values3.y), Fraction.xx);
	float PCFResult02 = lerp(VerticalLerp02.x, VerticalLerp02.y, Fraction.y);
	float2 VerticalLerp12 = lerp(float2(Values2.y, Values3.y), float2(Values2.z, Values3.z), Fraction.xx);
	float PCFResult12 = lerp(VerticalLerp12.x, VerticalLerp12.y, Fraction.y);
	float2 VerticalLerp22 = lerp(float2(Values2.z, Values3.z), float2(Values2.w, Values3.w), Fraction.xx);
	float PCFResult22 = lerp(VerticalLerp22.x, VerticalLerp22.y, Fraction.y);

	// Exact 1/9 (same normalization as PCF3x3gather) so that a fully lit kernel evaluates to 1 instead of 0.99999
	return saturate((PCFResult00 + PCFResult10 + PCFResult20 + PCFResult01 + PCFResult11 + PCFResult21 + PCFResult02 + PCFResult12 + PCFResult22) * (1.0f / 9.0f));
}

/**
 * Linear PCF, input 4x4 delivered as four 2x2 gather quads.
 * using Gather: xyzw in counter clockwise order starting with the sample to the lower left of the queried location
 *
 * @param Fraction sub-texel position used as weights for the border columns/rows
 * @param Values0 left top
 * @param Values1 right top
 * @param Values2 left bottom
 * @param Values3 right bottom
 * @return weighted sum of the 4x4 values, normalized by 1/9
 */
float PCF3x3gather(float2 Fraction, float4 Values0, float4 Values1, float4 Values2, float4 Values3)
{
	// One accumulator per row (x = row 0 .. w = row 3), summed horizontally first
	float4 Results;

	// leftmost column fades out with Fraction.x
	Results.x = Values0.w * (1.0 - Fraction.x);
	Results.y = Values0.x * (1.0 - Fraction.x);
	Results.z = Values2.w * (1.0 - Fraction.x);
	Results.w = Values2.x * (1.0 - Fraction.x);

	// the two inner columns always have full weight
	Results.x += Values0.z;
	Results.y += Values0.y;
	Results.z += Values2.z;
	Results.w += Values2.y;
	Results.x += Values1.w;
	Results.y += Values1.x;
	Results.z += Values3.w;
	Results.w += Values3.x;

	// rightmost column fades in with Fraction.x
	Results.x += Values1.z * Fraction.x;
	Results.y += Values1.y * Fraction.x;
	Results.z += Values3.z * Fraction.x;
	Results.w += Values3.y * Fraction.x;

	// vertical weights: top row fades out, bottom row fades in, inner rows have full weight
	return dot(Results, float4(1.0 - Fraction.y, 1.0, 1.0, Fraction.y) * (1.0 / 9.0));
}

/**
 * Horizontal PCF, input 6x2 delivered as three 2x2 gather quads (same xyzw order as described for PCF3x3gather).
 *
 * @param Fraction only .x is used, as weight for the leftmost / rightmost column
 * @param Values00 left quad
 * @param Values20 middle quad
 * @param Values40 right quad
 * @return x: horizontally weighted sum of the row built from the .w/.z components, y: same for the row built from the .x/.y components (not normalized)
 */
float2 HorizontalPCF5x2(float2 Fraction, float4 Values00, float4 Values20, float4 Values40)
{
	float Results0;
	float Results1;

	// leftmost column fades out with Fraction.x
	Results0 = Values00.w * (1.0 - Fraction.x);
	Results1 = Values00.x * (1.0 - Fraction.x);

	// the four inner columns always have full weight
	Results0 += Values00.z;
	Results1 += Values00.y;
	Results0 += Values20.w;
	Results1 += Values20.x;
	Results0 += Values20.z;
	Results1 += Values20.y;
	Results0 += Values40.w;
	Results1 += Values40.x;

	// rightmost column fades in with Fraction.x
	Results0 += Values40.z * Fraction.x;
	Results1 += Values40.y * Fraction.x;

	return float2(Results0, Results1);
}

/**
 * Lowest quality with PCF: a single bilinear blend of one 2x2 gather quad.
 *
 * @param Fraction sub-texel position used as bilinear weights
 * @param Values00 2x2 occlusion values in gather order (w,z form one row, x,y the other)
 */
float PCF1x1(float2 Fraction, float4 Values00)
{
	float2 HorizontalLerp00 = lerp(Values00.wx, Values00.zy, Fraction.xx);

	return lerp(HorizontalLerp00.x, HorizontalLerp00.y, Fraction.y);
}

/**
 * Converts four shadowmap depths into occlusion factors (1 = unshadowed, 0 = fully shadowed).
 *
 * @param ShadowmapDepth depths read from the shadowmap
 * @param Settings provides SceneDepth and the comparison parameters
 */
float4 CalculateOcclusion(float4 ShadowmapDepth, FPCFSamplerSettings Settings)
{
	if (Settings.bSubsurface)
	{
		// Determine the distance that the light traveled through the subsurface object
		// This assumes that anything between this subsurface pixel and the light was also a subsurface material,
		// As a result, subsurface materials receive leaked light based on how transparent they are
		// NOTE(review): the float3 overload below additionally subtracts Settings.ProjectionDepthBiasParameters.x
		// from ShadowmapDepth here, this overload does not - confirm whether the difference is intentional.
		float4 Thickness = max(Settings.SceneDepth - ShadowmapDepth, 0);
		float4 Occlusion = saturate(exp(-Thickness * Settings.DensityMulConstant));
		// Never shadow from depths that were never written to (max depth value)
		return ShadowmapDepth > .99f ? 1 : Occlusion;
	}
	else
	{
		// The standard comparison is SceneDepth < ShadowmapDepth
		// Using a soft transition based on depth difference
		// Offsets shadows a bit but reduces self shadowing artifacts considerably
		float TransitionScale = Settings.TransitionScale;
		return saturate((ShadowmapDepth - Settings.SceneDepth) * TransitionScale + 1);
	}
}

/**
 * Converts three shadowmap depths into occlusion factors (1 = unshadowed, 0 = fully shadowed).
 *
 * @param ShadowmapDepth depths read from the shadowmap
 * @param Settings provides SceneDepth and the comparison parameters
 */
float3 CalculateOcclusion(float3 ShadowmapDepth, FPCFSamplerSettings Settings)
{
	if (Settings.bSubsurface)
	{
		// Determine the distance that the light traveled through the subsurface object
		// This assumes that anything between this subsurface pixel and the light was also a subsurface material,
		// As a result, subsurface materials receive leaked light based on how transparent they are
		float3 Thickness = max(Settings.SceneDepth - (ShadowmapDepth - Settings.ProjectionDepthBiasParameters.x), 0);
		float3 Occlusion = saturate(exp(-Thickness * Settings.DensityMulConstant));
		// Never shadow from depths that were never written to (max depth value)
		return ShadowmapDepth > .99f ? 1 : Occlusion;
	}
	else
	{
		// The standard comparison is Settings.SceneDepth < ShadowmapDepth
		// Using a soft transition based on depth difference
		// Offsets shadows a bit but reduces self shadowing artifacts considerably
		float TransitionScale = Settings.TransitionScale;
		return saturate((ShadowmapDepth - Settings.SceneDepth) * TransitionScale + 1);
	}
}

/**
 * Samples three horizontally adjacent shadowmap texels (columns 0..2) and converts them to occlusion.
 *
 * @param Sample00TexelCenter position of column 0 / row 0 in texel units
 * @param VerticalOffset row to fetch, in texels relative to Sample00TexelCenter
 * @param Values0 receives the occlusion of the three texels from left to right: x,y,z
 */
void FetchRowOfThree(float2 Sample00TexelCenter, float VerticalOffset, out float3 Values0, FPCFSamplerSettings Settings)
{
	Values0.x = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(0, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0.y = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(1, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0.z = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(2, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0 = CalculateOcclusion(Values0, Settings);
}

/**
 * Samples four horizontally adjacent shadowmap texels (columns 0..3) and converts them to occlusion.
 *
 * @param Sample00TexelCenter position of column 0 / row 0 in texel units
 * @param VerticalOffset row to fetch, in texels relative to Sample00TexelCenter
 * @param Values0 receives the occlusion of the four texels from left to right: x,y,z,w
 */
void FetchRowOfFour(float2 Sample00TexelCenter, float VerticalOffset, out float4 Values0, FPCFSamplerSettings Settings)
{
	Values0.x = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(0, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0.y = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(1, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0.z = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(2, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0.w = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(3, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values0 = CalculateOcclusion(Values0, Settings);
}

/**
 * Samples the three texels (columns 4..6) that follow the four fetched by FetchRowOfFour() and converts them to occlusion.
 *
 * @param Sample00TexelCenter position of column 0 / row 0 in texel units
 * @param VerticalOffset row to fetch, in texels relative to Sample00TexelCenter
 * @param Values1 receives the occlusion of the three texels from left to right: x,y,z
 */
void FetchRowOfThreeAfterFour(float2 Sample00TexelCenter, float VerticalOffset, out float3 Values1, FPCFSamplerSettings Settings)
{
	Values1.x = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(4, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values1.y = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(5, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values1.z = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (Sample00TexelCenter + float2(6, VerticalOffset)) * Settings.ShadowBufferSize.zw, 0).r;
	Values1 = CalculateOcclusion(Values1, Settings);
}

/**
 * 3x3 point samples filtered with PCF2x2().
 * break this out for forward rendering as it's not part of ManualPCF's set of shadowquality settings.
 *
 * @param ShadowPosition shadowmap UV to filter around
 * @return filtered occlusion in 0..1
 */
float Manual2x2PCF(float2 ShadowPosition, FPCFSamplerSettings Settings)
{
	float2 TexelPos = ShadowPosition * Settings.ShadowBufferSize.xy - 0.5f;	// bias to be consistent with texture filtering hardware
	float2 Fraction = frac(TexelPos);
	float2 TexelCenter = floor(TexelPos) + 0.5f;	// bias to get reliable texel center content

	float3 SamplesValues0, SamplesValues1, SamplesValues2;
	FetchRowOfThree(TexelCenter, 0, SamplesValues0, Settings);
	FetchRowOfThree(TexelCenter, 1, SamplesValues1, Settings);
	FetchRowOfThree(TexelCenter, 2, SamplesValues2, Settings);

	return PCF2x2(Fraction, SamplesValues0, SamplesValues1, SamplesValues2);
}

/**
 * Single unfiltered shadowmap comparison at ShadowPosition.
 *
 * @param ShadowPosition shadowmap UV to sample
 * @return occlusion of that one sample
 */
float ManualNoFiltering(float2 ShadowPosition, FPCFSamplerSettings Settings)
{
	// very low quality but very good performance, useful to profile, 1 sample, not using gather4
	return CalculateOcclusion(Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, ShadowPosition, 0).rrr, Settings).r;
}

/**
 * 2x2 samples (one gather, or four point samples without FEATURE_GATHER4) filtered with PCF1x1().
 *
 * @param ShadowPosition shadowmap UV to filter around
 * @return bilinearly filtered occlusion
 */
float Manual1x1PCF(float2 ShadowPosition, FPCFSamplerSettings Settings)
{
	float2 TexelPos = ShadowPosition * Settings.ShadowBufferSize.xy - 0.5f;	// bias to be consistent with texture filtering hardware
	float2 Fraction = frac(TexelPos);
	float2 TexelCenter = floor(TexelPos) + 0.5f;	// bias to get reliable texel center content

	// using Gather: xyzw in counter clockwise order starting with the sample to the lower left of the queried location
	float4 Samples;

#if FEATURE_GATHER4
	Samples = Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, TexelCenter * Settings.ShadowBufferSize.zw);
#else
	// emulate the gather component order with four individual fetches
	Samples.x = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (TexelCenter.xy + float2(0, 1)) * Settings.ShadowBufferSize.zw, 0).r;
	Samples.y = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (TexelCenter.xy + float2(1, 1)) * Settings.ShadowBufferSize.zw, 0).r;
	Samples.z = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (TexelCenter.xy + float2(1, 0)) * Settings.ShadowBufferSize.zw, 0).r;
	Samples.w = Texture2DSampleLevel(Settings.ShadowDepthTexture, Settings.ShadowDepthTextureSampler, (TexelCenter.xy + float2(0, 0)) * Settings.ShadowBufferSize.zw, 0).r;
#endif

	float4 Values00 = CalculateOcclusion(Samples, Settings);
	return PCF1x1(Fraction, Values00);
}

/**
 * Filters the shadowmap around ShadowPosition with the kernel selected at compile time:
 *   SHADOW_QUALITY == 1: no filtering, 1 sample
 *   SHADOW_QUALITY == 2: 2x2 samples
 *   SHADOW_QUALITY == 3: 4x4 samples
 *   any other value:     6x6 samples with FEATURE_GATHER4, 7x7 samples without
 *
 * @param ShadowPosition shadowmap UV to filter around
 * @return filtered occlusion (1 = unshadowed, 0 = fully shadowed)
 */
float ManualPCF(float2 ShadowPosition, FPCFSamplerSettings Settings)
{
	// The tiers are mutually exclusive, so chain them: exactly one path is compiled
	// and no unreachable code is left behind a return.
#if SHADOW_QUALITY == 1

	return ManualNoFiltering(ShadowPosition, Settings);

#elif SHADOW_QUALITY == 2

	// low quality, 2x2 samples, using and not using gather4
	return Manual1x1PCF(ShadowPosition, Settings);

#elif SHADOW_QUALITY == 3

	// medium quality, 4x4 samples, using and not using gather4
	{
		float2 TexelPos = ShadowPosition * Settings.ShadowBufferSize.xy - 0.5f;	// bias to be consistent with texture filtering hardware
		float2 Fraction = frac(TexelPos);
		float2 TexelCenter = floor(TexelPos) + 0.5f;	// bias to get reliable texel center content
		{
			float2 Sample00TexelCenter = TexelCenter - float2(1, 1);

			float4 SampleValues0, SampleValues1, SampleValues2, SampleValues3;

#if FEATURE_GATHER4
			float2 SamplePos = TexelCenter * Settings.ShadowBufferSize.zw;	// bias to get reliable texel center content
			// Use the sampler carried in Settings, like every other fetch in this file,
			// instead of relying on a global ShadowDepthTextureSampler being declared by the including shader.
			SampleValues0 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(-1, -1)), Settings);
			SampleValues1 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(1, -1)), Settings);
			SampleValues2 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(-1, 1)), Settings);
			SampleValues3 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(1, 1)), Settings);
			return PCF3x3gather(Fraction, SampleValues0, SampleValues1, SampleValues2, SampleValues3);
#else // FEATURE_GATHER4
			FetchRowOfFour(Sample00TexelCenter, 0, SampleValues0, Settings);
			FetchRowOfFour(Sample00TexelCenter, 1, SampleValues1, Settings);
			FetchRowOfFour(Sample00TexelCenter, 2, SampleValues2, Settings);
			FetchRowOfFour(Sample00TexelCenter, 3, SampleValues3, Settings);
			return PCF3x3(Fraction, SampleValues0, SampleValues1, SampleValues2, SampleValues3);
#endif // FEATURE_GATHER4
		}
	}

#else // SHADOW_QUALITY

#if FEATURE_GATHER4
	// high quality, 6x6 samples, using gather4
	{
		float2 TexelPos = ShadowPosition * Settings.ShadowBufferSize.xy - 0.5f;	// bias to be consistent with texture filtering hardware
		float2 Fraction = frac(TexelPos);
		float2 TexelCenter = floor(TexelPos);
		float2 SamplePos = (TexelCenter + 0.5f) * Settings.ShadowBufferSize.zw;	// bias to get reliable texel center content

		float Results;

		// top two rows: the upper one fades out with Fraction.y
		float4 Values00 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(-2, -2)), Settings);
		float4 Values20 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(0, -2)), Settings);
		float4 Values40 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(2, -2)), Settings);

		float2 Row0 = HorizontalPCF5x2(Fraction, Values00, Values20, Values40);
		Results = Row0.x * (1.0f - Fraction.y) + Row0.y;

		// middle two rows: full weight
		float4 Values02 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(-2, 0)), Settings);
		float4 Values22 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(0, 0)), Settings);
		float4 Values42 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(2, 0)), Settings);

		float2 Row1 = HorizontalPCF5x2(Fraction, Values02, Values22, Values42);
		Results += Row1.x + Row1.y;

		// bottom two rows: the lower one fades in with Fraction.y
		float4 Values04 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(-2, 2)), Settings);
		float4 Values24 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(0, 2)), Settings);
		float4 Values44 = CalculateOcclusion(Settings.ShadowDepthTexture.Gather(Settings.ShadowDepthTextureSampler, SamplePos, int2(2, 2)), Settings);

		float2 Row2 = HorizontalPCF5x2(Fraction, Values04, Values24, Values44);
		Results += Row2.x + Row2.y * Fraction.y;

		// the horizontal and vertical weights each sum to 5, so normalize by 1/25
		return 0.04f * Results;
	}
#else // FEATURE_GATHER4
	// high quality, 7x7 samples, not using gather4 (todo: ideally we make this 6x6 to get same results with gather code)
	{
		// NOTE(review): unlike the other paths, Fraction is taken without the -0.5 bias and the fetch
		// positions are not offset to texel centers - confirm this is intended for this fallback.
		float2 Fraction = frac(ShadowPosition * Settings.ShadowBufferSize.xy);
		float2 Sample00TexelCenter = floor(ShadowPosition * Settings.ShadowBufferSize.xy) - float2(3, 3);

		// Fetch 7x7 shadowmap point samples
		// Do 6x6 PCF samples, sharing the point samples between neighboring PCF samples
		float4 Results;

		// Row 3 is shared between the upper and the lower pair of 4x4 blocks
		float4 SampleValues03;
		float4 SampleValues13;

		{
			float4 SampleValues10;
			float4 SampleValues11;
			float4 SampleValues12;

			// Group work to minimize temporary registers and to split texture work with PCF ALU operations to hide texture latency
			// Without this layout (all texture lookups at the beginning, PCF ALU's at the end) this shader was 4x slower on Nvidia cards
			{
				// upper left 4x4 block, column 3 is handed over to the upper right block
				float4 SampleValues00;
				FetchRowOfFour(Sample00TexelCenter, 0, SampleValues00, Settings);
				SampleValues10.x = SampleValues00.w;

				float4 SampleValues01;
				FetchRowOfFour(Sample00TexelCenter, 1, SampleValues01, Settings);
				SampleValues11.x = SampleValues01.w;

				float4 SampleValues02;
				FetchRowOfFour(Sample00TexelCenter, 2, SampleValues02, Settings);
				SampleValues12.x = SampleValues02.w;

				FetchRowOfFour(Sample00TexelCenter, 3, SampleValues03, Settings);
				SampleValues13.x = SampleValues03.w;
				Results.x = PCF3x3(Fraction, SampleValues00, SampleValues01, SampleValues02, SampleValues03);
			}

			{
				// upper right 4x4 block
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 0, SampleValues10.yzw, Settings);
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 1, SampleValues11.yzw, Settings);
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 2, SampleValues12.yzw, Settings);
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 3, SampleValues13.yzw, Settings);
				Results.y = PCF3x3(Fraction, SampleValues10, SampleValues11, SampleValues12, SampleValues13);
			}
		}

		{
			float4 SampleValues14;
			float4 SampleValues15;
			float4 SampleValues16;

			{
				// lower left 4x4 block, column 3 is handed over to the lower right block
				float4 SampleValues04;
				FetchRowOfFour(Sample00TexelCenter, 4, SampleValues04, Settings);
				SampleValues14.x = SampleValues04.w;

				float4 SampleValues05;
				FetchRowOfFour(Sample00TexelCenter, 5, SampleValues05, Settings);
				SampleValues15.x = SampleValues05.w;

				float4 SampleValues06;
				FetchRowOfFour(Sample00TexelCenter, 6, SampleValues06, Settings);
				SampleValues16.x = SampleValues06.w;

				Results.z = PCF3x3(Fraction, SampleValues03, SampleValues04, SampleValues05, SampleValues06);
			}

			{
				// lower right 4x4 block
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 4, SampleValues14.yzw, Settings);
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 5, SampleValues15.yzw, Settings);
				FetchRowOfThreeAfterFour(Sample00TexelCenter, 6, SampleValues16.yzw, Settings);

				Results.w = PCF3x3(Fraction, SampleValues13, SampleValues14, SampleValues15, SampleValues16);
			}
		}

		// average of the four 4x4 block results
		return dot(Results, .25f);
	}
#endif // FEATURE_GATHER4

#endif // SHADOW_QUALITY
}