Files
UnrealEngineUWP/Engine/Shaders/ScreenSpaceReflections.usf
Martin Mittring 14c6584195 * added VisualzieSSR showflag (from NVIDIA branch)
[CL 2118796 by Martin Mittring in Main branch]
2014-06-26 20:02:41 -04:00

416 lines
14 KiB
Plaintext

// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
ScreenSpaceReflections.usf: To generate screen space reflections as a postprocess
=============================================================================*/
#include "Common.usf"
#include "PostProcessCommon.usf"
#include "DeferredShadingCommon.usf"
#include "Random.usf"
#include "BRDF.usf"
#include "MonteCarlo.usf"
// Compile-time switches for the ray march.
// 0:reference (slower, potentially higher quality) 1:use mips of the HZB depth (better performance)
#define USE_HZB 1
// Ray march implementation selection used by RayCast(): SCALAR_BRANCHLESS is tested first,
// then VECTORIZED_BRANCHLESS, and the early-out variant is the fallback when both are 0.
// NOTE: VECTORIZED_EARLY_OUT itself is not tested by any #if in this file; it only documents the fallback.
#define SCALAR_BRANCHLESS 0
#define VECTORIZED_BRANCHLESS 0
#define VECTORIZED_EARLY_OUT 1
// .r:Intensity in 0..1 range, .g:RoughnessMaskMul, .b:unused, .a:unused
float4 SSRParams;
// Spreads the bits of x apart so that bit k ends up at bit 2k (one half of a 2D Morton code).
// Only the two narrowest spreading stages are active (the 8- and 4-bit stages are left out),
// so the result is only a clean spread for inputs below 16. The caller in this file passes
// values masked with 3.
uint MortonCode( uint x )
{
	// Stage 1: move bits 2..3 up by two, keep bit pairs.
	const uint SpreadPairs = ( x ^ ( x << 2 ) ) & 0x33333333;
	// Stage 2: move every odd bit up by one, keep even bit positions only.
	return ( SpreadPairs ^ ( SpreadPairs << 1 ) ) & 0x55555555;
}
// Reverses the bit order inside every 4-bit nibble of the input.
// The wider swap stages of a full 32-bit reversal (16, 8 and 4 bit) are left out,
// so nibbles stay in place; callers in this file mask the result with 15.
uint ReverseUIntBits( uint bits )
{
	// Swap the two bit pairs of each nibble.
	const uint SwappedPairs = ( ( bits << 2 ) & 0xcccccccc ) | ( ( bits >> 2 ) & 0x33333333 );
	// Swap neighbouring bits inside each pair.
	return ( ( SwappedPairs << 1 ) & 0xaaaaaaaa ) | ( ( SwappedPairs >> 1 ) & 0x55555555 );
}
// Fetches four depth samples at once so the texture reads can be grouped.
// SampleUV0.xy / SampleUV0.zw / SampleUV1.xy / SampleUV1.zw are the four UVs; the returned
// x/y/z/w components hold the red channel of the matching fetch.
// With USE_HZB the supplied texture, sampler and mip Level are used; otherwise those three
// arguments are ignored and SceneDepthTexture is read at mip 0.
float4 SampleDepthTexture( Texture2D Texture, SamplerState Sampler, float Level, float4 SampleUV0, float4 SampleUV1 )
{
#if USE_HZB
	return float4(
		Texture2DSampleLevel( Texture, Sampler, SampleUV0.xy, Level ).r,
		Texture2DSampleLevel( Texture, Sampler, SampleUV0.zw, Level ).r,
		Texture2DSampleLevel( Texture, Sampler, SampleUV1.xy, Level ).r,
		Texture2DSampleLevel( Texture, Sampler, SampleUV1.zw, Level ).r );
#else
	return float4(
		Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV0.xy, 0 ).r,
		Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV0.zw, 0 ).r,
		Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV1.xy, 0 ).r,
		Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV1.zw, 0 ).r );
#endif
}
// Marches a ray through screen space and looks for an intersection with the depth buffer.
//
// @param R                        ray direction (the ray end point is PositionTranslatedWorld + R * SceneDepth)
// @param Roughness                scales how quickly the sampled depth mip level grows along the ray
// @param SceneDepth               depth of the shaded pixel, used as the world space ray length
// @param PositionTranslatedWorld  ray origin in translated world space
// @param NumSteps                 number of depth samples along the ray (the vectorized paths take 4 per loop iteration)
// @param StepOffset               per pixel/ray offset of the first sample, in units of one step
// @return xyz: hit position as buffer UV plus ray depth, w: hit time along the ray.
//         w stays 1 when nothing was hit (callers test w < 1).
//
// The implementation is selected by SCALAR_BRANCHLESS / VECTORIZED_BRANCHLESS; when both are 0
// the vectorized early-out loop is compiled.
float4 RayCast( float3 R, float Roughness, float SceneDepth, float3 PositionTranslatedWorld, int NumSteps, float StepOffset )
{
	// TODO provide RayStartUVz
	// NOTE could clip ray against frustum planes
	// TODO use screen position and skip matrix mul
	float4 RayStartClip = mul( float4( PositionTranslatedWorld, 1 ), View.TranslatedWorldToClip );
	float4 RayEndClip = mul( float4( PositionTranslatedWorld + R * SceneDepth, 1 ), View.TranslatedWorldToClip );
	float3 RayStartScreen = RayStartClip.xyz / RayStartClip.w;
	float3 RayEndScreen = RayEndClip.xyz / RayEndClip.w;
	// Normalize 2D length
	// NOTE(review): the divisor is not guarded; a ray whose start and end project to the same
	// screen position divides by zero here.
	float3 RayStepScreen = ( RayEndScreen - RayStartScreen ) / length( RayEndScreen.xy - RayStartScreen.xy );
	RayStepScreen *= 1.5;
#if USE_HZB
	// The HZB is addressed with 0..1 UVs (y flipped relative to screen position).
	float3 RayStartUVz = float3( RayStartScreen.xy * float2( 0.5, -0.5 ) + 0.5, RayStartScreen.z );
	float3 RayStepUVz = float3( RayStepScreen.xy * float2( 0.5, -0.5 ), RayStepScreen.z );
#else
	// The scene depth buffer is addressed with buffer UVs.
	float3 RayStartUVz = float3( RayStartScreen.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz, RayStartScreen.z );
	float3 RayStepUVz = float3( RayStepScreen.xy * View.ScreenPositionScaleBias.xy, RayStepScreen.z );
#endif
	const float Step = 1.0 / (NumSteps + 1);
	// *2 to get less moire pattern in extreme cases, larger values make object appear not grounded in reflections
	const float CompareTolerance = abs( RayStepScreen.z ) * Step * 2;
	// avoid bugs with early returns inside of loops on certain platform compilers.
	float4 Result = float4( 0, 0, 0, 1 );
#if SCALAR_BRANCHLESS
	// One sample per iteration, fully unrolled, no branches; keeps the smallest hit time.
	float MinHitTime = 1;
	float LastDiff = 0;
	float SampleTime = StepOffset * Step + Step;
	UNROLL
	for( int i = 0; i < NumSteps; i++ )
	{
		float3 SampleUVz = RayStartUVz + RayStepUVz * SampleTime;
		// Use lower res for farther samples
		float Level = Roughness * (i * 4.0 / NumSteps);
		float SampleDepth = PostprocessInput1.SampleLevel( PostprocessInput1Sampler, SampleUVz.xy, Level ).r;
		float DepthDiff = SampleUVz.z - SampleDepth;
		// Hit when DepthDiff lies within (-2 * CompareTolerance, 0)
		bool Hit = abs( DepthDiff + CompareTolerance ) < CompareTolerance;
		// Find more accurate hit using line segment intersection
		float TimeLerp = saturate( LastDiff / (LastDiff - DepthDiff) );
		float IntersectTime = SampleTime + TimeLerp * Step - Step;
		float HitTime = Hit ? IntersectTime : 1;
		MinHitTime = min( MinHitTime, HitTime );
		LastDiff = DepthDiff;
		SampleTime += Step;
	}
	float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
	// HZB UV -> screen position -> buffer UV
	HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
	HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif
	Result = float4( HitUVz, MinHitTime );
#elif VECTORIZED_BRANCHLESS
	// Four samples per iteration, no early out; keeps the smallest hit time over the whole ray.
	float MinHitTime = 1;
	float LastDiff = 0;
	float Level = 0;
	// Vectorized to group fetches
	float4 SampleTime = ( StepOffset + float4( 1, 2, 3, 4 ) ) / (NumSteps + 1);
	float4 SampleUV0 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.xxyy;
	float4 SampleUV1 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.zzww;
	float4 SampleZ = RayStartUVz.zzzz + RayStepUVz.zzzz * SampleTime;
	LOOP
	for( int i = 0; i < NumSteps; i += 4 )
	{
		// Use lower res for farther samples
		float4 SampleDepth = SampleDepthTexture( PostprocessInput1, PostprocessInput1Sampler, Level, SampleUV0, SampleUV1 );
#if 1
		// Cheap variant: the hit time is the sample time, without refinement.
		float4 DepthDiff = SampleZ - SampleDepth;
		bool4 Hit = abs( DepthDiff + CompareTolerance ) < CompareTolerance;
		// If hit set to intersect time. If missed set to 1, beyond end of ray
		float4 HitTime = Hit ? SampleTime : 1;
		// Take closest hit
		HitTime.xy = min( HitTime.xy, HitTime.zw );
		MinHitTime = min( MinHitTime, min( HitTime.x, HitTime.y ) );
#else
		// Line segment intersection
		float4 DepthDiff1 = SampleZ - SampleDepth;
		float4 DepthDiff0 = float4( LastDiff, DepthDiff1.xyz );
		float4 TimeLerp = saturate( DepthDiff0 / (DepthDiff0 - DepthDiff1) );
		float4 IntersectTime = SampleTime + (TimeLerp - 1) / (NumSteps + 1);
		bool4 Hit = abs( DepthDiff1 + CompareTolerance ) < CompareTolerance;
		// If hit set to intersect time. If missed set to 1, beyond end of ray
		float4 HitTime = Hit ? IntersectTime : 1;
		// Take closest hit
		HitTime.xy = min( HitTime.xy, HitTime.zw );
		MinHitTime = min( MinHitTime, min( HitTime.x, HitTime.y ) );
		LastDiff = DepthDiff1.w;
#endif
		// Advance all per-sample state by four steps
		Level += Roughness * (16.0 / NumSteps);
		SampleTime += 4.0 / (NumSteps + 1);
		SampleUV0 += RayStepUVz.xyxy * 4.0 / (NumSteps + 1);
		SampleUV1 += RayStepUVz.xyxy * 4.0 / (NumSteps + 1);
		SampleZ += RayStepUVz.zzzz * 4.0 / (NumSteps + 1);
	}
	float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
	// HZB UV -> screen position -> buffer UV
	HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
	HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif
	Result = float4( HitUVz, MinHitTime );
#else // VECTORIZED_EARLY_OUT
	// Four samples per iteration; leaves the loop on the first group that contains a hit.
	float LastDiff = 0;
	float Level = 0;
	float4 SampleTime = ( StepOffset + float4( 1, 2, 3, 4 ) ) / (NumSteps + 1);
	LOOP
	for( int i = 0; i < NumSteps; i += 4 )
	{
		// Vectorized to group fetches
		float4 SampleUV0 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.xxyy;
		float4 SampleUV1 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.zzww;
		float4 SampleZ = RayStartUVz.zzzz + RayStepUVz.zzzz * SampleTime;
		// Use lower res for farther samples
		float4 SampleDepth = SampleDepthTexture( PostprocessInput1, PostprocessInput1Sampler, Level, SampleUV0, SampleUV1 );
		float4 DepthDiff1 = SampleZ - SampleDepth;
		// Same test as abs( DepthDiff1 + CompareTolerance ) < CompareTolerance
		bool4 Hit = abs( -DepthDiff1 - CompareTolerance ) < CompareTolerance;
		BRANCH if( any( Hit ) )
		{
			// Find more accurate hit using line segment intersection
			float4 DepthDiff0 = float4( LastDiff, DepthDiff1.xyz );
			float4 TimeLerp = saturate( DepthDiff0 / (DepthDiff0 - DepthDiff1) );
			float4 IntersectTime = SampleTime + (TimeLerp - 1) / (NumSteps + 1);
			float4 HitTime = Hit ? IntersectTime : 1;
			// Take closest hit
			HitTime.xy = min( HitTime.xy, HitTime.zw );
			float MinHitTime = min( HitTime.x, HitTime.y );
			float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
			// HZB UV -> screen position -> buffer UV
			HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
			HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif
			Result = float4( HitUVz, MinHitTime );
			break;
		}
		// Carry the last depth difference into the next group for the segment intersection
		LastDiff = DepthDiff1.w;
		Level += Roughness * (16.0 / NumSteps);
		SampleTime += 4.0 / (NumSteps + 1);
	}
#endif
	return Result;
}
// Looks up the scene color for a ray hit and applies the screen-edge and end-of-ray fades.
//
// @param HitUVz   xy: buffer UV of the hit, z: depth of the hit
// @param HitTime  position of the hit along the ray; the result fades out linearly from 0.75 to 1
// @return rgb: faded color, a: combined fade weight
float4 SampleScreenColor( float3 HitUVz, float HitTime )
{
	float4 Color;
	Color.a = 1;
#if PREV_FRAME_COLOR
	// Find previous screen position for hit since color buffer is from last frame
	// TODO combine to single matrix
	float2 HitScreen = ( HitUVz.xy - View.ScreenPositionScaleBias.wz ) / View.ScreenPositionScaleBias.xy;
	float4 WorldPos = mul( float4( HitScreen, HitUVz.z, 1 ), View.ClipToTranslatedWorld );
	WorldPos /= WorldPos.w;
	float3 LastFrameWorldPos = WorldPos.xyz + (View.PrevPreViewTranslation - View.PreViewTranslation);
	float4 LastFrameClip = mul( float4( LastFrameWorldPos, 1 ), View.PrevTranslatedWorldToClip );
	float2 MaskScreenPos = LastFrameClip.xy / LastFrameClip.w;
	float2 LastFrameUV = MaskScreenPos.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
	Color.rgb = PostprocessInput0.SampleLevel( PostprocessInput0Sampler, LastFrameUV, 0 ).rgb;
#else
	// Color buffer matches the current frame, sample at the hit directly
	Color.rgb = PostprocessInput0.SampleLevel( PostprocessInput0Sampler, HitUVz.xy, 0 ).rgb;
	float2 MaskScreenPos = ( HitUVz.xy - View.ScreenPositionScaleBias.wz ) / View.ScreenPositionScaleBias.xy;
#endif
	// Off screen masking: ramps from 0 to 1 over the outer 20% of the screen on each axis
	float2 EdgeMask = saturate( abs( MaskScreenPos ) * 5 - 4 );
	// The screen position sometimes has NaNs or Infs. DX11 is protected because saturate turns NaNs -> 0.
	// SafeSaturate gives other platforms the same protection.
	float EdgeFade = SafeSaturate( 1.0 - dot( EdgeMask, EdgeMask ) );
	Color *= EdgeFade;
	// Transform NaNs to black, transform negative colors to black.
	Color.rgb = -min(-Color.rgb, 0.0);
	// Fade end of trace
	float TraceFade = saturate( 4 - 4 * HitTime );
	Color *= TraceFade;
	return Color;
}
// Pixel shader entry point: traces one or more reflection rays per pixel against the depth
// buffer and outputs the reflected scene color, scaled by the roughness fade and SSRParams.r.
// UVAndScreenPos.xy is the buffer UV, .zw the screen position of the pixel.
// SSR_QUALITY selects the ray/step counts (0 only draws a debug pattern).
void ScreenSpaceReflectionsPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
float2 UV = UVAndScreenPos.xy;
float2 ScreenPos = UVAndScreenPos.zw;
// integer pixel coordinate, used for the dither patterns below
int2 PixelPos = int2(UVAndScreenPos.zw * ScreenPosToPixel.xy + ScreenPosToPixel.zw + 0.5f);
OutColor = 0;
FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UV);
// mask SSR to reduce noise and for better performance, roughness of 0 should have SSR, at MaxRoughness we fade to 0
// NOTE(review): this only fades out if SSRParams.y is negative - confirm against the code that sets it
float RoughnessFade = saturate(ScreenSpaceData.GBuffer.Roughness * SSRParams.y + 2);
#if 1
// Early out
// skips fully faded pixels and pixels with ShadingModelID 0; OutColor stays 0
BRANCH if( RoughnessFade == 0 || ScreenSpaceData.GBuffer.ShadingModelID == 0 )
{
return;
}
#endif
#if SSR_QUALITY == 0
// visualize SSR
// 2x2 pixel checker pattern blending between red and yellow, marks every pixel that would trace rays
float PatternMask = ((PixelPos.x / 2 + PixelPos.y / 2) % 2) * 0.7f;
OutColor = lerp(float4(1, 0, 0, 1), float4(1, 1 ,0, 1), PatternMask) * 0.3f;
return;
#endif
float Roughness = ScreenSpaceData.GBuffer.Roughness;
float3 N = ScreenSpaceData.GBuffer.WorldNormal;
float SceneDepth = CalcSceneDepth(UV);
// reconstruct the pixel position from screen position and depth
float3 PositionTranslatedWorld = mul( float3( ScreenPos * SceneDepth, SceneDepth ), (float3x3)View.ScreenToTranslatedWorld );
// normalized direction from the surface to the view origin
float3 V = View.TranslatedViewOrigin.xyz - PositionTranslatedWorld;
V = normalize( V );
// per frame value that varies the sample offsets over time
uint FrameRandom;
{
bool bTemporalAAIsOn = View.TemporalAAParams.g > 1;
if(bTemporalAAIsOn)
{
// usually this number is in the 0..7 range but it depends on the TemporalAA quality
FrameRandom = (uint)(View.TemporalAAParams.r * 1551);
}
else
{
// todo: using the FrameNumber can be wrong in editor
// 4 aligns with the temporal smoothing, larger number will do more flickering (power of two for best performance)
// NOTE(review): the comment above mentions 4 but the constant below is 8 - confirm which one is intended
const uint RandomizeOverNFrames = 8;
FrameRandom = (View.FrameNumber % RandomizeOverNFrames) * 1551;
}
}
// quality tiers: steps per ray and rays per pixel
#if SSR_QUALITY == 1
const int NumSteps = 8;
const int NumRays = 1;
#elif SSR_QUALITY == 2
const int NumSteps = 16;
const int NumRays = 1;
#elif SSR_QUALITY == 3
const int NumSteps = 8;
const int NumRays = 4;
#else // SSR_QUALITY == 4
const int NumSteps = 12;
const int NumRays = 12;
#endif
// Sample set dithered over 4x4 pixels
uint Morton = MortonCode( PixelPos.x & 3 ) | ( MortonCode( PixelPos.y & 3 ) * 2 );
uint PixelIndex = ReverseUIntBits( Morton );
if( NumRays > 1 )
{
// per pixel and per frame random seed for the ray directions
uint2 Random = ScrambleTEA( uint2( PixelPos ) ^ FrameRandom, 3 );
// Shoot multiple rays
LOOP for( int i = 0; i < NumRays; i++ )
{
// TODO better per ray offset?
uint Offset = ( PixelIndex + ReverseUIntBits( FrameRandom + i * 117 ) ) & 15;
// map 0..15 to -0.5..0.5 steps
float StepOffset = Offset / 15.0;
StepOffset -= 0.5;
// pick a half vector around N from the roughness and reflect the view vector around it
float2 E = Hammersley( i, NumRays, Random );
float3 H = TangentToWorld( ImportanceSampleBlinn( E, Roughness ).xyz, N );
float3 R = 2 * dot( V, H ) * H - V;
float4 HitUVzTime = RayCast( R, Roughness, SceneDepth, PositionTranslatedWorld, NumSteps, StepOffset );
// if there was a hit
BRANCH if( HitUVzTime.w < 1 )
{
float4 SampleColor = SampleScreenColor( HitUVzTime.xyz, HitUVzTime.w );
// weight down bright samples before averaging to reduce fireflies
SampleColor.rgb /= 1 + Luminance(SampleColor.rgb);
OutColor += SampleColor;
}
}
// rays without a hit contribute 0 to the average
OutColor /= NumRays;
// undo the luminance weighting applied per sample (exact inverse if Luminance is linear in rgb)
OutColor.rgb /= 1 - Luminance(OutColor.rgb);
}
else
{
// single ray along the mirror reflection direction
uint Offset = ( PixelIndex + ReverseUIntBits( FrameRandom ) ) & 15;
// map 0..15 to -0.5..0.5 steps
float StepOffset = Offset / 15.0;
StepOffset -= 0.5;
float3 R = reflect( -V, N );
float4 HitUVzTime = RayCast( R, Roughness, SceneDepth, PositionTranslatedWorld, NumSteps, StepOffset );
// if there was a hit
BRANCH if( HitUVzTime.w < 1 )
{
OutColor = SampleScreenColor( HitUVzTime.xyz, HitUVzTime.w );
}
}
// apply roughness mask and the global SSR intensity
OutColor *= RoughnessFade;
OutColor *= SSRParams.r;
}