// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	ScreenSpaceReflections.usf: To generate screen space reflections as a postprocess
=============================================================================*/

#include "Common.usf"
#include "PostProcessCommon.usf"
#include "DeferredShadingCommon.usf"
#include "Random.usf"
#include "BRDF.usf"
#include "MonteCarlo.usf"

// 0:reference (slower, potentially higher quality) 1:use mips of the HZB depth (better performance)
// When 1, RayCast marches in 0..1 UV space and SampleDepthTexture reads the texture passed in
// at the requested mip level; when 0, it marches in View.ScreenPositionScaleBias UV space and
// SampleDepthTexture reads SceneDepthTexture at mip 0.
#define USE_HZB		1

// Selects the ray marching loop compiled into RayCast. The variants are tested in this order
// with #if / #elif / #else, so the early-out variant is the fallback whenever the first two
// are 0 -- VECTORIZED_EARLY_OUT itself is never tested by the preprocessor.
#define SCALAR_BRANCHLESS		0
#define VECTORIZED_BRANCHLESS	0
#define VECTORIZED_EARLY_OUT	1

// .r:Intensity in 0..1 range .g:RoughnessMaskMul, b:unused, a:unused
float4 SSRParams;

// Spreads the bits of x apart so that input bit k ends up at output bit 2k (odd bits are 0).
// With the two wider steps commented out only the low 4 bits of x are spread correctly;
// the caller in this file passes values masked with "& 3".
uint MortonCode( uint x )
{
	//x = (x ^ (x <<  8)) & 0x00ff00ff;
	//x = (x ^ (x <<  4)) & 0x0f0f0f0f;
	x = (x ^ (x <<  2)) & 0x33333333;
	x = (x ^ (x <<  1)) & 0x55555555;
	return x;
}

// Reverses the bit order inside every 4 bit group of the input (bit pairs are swapped, then
// neighbouring bits). The wider swaps needed for a full 32 bit reversal are commented out;
// the callers in this file only use the low 4 bits of the result ("& 15") or feed in a
// value that fits in 4 bits.
uint ReverseUIntBits( uint bits )
{
	//bits = ( bits << 16) | ( bits >> 16);
	//bits = ( (bits & 0x00ff00ff) << 8 ) | ( (bits & 0xff00ff00) >> 8 );
	//bits = ( (bits & 0x0f0f0f0f) << 4 ) | ( (bits & 0xf0f0f0f0) >> 4 );
	bits = ( (bits & 0x33333333) << 2 ) | ( (bits & 0xcccccccc) >> 2 );
	bits = ( (bits & 0x55555555) << 1 ) | ( (bits & 0xaaaaaaaa) >> 1 );
	return bits;
}

// Fetches four depth values with one call so the texture reads are grouped together.
// SampleUV0.xy / SampleUV0.zw / SampleUV1.xy / SampleUV1.zw are the four UVs, the results
// are returned in .x / .y / .z / .w in that order (red channel of each fetch).
// USE_HZB == 1: reads Texture through Sampler at mip Level.
// USE_HZB == 0: Texture, Sampler and Level are ignored, SceneDepthTexture is read at mip 0.
float4 SampleDepthTexture( Texture2D Texture, SamplerState Sampler, float Level, float4 SampleUV0, float4 SampleUV1 )
{
	float4 SampleDepth;
#if USE_HZB
	SampleDepth.x = Texture2DSampleLevel( Texture, Sampler, SampleUV0.xy, Level ).r;
	SampleDepth.y = Texture2DSampleLevel( Texture, Sampler, SampleUV0.zw, Level ).r;
	SampleDepth.z = Texture2DSampleLevel( Texture, Sampler, SampleUV1.xy, Level ).r;
	SampleDepth.w = Texture2DSampleLevel( Texture, Sampler, SampleUV1.zw, Level ).r;
#else
	SampleDepth.x = Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV0.xy, 0 ).r;
	SampleDepth.y = Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV0.zw, 0 ).r;
	SampleDepth.z = Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV1.xy, 0 ).r;
	SampleDepth.w = Texture2DSampleLevel( SceneDepthTexture, SceneDepthTextureSampler, SampleUV1.zw, 0 ).r;
#endif
	return SampleDepth;
}

// Marches a ray through the depth texture in screen space and reports the first intersection.
//
// R                       ray direction; the ray end point is PositionTranslatedWorld + R * SceneDepth
// Roughness               scales the mip level used for the depth fetches (farther samples use higher mips)
// SceneDepth              used as the world space length of the ray segment that gets projected
// PositionTranslatedWorld ray origin, transformed with View.TranslatedWorldToClip
// NumSteps                number of depth samples; the vectorized variants consume 4 per loop iteration
// StepOffset              offset (in steps) applied to the first sample, the caller passes -0.5..0.5
//
// Returns float4( HitUVz, HitTime ): xy is the hit position as a UV in View.ScreenPositionScaleBias
// space, z the ray depth at the hit and w the position along the ray. w == 1 means "no hit";
// the caller only uses xyz when w < 1.
float4 RayCast( float3 R, float Roughness, float SceneDepth, float3 PositionTranslatedWorld, int NumSteps, float StepOffset )
{
	// TODO provide RayStartUVz
	// NOTE could clip ray against frustum planes
	// TODO use screen position and skip matrix mul
	float4 RayStartClip	= mul( float4( PositionTranslatedWorld, 1 ), View.TranslatedWorldToClip );
	float4 RayEndClip	= mul( float4( PositionTranslatedWorld + R * SceneDepth, 1 ), View.TranslatedWorldToClip );

	// Perspective divide.
	// NOTE(review): there is no guard here for a ray end point with w <= 0 -- confirm callers cannot produce one or that the result is harmless.
	float3 RayStartScreen = RayStartClip.xyz / RayStartClip.w;
	float3 RayEndScreen = RayEndClip.xyz / RayEndClip.w;

	// Normalize 2D length, then scale so the whole ray covers 1.5 units in screen xy
	// NOTE(review): the length is 0 when start and end project to the same xy; no guard is visible here.
	float3 RayStepScreen = ( RayEndScreen - RayStartScreen ) / length( RayEndScreen.xy - RayStartScreen.xy );
	RayStepScreen *= 1.5;

#if USE_HZB
	// Screen xy (-1..1, y up) to 0..1 UV (y down)
	float3 RayStartUVz = float3( RayStartScreen.xy * float2( 0.5, -0.5 ) + 0.5, RayStartScreen.z );
	float3 RayStepUVz = float3( RayStepScreen.xy * float2( 0.5, -0.5 ), RayStepScreen.z );
#else
	float3 RayStartUVz = float3( RayStartScreen.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz, RayStartScreen.z );
	float3 RayStepUVz = float3( RayStepScreen.xy * View.ScreenPositionScaleBias.xy, RayStepScreen.z );
#endif

	// Distance between two samples in ray time (the ray spans 0..1)
	const float Step = 1.0 / (NumSteps + 1);

	// *2 to get less moire pattern in extreme cases, larger values make object appear not grounded in reflections
	const float CompareTolerance = abs( RayStepScreen.z ) * Step * 2;

	// avoid bugs with early returns inside of loops on certain platform compilers.
	// w = 1 is the "no hit" value, it is only replaced when a sample passes the hit test.
	float4 Result = float4( 0, 0, 0, 1 );

#if SCALAR_BRANCHLESS
	// One sample per iteration, no branches: every sample is evaluated and the smallest hit time is kept.
	// NOTE(review): this variant samples PostprocessInput1 directly instead of going through
	// SampleDepthTexture, so unlike the vectorized variants it does not switch to
	// SceneDepthTexture when USE_HZB is 0.
	float MinHitTime = 1;
	float LastDiff = 0;

	float SampleTime = StepOffset * Step + Step;

	UNROLL
	for( int i = 0; i < NumSteps; i++ )
	{
		float3 SampleUVz = RayStartUVz + RayStepUVz * SampleTime;

		// Use lower res for farther samples
		float Level = Roughness * (i * 4.0 / NumSteps);
		float SampleDepth = PostprocessInput1.SampleLevel( PostprocessInput1Sampler, SampleUVz.xy, Level ).r;

		// Hit when DepthDiff lies in the open range ( -2 * CompareTolerance, 0 )
		float DepthDiff = SampleUVz.z - SampleDepth;
		bool Hit = abs( DepthDiff + CompareTolerance ) < CompareTolerance;

		// Find more accurate hit using line segment intersection
		// (LastDiff starts at 0, so for the first sample TimeLerp is 0)
		float TimeLerp = saturate( LastDiff / (LastDiff - DepthDiff) );
		float IntersectTime = SampleTime + TimeLerp * Step - Step;
		float HitTime = Hit ? IntersectTime : 1;
		MinHitTime = min( MinHitTime, HitTime );
		LastDiff = DepthDiff;

		SampleTime += Step;
	}

	float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
	// 0..1 UV back to screen position, then to View.ScreenPositionScaleBias UV space
	HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
	HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif

	Result = float4( HitUVz, MinHitTime );
#elif VECTORIZED_BRANCHLESS
	// Four samples per iteration, no branches: all samples are evaluated and the smallest hit time is kept.
	float MinHitTime = 1;
	float LastDiff = 0;
	float Level = 0;

	// Vectorized to group fetches
	float4 SampleTime = ( StepOffset + float4( 1, 2, 3, 4 ) ) / (NumSteps + 1);
	float4 SampleUV0 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.xxyy;
	float4 SampleUV1 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.zzww;
	float4 SampleZ   = RayStartUVz.zzzz + RayStepUVz.zzzz * SampleTime;

	LOOP
	for( int i = 0; i < NumSteps; i += 4 )
	{
		// Use lower res for farther samples
		float4 SampleDepth = SampleDepthTexture( PostprocessInput1, PostprocessInput1Sampler, Level, SampleUV0, SampleUV1 );

#if 1
		// Active path: the hit time is the sample time itself, without refinement.
		// Hit when DepthDiff lies in the open range ( -2 * CompareTolerance, 0 )
		float4 DepthDiff = SampleZ - SampleDepth;
		bool4 Hit = abs( DepthDiff + CompareTolerance ) < CompareTolerance;

		// If hit set to intersect time. If missed set to 1, beyond end of ray
		float4 HitTime = Hit ? SampleTime : 1;

		// Take closest hit
		HitTime.xy = min( HitTime.xy, HitTime.zw );
		MinHitTime = min( MinHitTime, min( HitTime.x, HitTime.y ) );
#else
		// Line segment intersection
		float4 DepthDiff1 = SampleZ - SampleDepth;
		float4 DepthDiff0 = float4( LastDiff, DepthDiff1.xyz );
		float4 TimeLerp = saturate( DepthDiff0 / (DepthDiff0 - DepthDiff1) );
		float4 IntersectTime = SampleTime + (TimeLerp - 1) / (NumSteps + 1);
		bool4 Hit = abs( DepthDiff1 + CompareTolerance ) < CompareTolerance;

		// If hit set to intersect time. If missed set to 1, beyond end of ray
		float4 HitTime = Hit ? IntersectTime : 1;

		// Take closest hit
		HitTime.xy = min( HitTime.xy, HitTime.zw );
		MinHitTime = min( MinHitTime, min( HitTime.x, HitTime.y ) );

		LastDiff = DepthDiff1.w;
#endif

		// Advance by 4 samples; the mip level grows by Roughness * 4 / NumSteps per sample
		Level += Roughness * (16.0 / NumSteps);
		SampleTime += 4.0 / (NumSteps + 1);
		SampleUV0 += RayStepUVz.xyxy * 4.0 / (NumSteps + 1);
		SampleUV1 += RayStepUVz.xyxy * 4.0 / (NumSteps + 1);
		SampleZ   += RayStepUVz.zzzz * 4.0 / (NumSteps + 1);
	}

	float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
	// 0..1 UV back to screen position, then to View.ScreenPositionScaleBias UV space
	// NOTE(review): the doubled "HitUVz.xy =" is a redundant self-assignment; the value written
	// is the same as with the single assignment used by the other two variants.
	HitUVz.xy = HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
	HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif

	Result = float4( HitUVz, MinHitTime );
#else // VECTORIZED_EARLY_OUT
	// Four samples per iteration; the loop is left as soon as one group contains a hit.
	float LastDiff = 0;
	float Level = 0;

	float4 SampleTime = ( StepOffset + float4( 1, 2, 3, 4 ) ) / (NumSteps + 1);

	LOOP
	for( int i = 0; i < NumSteps; i += 4 )
	{
		// Vectorized to group fetches
		float4 SampleUV0 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.xxyy;
		float4 SampleUV1 = RayStartUVz.xyxy + RayStepUVz.xyxy * SampleTime.zzww;
		float4 SampleZ   = RayStartUVz.zzzz + RayStepUVz.zzzz * SampleTime;

		// Use lower res for farther samples
		float4 SampleDepth = SampleDepthTexture( PostprocessInput1, PostprocessInput1Sampler, Level, SampleUV0, SampleUV1 );

		// abs( -d - t ) equals abs( d + t ), so this is the same test as in the other variants:
		// hit when DepthDiff1 lies in the open range ( -2 * CompareTolerance, 0 )
		float4 DepthDiff1 = SampleZ - SampleDepth;
		bool4 Hit = abs( -DepthDiff1 - CompareTolerance ) < CompareTolerance;

		BRANCH if( any( Hit ) )
		{
			// Find more accurate hit using line segment intersection
			// DepthDiff0 holds, per sample, the difference of the sample before it
			float4 DepthDiff0 = float4( LastDiff, DepthDiff1.xyz );
			float4 TimeLerp = saturate( DepthDiff0 / (DepthDiff0 - DepthDiff1) );
			float4 IntersectTime = SampleTime + (TimeLerp - 1) / (NumSteps + 1);
			float4 HitTime = Hit ? IntersectTime : 1;

			// Take closest hit
			HitTime.xy = min( HitTime.xy, HitTime.zw );
			float MinHitTime = min( HitTime.x, HitTime.y );

			float3 HitUVz = RayStartUVz + RayStepUVz * MinHitTime;
#if USE_HZB
			// 0..1 UV back to screen position, then to View.ScreenPositionScaleBias UV space
			HitUVz.xy = HitUVz.xy * float2( 2, -2 ) + float2( -1, 1 );
			HitUVz.xy = HitUVz.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;
#endif

			// Store the hit and leave the loop with break; the function returns once, after the loop
			Result = float4( HitUVz, MinHitTime );
			break;
		}

		LastDiff = DepthDiff1.w;

		// Advance by 4 samples; the mip level grows by Roughness * 4 / NumSteps per sample
		Level += Roughness * (16.0 / NumSteps);
		SampleTime += 4.0 / (NumSteps + 1);
	}
#endif

	return Result;
}

// Fetches the color that is reflected at a ray hit and applies the edge and distance fades.
//
// HitUVz   hit position as returned by RayCast (xy: UV in View.ScreenPositionScaleBias space, z: depth)
// HitTime  position of the hit along the ray, 0..1
//
// Returns rgb = color read from PostprocessInput0, a = 1, both multiplied by the fade factors.
// With PREV_FRAME_COLOR the hit is first reprojected with the previous frame's view data.
float4 SampleScreenColor( float3 HitUVz, float HitTime )
{
	float4 OutColor;

#if PREV_FRAME_COLOR
	// Find previous screen position for hit since color buffer is from last frame
	// TODO combine to single matrix
	float4 HitClip = float4( (HitUVz.xy - View.ScreenPositionScaleBias.wz) / View.ScreenPositionScaleBias.xy, HitUVz.z, 1 );
	float4 HitTranslatedWorld = mul( HitClip, View.ClipToTranslatedWorld );
	HitTranslatedWorld /= HitTranslatedWorld.w;

	float3 PrevTranslatedWorld = HitTranslatedWorld.xyz + (View.PrevPreViewTranslation - View.PreViewTranslation);
	float4 PrevClip = mul( float4( PrevTranslatedWorld, 1 ), View.PrevTranslatedWorldToClip );
	float2 PrevScreen = PrevClip.xy / PrevClip.w;
	float2 PrevUV = PrevScreen.xy * View.ScreenPositionScaleBias.xy + View.ScreenPositionScaleBias.wz;

	OutColor.rgb = PostprocessInput0.SampleLevel( PostprocessInput0Sampler, PrevUV, 0 ).rgb;
	OutColor.a = 1;

	// Off screen masking: 0 while |screen pos| <= 0.8, rising to 1 at the screen border
	float2 Vignette = saturate( abs( PrevScreen ) * 5 - 4 );
#else
	OutColor.rgb = PostprocessInput0.SampleLevel( PostprocessInput0Sampler, HitUVz.xy, 0 ).rgb;
	OutColor.a = 1;

	// Off screen masking: 0 while |screen pos| <= 0.8, rising to 1 at the screen border
	float2 HitScreenPos = ( HitUVz.xy - View.ScreenPositionScaleBias.wz ) / View.ScreenPositionScaleBias.xy;
	float2 Vignette = saturate( abs( HitScreenPos ) * 5 - 4 );
#endif

	//PrevScreen sometimes has NaNs or Infs.  DX11 is protected because saturate turns NaNs -> 0.
	//Do a SafeSaturate so other platforms get the same protection.
	OutColor *= SafeSaturate( 1.0 - dot( Vignette, Vignette ) );

	// Transform NaNs to black, transform negative colors to black.
	OutColor.rgb = -min(-OutColor.rgb, 0.0);

	// Fade end of trace: factor is 1 up to HitTime 0.75 and reaches 0 at HitTime 1
	OutColor *= saturate( 4 - 4 * HitTime );
	return OutColor;
}

// Pixel shader entry point.
//
// UVAndScreenPos  xy: UV used for the GBuffer and scene depth lookups, zw: screen position
// OutColor        reflection color in rgb and coverage in a, scaled by the roughness fade and
//                 by SSRParams.r; stays 0 for pixels that are skipped or whose rays hit nothing
//
// SSR_QUALITY picks the number of steps per ray and the number of rays per pixel.
void ScreenSpaceReflectionsPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	float2 UV = UVAndScreenPos.xy;
	float2 ScreenPos = UVAndScreenPos.zw;
	int2 PixelPos = int2(UVAndScreenPos.zw * ScreenPosToPixel.xy + ScreenPosToPixel.zw + 0.5f);

	OutColor = 0;

	FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(UV);

	// mask SSR to reduce noise and for better performance, roughness of 0 should have SSR, at MaxRoughness we fade to 0
	// (for the fade to reach 0 SSRParams.y has to be negative -- presumably set up that way by the caller, TODO confirm)
	float RoughnessFade = saturate(ScreenSpaceData.GBuffer.Roughness * SSRParams.y + 2);

#if 1
	// Early out
	BRANCH if( RoughnessFade == 0 || ScreenSpaceData.GBuffer.ShadingModelID == 0 )
	{
		return;
	}
#endif

	float Roughness = ScreenSpaceData.GBuffer.Roughness;
	float3 N = ScreenSpaceData.GBuffer.WorldNormal;
	float SceneDepth = CalcSceneDepth(UV);
	// Only the 3x3 part of the matrix is applied, i.e. without its translation row
	float3 PositionTranslatedWorld = mul( float3( ScreenPos * SceneDepth, SceneDepth ), (float3x3)View.ScreenToTranslatedWorld );
	// V: unit vector from the surface position towards the view origin
	float3 V = View.TranslatedViewOrigin.xyz - PositionTranslatedWorld;
	V = normalize( V );

	// Per frame value that is mixed into the step offsets (and the ray directions when NumRays > 1)
	uint FrameRandom;
	{
		bool bTemporalAAIsOn = View.TemporalAAParams.g > 1;

		if(bTemporalAAIsOn)
		{
			// usually this number is in the 0..7 range but it depends on the TemporalAA quality
			FrameRandom = (uint)(View.TemporalAAParams.r * 1551);
		}
		else
		{
			// todo: using the FrameNumber can be wrong in editor
			// 4 aligns with the temporal smoothing, larger number will do more flickering (power of two for best performance)
			// NOTE(review): the comment above mentions 4 while the constant below is 8 -- confirm which is intended.
			const uint RandomizeOverNFrames = 8;
			FrameRandom = (View.FrameNumber % RandomizeOverNFrames) * 1551;
		}
	}

#if SSR_QUALITY == 1
	const int NumSteps = 8;
	const int NumRays = 1;
#elif SSR_QUALITY == 2
	const int NumSteps = 16;
	const int NumRays = 1;
#elif SSR_QUALITY == 3
	const int NumSteps = 8;
	const int NumRays = 4;
#else // SSR_QUALITY == 4
	const int NumSteps = 12;
	const int NumRays = 12;
#endif

	// Sample set dithered over 4x4 pixels
	// Interleave the low 2 bits of x and y into a 4 bit index, then reverse its bit order
	uint Morton = MortonCode( PixelPos.x & 3 ) | ( MortonCode( PixelPos.y & 3 ) * 2 );
	uint PixelIndex = ReverseUIntBits( Morton );

	if( NumRays > 1 )
	{
		uint2 Random = ScrambleTEA( uint2( PixelPos ) ^ FrameRandom, 3 );

		// Shoot multiple rays
		LOOP for( int i = 0; i < NumRays; i++ )
		{
			// TODO better per ray offset?
			// Offset is 0..15, which makes StepOffset -0.5..0.5
			uint Offset = ( PixelIndex + ReverseUIntBits( FrameRandom + i * 117 ) ) & 15;
			float StepOffset = Offset / 15.0;
			StepOffset -= 0.5;

			// Pick a half vector H for this ray and mirror V about it to get the ray direction
			float2 E = Hammersley( i, NumRays, Random );
			float3 H = TangentToWorld( ImportanceSampleBlinn( E, Roughness ).xyz, N );
			float3 R = 2 * dot( V, H ) * H - V;

			float4 HitUVzTime = RayCast( R, Roughness, SceneDepth, PositionTranslatedWorld, NumSteps, StepOffset );

			// if there was a hit
			BRANCH if( HitUVzTime.w < 1 )
			{
				float4 SampleColor = SampleScreenColor( HitUVzTime.xyz, HitUVzTime.w );
				// Compress by 1 / ( 1 + luminance ) before accumulating
				// (presumably so single very bright samples do not dominate the average -- TODO confirm)
				SampleColor.rgb /= 1 + Luminance(SampleColor.rgb);
				OutColor += SampleColor;
			}
		}

		// Average over all rays (misses contribute 0), then apply the inverse form of the
		// per sample compression to the averaged color
		OutColor /= NumRays;
		OutColor.rgb /= 1 - Luminance(OutColor.rgb);
	}
	else
	{
		// Single ray along the mirror direction of the GBuffer normal
		// Offset is 0..15, which makes StepOffset -0.5..0.5
		uint Offset = ( PixelIndex + ReverseUIntBits( FrameRandom ) ) & 15;
		float StepOffset = Offset / 15.0;
		StepOffset -= 0.5;

		float3 R = reflect( -V, N );

		float4 HitUVzTime = RayCast( R, Roughness, SceneDepth, PositionTranslatedWorld, NumSteps, StepOffset );

		// if there was a hit
		BRANCH if( HitUVzTime.w < 1 )
		{
			OutColor = SampleScreenColor( HitUVzTime.xyz, HitUVzTime.w );
		}
	}

	OutColor *= RoughnessFade;
	OutColor *= SSRParams.r;
}