Files
UnrealEngineUWP/Engine/Shaders/PostProcessDOF.usf
Timothy Lottes bc9026b581 Add the requested extra "fog" depth based blur to CircleDOF.
[CL 2452108 by Timothy Lottes in Main branch]
2015-02-19 15:23:17 -05:00

742 lines
27 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessDOF.usf: PostProcessing Depth of Field
=============================================================================*/
#include "Common.usf"
#include "PostProcessCommon.usf"
#include "DeferredShadingCommon.usf" // FGBufferData
#include "DepthOfFieldCommon.usf"
// todo move to central place
// Near-field mask: saturate((focal distance - SceneDepth) / near transition region).
// 0 at or behind the focal distance, 1 once the depth is a full transition region in front of it.
float ComputeDOFNearFocalMask(float SceneDepth)
{
	float DistanceInFront = View.DepthOfFieldFocalDistance - SceneDepth;
	return saturate(DistanceInFront / View.DepthOfFieldNearTransitionRegion);
}
// todo move to central place
// Far-field mask: saturate((SceneDepth - far focal plane) / far transition region),
// where the far focal plane is the focal distance plus the focal region.
float ComputeDOFFarFocalMask(float SceneDepth)
{
	float DistanceBehind = SceneDepth - (View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion);
	return saturate(DistanceBehind / View.DepthOfFieldFarTransitionRegion);
}
// Returns both focal masks for a depth: .x = far mask, .y = near mask.
// SkyWithoutHorizonMask (0..1) fades the far mask towards 0 for depths beyond
// DepthOfFieldParams[0].x (the sky focus distance).
float2 ComputeDOFFocalMask(float SceneDepth, float SkyWithoutHorizonMask)
{
	float FarMask = ComputeDOFFarFocalMask(SceneDepth);
	float NearMask = ComputeDOFNearFocalMask(SceneDepth);

	// The skybox should not be faded out, except at the horizon. This can be optimized.
	if (SceneDepth > DepthOfFieldParams[0].x)
	{
		FarMask = lerp(FarMask, 0, SkyWithoutHorizonMask);
	}

	return float2(FarMask, NearMask);
}
// Pixel shader entry point for the DOF setup pass.
// Takes four taps of PostprocessInput0 around UV, weights each tap by the far/near focal mask
// computed from one component of the gathered scene depth, and writes the average of the four:
//   OutColor0: far layer  (rgb = masked color, a = averaged far mask)
//   OutColor1: near layer (rgb = masked color, a = averaged near mask), only when ENABLE_NEAR_BLUR is set
// UVAndScreenPos: .xy = UV used for the taps and the depth gather, .zw = screen position in [-1, 1].
void SetupPS(
	float4 UVAndScreenPos : TEXCOORD0
	, out float4 OutColor0 : SV_Target0
#if ENABLE_NEAR_BLUR
	, out float4 OutColor1 : SV_Target1
#endif
	)
{
	float2 UV = UVAndScreenPos.xy;
	// Tap offset: half of PostprocessInput0Size.zw (presumably half a source texel - confirm).
	float2 Offset = 0.5f * PostprocessInput0Size.zw;

	float4 DepthQuad = GatherSceneDepth(UV, PostprocessInput1Size.zw);

#if ENABLE_NEAR_BLUR == 0
	// We aren't writing out to the second render target, so we'll just make a dummy value here which
	// doesn't end up going anywhere. Then, the source code can stay neat and tidy, while the compiler can still
	// strip it out
	float4 OutColor1;
#endif

	OutColor0 = 0;
	OutColor1 = 0;

	float2 Mask;
	float4 Sample;

	// for each sample of the full res input image
	// we compute the mask (front or back layer)
	// and put into MRT0 or MRT1

	// screen position in [-1, 1] screen space
	float2 ScreenSpacePos = UVAndScreenPos.zw;

	// can be optimized, needed to not blur the skybox
	float3 ScreenVector = normalize(mul(float4(ScreenSpacePos, 1, 0), View.ScreenToWorld).xyz);
	float SkyWithoutHorizonMask = saturate(ScreenVector.z * 3.0f);

	// NOTE(review): this first tap uses Texture2DSampleLevel (explicit mip 0) while the other three use
	// Texture2DSample - confirm whether the difference is intentional.
	Mask = ComputeDOFFocalMask(DepthQuad.x, SkyWithoutHorizonMask);
	Sample = float4(Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, UV + Offset * float2(-1, 1), 0).rgb, 1);
	OutColor0 += Sample * Mask.x;
	OutColor1 += Sample * Mask.y;

	Mask = ComputeDOFFocalMask(DepthQuad.y, SkyWithoutHorizonMask);
	Sample = float4(Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, UV + Offset * float2(1, 1)).rgb, 1);
	OutColor0 += Sample * Mask.x;
	OutColor1 += Sample * Mask.y;

	Mask = ComputeDOFFocalMask(DepthQuad.z, SkyWithoutHorizonMask);
	Sample = float4(Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, UV + Offset * float2(1, -1)).rgb, 1);
	OutColor0 += Sample * Mask.x;
	OutColor1 += Sample * Mask.y;

	Mask = ComputeDOFFocalMask(DepthQuad.w, SkyWithoutHorizonMask);
	Sample = float4(Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, UV + Offset * float2(-1, -1)).rgb, 1);
	OutColor0 += Sample * Mask.x;
	OutColor1 += Sample * Mask.y;

	// we average 4 samples
	OutColor0 /= 4;
	OutColor1 /= 4;

	// Debug tint (disabled):
	// OutColor0.rgb *= float3(1,0,0);
	// OutColor1.rgb *= float3(0,1,0);
}
// UV clamp rectangle applied by MainRecombinePS to the DOF layer lookups: .xy = minimum UV, .zw = maximum UV.
float4 DepthOfFieldUVLimit;
// Pixel shader that combines the full res scene with the blurred images behind and in front of the focal plane.
//   PostprocessInput0: full res scene color
//   PostprocessInput1: accumulated layer behind the focal plane (a = accumulated mask)
//   PostprocessInput2: accumulated layer in front of the focal plane, only read when ENABLE_NEAR_BLUR is set
// UVAndScreenPos: .xy = UV for the DOF layers, .zw = screen position used to rebuild the full res UV.
// Output: rgb = merged color, a = 0.
void MainRecombinePS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0
	)
{
	// SceneColor in full res
	float2 PixelPosCenter = UVAndScreenPos.zw * ScreenPosToPixel.xy + ScreenPosToPixel.zw + 0.5f;
	float2 FullResUV = PixelPosCenter * PostprocessInput0Size.zw;

	// DOF in half res
	// float2 ViewportUV = FullResUV * float2(1, DepthOfFieldParams[1].z);// - 0.5 * PostprocessInput1Size.zw;
	// float2 ViewportUV = (PixelPos * 0.5f + 0.5f) * PostprocessInput1Size.zw;
	float2 ViewportUV = UVAndScreenPos.xy;

	// Clamp UV to avoid pulling bad data.
	ViewportUV.x = clamp(ViewportUV.x, DepthOfFieldUVLimit.x, DepthOfFieldUVLimit.z);
	ViewportUV.y = clamp(ViewportUV.y, DepthOfFieldUVLimit.y, DepthOfFieldUVLimit.w);

	float4 SceneColorAndDepth = float4(Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, FullResUV).rgb, CalcSceneDepth(FullResUV));
	float3 UnfocusedSceneColor = SceneColorAndDepth.rgb;

	// behind focal plane
	float4 DOFAccumLayer1 = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, ViewportUV);

#if ENABLE_NEAR_BLUR
	// in front of focal plane
	float4 DOFAccumLayer3 = Texture2DSample(PostprocessInput2, PostprocessInput2Sampler, ViewportUV);
#else
	// I'm presuming all that matters here is the W==0 bit to mask out this value
	// TODO: Should check that compiler is doing a good job of removing the usages of this
	// from the rest of the code. It has no reason not to be able to do so...
	float4 DOFAccumLayer3 = float4(0,0,0,0);
#endif

	float Layer2Mask = 1.0f - ComputeDOFFarFocalMask(SceneColorAndDepth.a);
	// float Layer2Mask = 1.0f - DOFAccumLayer1.a;
	float Layer3Mask = DOFAccumLayer3.a;

	// 3 layers

	// Small bias so the normalization below never divides by zero; when a layer is empty
	// (rgb and a both 0) the result falls back to the unfocused scene color.
	float Div0Bias = 0.0001f;

	// Layer 1: half res background, normalized by its accumulated mask
	float3 LayerMerger = (UnfocusedSceneColor * Div0Bias + DOFAccumLayer1.rgb) / (DOFAccumLayer1.a + Div0Bias);

	// Needed to cope with the skybox not being blurred, the tweak value
	// avoids having a discontinuity between blurry far objects and the skybox
	// and is chosen to not produce too much blobby looking out of focus rendering.
	float Blend = DOFAccumLayer1.a;
	// Magic function to transform alpha into smooth blend function against in-focus skybox:
	// fourth root followed by a smoothstep-style cubic.
	Blend = sqrt(Blend);
	Blend = sqrt(Blend);
	Blend = Blend * Blend * (3.0 - 2.0 * Blend);
	LayerMerger = lerp(UnfocusedSceneColor, LayerMerger, Blend);

	// Layer 2: then we add the focused scene to fill the empty areas.
	// Only the top "Smash" fraction of the mask range is kept, remapped to 0..1 and squared.
	float Smash = 0.25;
	Layer2Mask = saturate((Layer2Mask - (1.0 - Smash)) * rcp(Smash));
	Layer2Mask *= Layer2Mask;
	// Alternative (disabled): also fade by the per-pixel near mask, i.e. use
	// Layer2Mask * (1 - ComputeDOFNearFocalMask(SceneColorAndDepth.a)) as the blend weight.
	LayerMerger = lerp(LayerMerger, SceneColorAndDepth.rgb, Layer2Mask);

	float3 FrontLayer = (UnfocusedSceneColor * Div0Bias + DOFAccumLayer3.rgb) / (DOFAccumLayer3.a + Div0Bias);

	// Layer 3: on top of that blend the front half res layer
	LayerMerger = lerp(LayerMerger, FrontLayer, saturate(Layer3Mask * 5));

	OutColor.rgb = LayerMerger;
	OutColor.a = 0;
}
//
// PROTOTYPE CIRCLE DOF : WORK IN PROGRESS
//
// {radius, depth blur amp, depth blur radius}
// Read by DepthToCoc(): .x scales the focus-relative CoC term, and the depth ("fog") blur term
// is (1 - exp2(-Depth * .y)) * .z.
float3 CircleDofParams;
// CIRCLE DOF: Compute the signed circle of confusion size in pixels for a scene depth.
// The magnitude is the larger of the focus-relative term and the depth blur term;
// the sign follows the focus-relative term (negative when that term is negative).
float DepthToCoc(float Depth)
{
	float FocusDistance = View.DepthOfFieldFocalDistance;

	// Focus-relative term, scaled by CircleDofParams.x.
	float SignedCoc = ((Depth - FocusDistance) / Depth) * CircleDofParams.x;

	// Extra depth based blur term.
	float FogBlur = (1.0 - exp2(-Depth * CircleDofParams.y)) * CircleDofParams.z;

	float Magnitude = max(abs(SignedCoc), FogBlur);
	return (SignedCoc < 0.0) ? -Magnitude : Magnitude;
}
// Pixel shader entry point for the circle DOF setup pass.
// Converts the four gathered scene depths to CoC values, picks one of them for alpha, and writes to rgb
// the average of the four color taps weighted by how closely each tap's CoC matches the picked one.
void CircleSetupPS(float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor0 : SV_Target0)
{
	// One CoC per gathered depth.
	float4 DepthQuad = GatherSceneDepth(UVAndScreenPos.xy, PostprocessInput1Size.zw);
	float4 CocQuad = float4(DepthToCoc(DepthQuad.x), DepthToCoc(DepthQuad.y), DepthToCoc(DepthQuad.z), DepthToCoc(DepthQuad.w));

	// Four color taps; each is named after the CocQuad component it is paired with below.
	float2 TapUV = UVAndScreenPos.xy - 0.5*PostprocessInput0Size.zw;
	float4 ColorW = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0);
	float4 ColorZ = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,0));
	float4 ColorX = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(0,1));
	float4 ColorY = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,1));

	// Doing a max depth reduction (erode the foreground). Less correct, but less artifacts.
	// Perhaps need to re-open this in the future.
#if 0
	// Stuff max radius in alpha.
	float PickedCoc = max(max(CocQuad.x,CocQuad.y),max(CocQuad.z,CocQuad.w));
#else
	// This in theory is better but causes bleeding artifacts with temporal AA..
	// This is important otherwise near thin objects disappear (leaves clamping artifacts in recombined pass).
	// Start from .x and switch to the next candidate whenever the magnitude of the
	// current pick exceeds that candidate's signed value.
	float PickedCoc = CocQuad.x;
	if(abs(PickedCoc) > CocQuad.y) PickedCoc = CocQuad.y;
	if(abs(PickedCoc) > CocQuad.z) PickedCoc = CocQuad.z;
	if(abs(PickedCoc) > CocQuad.w) PickedCoc = CocQuad.w;
#endif

	// Remove samples which are outside the size: weight is 1 for a tap whose CoC equals the
	// picked one and falls to 0 once the difference reaches 1/ScaleFactor.
	// TODO: Tune the ScaleFactor.
	float ScaleFactor = 64.0;
	float4 W = 1.0 - saturate(abs(PickedCoc - CocQuad) * ScaleFactor);

	OutColor0.rgb = (1.0/(W.x+W.y+W.z+W.w)) * (ColorX.rgb*W.x + ColorY.rgb*W.y + ColorZ.rgb*W.z + ColorW.rgb*W.w);
	OutColor0.a = PickedCoc;
}
// Hash style noise with {0 to 1} output.
// N is the 2D seed and X is added to both of its components before hashing.
float NoizNorm(float2 N, float X)
{
	float2 Seed = N + X;
	float Phase = dot(Seed.xy, float2(12.9898, 78.233));
	return frac(sin(Phase) * 43758.5453);
}
// Same noise as NoizNorm(), remapped to {-1 to 1} output.
float NoizSnorm(float2 N, float X)
{
	float UnitNoise = NoizNorm(N, X);
	return UnitNoise * 2.0 - 1.0;
}
// Returns the 2D vector of length Radius pointing along angle Radians: Radius * (cos, sin).
float2 RotVec(float Radius, float Radians)
{
	float2 Direction = float2(cos(Radians), sin(Radians));
	return Radius * Direction;
}
// Offset mixed into the noise seed in CirclePS and MainCircleRecombinePS; only .x is read in this file.
// NOTE(review): presumably varied by the CPU side to change the noise pattern over time - confirm.
float2 RandomOffset;
// Smallest of the four components of A.
float Min4(float4 A)
{
	float MinXY = min(A.x, A.y);
	float MinZW = min(A.z, A.w);
	return min(MinXY, MinZW);
}
// Smallest of the sixteen components of A, B, C and D.
float Min16(float4 A, float4 B, float4 C, float4 D)
{
	float MinAB = min(Min4(A), Min4(B));
	float MinCD = min(Min4(C), Min4(D));
	return min(MinAB, MinCD);
}
// This does a 2x2:1 reduction with a 4x4:1 dilation.
// Outputs the minimum alpha of PostprocessInput0 over a 4x4 footprint, clamped so it is never above 0
// (near CoC values are negative, so only near blur survives).
void CircleDilatePS(float4 UVAndScreenPos : TEXCOORD0, out float OutColor : SV_Target0)
{
	// Sampling pattern (each letter is one 2x2 quad / one gather4)
	//   d g
	//   j M   (M = {0,0} point)
#if COMPILER_GLSL || COMPILER_GLSL_ES2
	// Gather is not used on this path: fetch the sixteen texels one at a time.
	// This leverages nearest sampling (bilinear won't work).
	// Probably not the best way to do this.
	// Component order inside each quad does not matter because only the minimum is kept.
	float2 TapUV = UVAndScreenPos.xy + 0.5*PostprocessInput0Size.zw;
	float4 QuadD, QuadG, QuadJ, QuadM;

	QuadD.x = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-2,-2)).a;
	QuadD.y = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-1,-2)).a;
	QuadD.z = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-2,-1)).a;
	QuadD.w = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-1,-1)).a;

	QuadG.x = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(0,-2)).a;
	QuadG.y = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,-2)).a;
	QuadG.z = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(0,-1)).a;
	QuadG.w = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,-1)).a;

	QuadJ.x = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-2,0)).a;
	QuadJ.y = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-1,0)).a;
	QuadJ.z = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-2,1)).a;
	QuadJ.w = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(-1,1)).a;

	QuadM.x = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(0,0)).a;
	QuadM.y = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,0)).a;
	QuadM.z = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(0,1)).a;
	QuadM.w = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, TapUV.xy, 0, int2(1,1)).a;
#else
	// One alpha gather per quad.
	float2 TapUV = UVAndScreenPos.xy + 1.0*PostprocessInput0Size.zw;
	float4 QuadD = PostprocessInput0.GatherAlpha(PostprocessInput0Sampler, TapUV, int2(-2,-2));
	float4 QuadG = PostprocessInput0.GatherAlpha(PostprocessInput0Sampler, TapUV, int2(0,-2));
	float4 QuadJ = PostprocessInput0.GatherAlpha(PostprocessInput0Sampler, TapUV, int2(-2,0));
	float4 QuadM = PostprocessInput0.GatherAlpha(PostprocessInput0Sampler, TapUV, int2(0,0));
#endif // COMPILER_GLSL

	// Make sure near is only near blur.
	float SmallestCoc = Min16(QuadD, QuadG, QuadJ, QuadM);
	OutColor = min(0.0, SmallestCoc);
}
// Pixel shader entry point for the circle DOF gather.
// Reads color + CoC (alpha) from PostprocessInput0 and a dilated near CoC from PostprocessInput1 (.x).
// Pass 0 searches four taps of PostprocessInput1 for a near CoC; passes 1-3 each gather four taps of
// PostprocessInput0 and accumulate them with radius and intersection based weights.
// Output: rgb = weighted average color, a = nearest CoC found (min of FarCoc3 and NearCoc).
void CirclePS(float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor0 : SV_Target0)
{
float2 UV = UVAndScreenPos.xy;
//
// Pass 0
// Dilate near minimum CoC (near CoC is negative values).
//
// Fixed maximum search size (in terms of Circle of Confusion radius).
// Higher than 8 is too noisy for 4 samples.
float Coc = 8.0;
// Get base semi-random direction and dither along radius.
// Reused throughout the rest of the algorithm.
float TwoPi = 2.0 * 3.14159;
float RadianBase = NoizSnorm(UVAndScreenPos.xy, 0.010 * RandomOffset.x) * TwoPi;
float RadiusBase = NoizNorm(UVAndScreenPos.xy, 0.013 * RandomOffset.x);
// Radius
// Each tap gets its own quarter of the squared radius range; the sqrt maps that to a radius.
float RadiusBase2 = RadiusBase * (1.0/4.0);
float R1 = sqrt(RadiusBase2 + 3.0/4.0) * Coc;
float R2 = sqrt(RadiusBase2 + 2.0/4.0) * Coc;
float R3 = sqrt(RadiusBase2 + 1.0/4.0) * Coc;
float R4 = sqrt(RadiusBase2 + 0.0/4.0) * Coc;
// The four taps are a quarter turn apart, starting at RadianBase.
float2 UV1 = RotVec(R1, RadianBase + TwoPi * 0.0/4.0);
float2 UV2 = RotVec(R2, RadianBase + TwoPi * 2.0/4.0);
float2 UV3 = RotVec(R3, RadianBase + TwoPi * 1.0/4.0);
float2 UV4 = RotVec(R4, RadianBase + TwoPi * 3.0/4.0);
// Scale the offsets by PostprocessInput0Size.zw and add them to the pixel UV.
UV1 = UVAndScreenPos.xy + UV1 * PostprocessInput0Size.zw;
UV2 = UVAndScreenPos.xy + UV2 * PostprocessInput0Size.zw;
UV3 = UVAndScreenPos.xy + UV3 * PostprocessInput0Size.zw;
UV4 = UVAndScreenPos.xy + UV4 * PostprocessInput0Size.zw;
float D1 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, UV1, 0).x;
float D2 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, UV2, 0).x;
float D3 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, UV3, 0).x;
float D4 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, UV4, 0).x;
// Large starting value; it is min'ed with the taps here and with FarCoc in pass 1.
float NearCoc = 65536.0;
float Feather = 2.0;
// Only accept a tap whose CoC magnitude (plus Feather) exceeds the tap radius.
if(abs(D1)+Feather > R1) NearCoc = min(NearCoc, D1);
if(abs(D2)+Feather > R2) NearCoc = min(NearCoc, D2);
if(abs(D3)+Feather > R3) NearCoc = min(NearCoc, D3);
if(abs(D4)+Feather > R4) NearCoc = min(NearCoc, D4);
//
// Pass 1
//
// Going to grab sets of 4 samples per pass.
// Each set of 4 samples can be a smaller circle of confusion
// (aka can be in-front of the larger background).
// Setup for 12 samples (3 passes of 4 samples).
// The radius steps below are k/11.5: pass 1 uses k = 9,3,6,0, pass 2 k = 10,4,7,1, pass 3 k = 11,5,8,2.
RadiusBase *= (1.0/11.5);
// Grab circle of confusion for the pixel and pixel color.
OutColor0 = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, UV, 0);
float FarCoc = OutColor0.a;
// Fix in case no near exists.
NearCoc = min(NearCoc, FarCoc);
// Used for sample pattern.
Coc = max(abs(FarCoc),abs(NearCoc));
// Bring out to the smaller radius of sample sets.
// This has the highest chance of seeing a smaller overlapping CoC.
R1 = (RadiusBase+9.0/11.5) * Coc;
R2 = (RadiusBase+3.0/11.5) * Coc;
R3 = (RadiusBase+6.0/11.5) * Coc;
R4 = (RadiusBase+0.0/11.5) * Coc;
// Ensure at least getting different sample than center pixel.
float R1a = max(1.0,R1);
float R2a = max(1.0,R2);
float R3a = max(1.0,R3);
float R4a = max(1.0,R4);
// The clamped radii position the taps; the unclamped radii are used for the weights below.
UV1 = RotVec(R1a, RadianBase + TwoPi * 0.0/12.0);
UV2 = RotVec(R2a, RadianBase + TwoPi * 3.0/12.0);
UV3 = RotVec(R3a, RadianBase + TwoPi * 6.0/12.0);
UV4 = RotVec(R4a, RadianBase + TwoPi * 9.0/12.0);
UV1 = UVAndScreenPos.xy + UV1 * PostprocessInput0Size.zw;
UV2 = UVAndScreenPos.xy + UV2 * PostprocessInput0Size.zw;
UV3 = UVAndScreenPos.xy + UV3 * PostprocessInput0Size.zw;
UV4 = UVAndScreenPos.xy + UV4 * PostprocessInput0Size.zw;
float4 C1 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV1, 0);
float4 C2 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV2, 0);
float4 C3 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV3, 0);
float4 C4 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV4, 0);
// Base weight works around the max(1.0,radius) constraint.
// Base weight also shapes to weight higher on the outside radius.
// W0 is the center pixel's weight: 1 at Coc == 0, 0 once Coc >= 1.
float W0 = 1.0 - saturate(Coc);
float W1 = R1;
float W2 = R2;
float W3 = R3;
float W4 = R4;
// Intersection weight: 0=sample does not intersect pixel, to 1=sample intersects.
// TODO: Tune feather factors.
float IFeather0 = 1.0/4.0;
float I1 = saturate((abs(C1.a) - R1) * IFeather0);
float I2 = saturate((abs(C2.a) - R2) * IFeather0);
float I3 = saturate((abs(C3.a) - R3) * IFeather0);
float I4 = saturate((abs(C4.a) - R4) * IFeather0);
// Check if have a more near intersecting Coc for next pass.
float FarCoc2 = FarCoc;
if(I1*W1 > 0.0) FarCoc2 = min(FarCoc2, C1.a);
if(I2*W2 > 0.0) FarCoc2 = min(FarCoc2, C2.a);
if(I3*W3 > 0.0) FarCoc2 = min(FarCoc2, C3.a);
if(I4*W4 > 0.0) FarCoc2 = min(FarCoc2, C4.a);
// Fully ignore intersection weight when in nearfield blur
// and sample average CoC is 50% between near and far CoC neighborhood.
float AvgCoc = (FarCoc + C1.a + C2.a + C3.a + C4.a) * (1.0/5.0);
// Get dilated far.
FarCoc = max(FarCoc, max(max(C1.a, C2.a),max(C3.a, C4.a)));
// Controls the transition between states.
float IFeather1 = 1.0;
float IFeather2 = 2.0;
// Ignore is computed once here and reused unchanged by passes 2 and 3.
// NOTE(review): when NearCoc == FarCoc the ratio below is 0/0; the result then depends on how
// saturate() treats NaN on the target platform - confirm this is acceptable on all targets.
float Ignore = saturate(-NearCoc * IFeather1) * saturate(((AvgCoc - FarCoc) / (NearCoc - FarCoc)) * IFeather2);
W1 *= lerp(I1, 1.0, Ignore);
W2 *= lerp(I2, 1.0, Ignore);
W3 *= lerp(I3, 1.0, Ignore);
W4 *= lerp(I4, 1.0, Ignore);
// Make sure at least something is not zero.
W0 += 1.0/65536.0;
// Start weighted accumulation.
OutColor0.rgb = OutColor0.rgb * W0 + C1.rgb * W1 + C2.rgb * W2 + C3.rgb * W3 + C4.rgb * W4;
float Weight = W0+W1+W2+W3+W4;
// Set current result as possible background.
float3 Background = OutColor0.rgb * (1.0/Weight);
//
// Pass 2
//
// Drop weight of existing pass if Coc changes too much.
float Coc2 = max(abs(FarCoc2),abs(NearCoc));
// Drop stays slightly above zero so the accumulated weight never becomes exactly 0.
float Drop = (1.0/65536.0) + 1.0 - saturate(abs(Coc - Coc2));
OutColor0.rgb *= Drop;
Weight *= Drop;
R1 = (RadiusBase+10.0/11.5) * Coc2;
R2 = (RadiusBase+ 4.0/11.5) * Coc2;
R3 = (RadiusBase+ 7.0/11.5) * Coc2;
R4 = (RadiusBase+ 1.0/11.5) * Coc2;
R1a = max(1.0,R1);
R2a = max(1.0,R2);
R3a = max(1.0,R3);
R4a = max(1.0,R4);
UV1 = RotVec(R1a, RadianBase + TwoPi * 8.0/12.0);
UV2 = RotVec(R2a, RadianBase + TwoPi * 11.0/12.0);
UV3 = RotVec(R3a, RadianBase + TwoPi * 2.0/12.0);
UV4 = RotVec(R4a, RadianBase + TwoPi * 5.0/12.0);
UV1 = UVAndScreenPos.xy + UV1 * PostprocessInput0Size.zw;
UV2 = UVAndScreenPos.xy + UV2 * PostprocessInput0Size.zw;
UV3 = UVAndScreenPos.xy + UV3 * PostprocessInput0Size.zw;
UV4 = UVAndScreenPos.xy + UV4 * PostprocessInput0Size.zw;
C1 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV1, 0);
C2 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV2, 0);
C3 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV3, 0);
C4 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV4, 0);
// Lerp to background if outside possibly smaller CoC.
C1.rgb = lerp(C1.rgb, Background.rgb, saturate(abs(C1.a) - Coc2));
C2.rgb = lerp(C2.rgb, Background.rgb, saturate(abs(C2.a) - Coc2));
C3.rgb = lerp(C3.rgb, Background.rgb, saturate(abs(C3.a) - Coc2));
C4.rgb = lerp(C4.rgb, Background.rgb, saturate(abs(C4.a) - Coc2));
W1 = R1;
W2 = R2;
W3 = R3;
W4 = R4;
// Intersection weight: 0=sample does not intersect pixel, to 1=sample intersects.
I1 = saturate((abs(C1.a) - R1) * IFeather0);
I2 = saturate((abs(C2.a) - R2) * IFeather0);
I3 = saturate((abs(C3.a) - R3) * IFeather0);
I4 = saturate((abs(C4.a) - R4) * IFeather0);
// Check if have a more near intersecting Coc for next pass.
float FarCoc3 = FarCoc2;
if(I1*W1 > 0.0) FarCoc3 = min(FarCoc3, C1.a);
if(I2*W2 > 0.0) FarCoc3 = min(FarCoc3, C2.a);
if(I3*W3 > 0.0) FarCoc3 = min(FarCoc3, C3.a);
if(I4*W4 > 0.0) FarCoc3 = min(FarCoc3, C4.a);
W1 *= lerp(I1, 1.0, Ignore);
W2 *= lerp(I2, 1.0, Ignore);
W3 *= lerp(I3, 1.0, Ignore);
W4 *= lerp(I4, 1.0, Ignore);
OutColor0.rgb += C1.rgb * W1 + C2.rgb * W2 + C3.rgb * W3 + C4.rgb * W4;
Weight += W1+W2+W3+W4;
//
// Pass 3
//
// Drop weight of existing pass if Coc changes too much.
float Coc3 = max(abs(FarCoc3),abs(NearCoc));
Drop = (1.0/65536.0) + 1.0 - saturate(abs(Coc2 - Coc3));
OutColor0.rgb *= Drop;
Weight *= Drop;
// Send near most CoC back to recombine pass.
OutColor0.a = min(FarCoc3, NearCoc);
R1 = (RadiusBase+11.0/11.5) * Coc3;
R2 = (RadiusBase+ 5.0/11.5) * Coc3;
R3 = (RadiusBase+ 8.0/11.5) * Coc3;
R4 = (RadiusBase+ 2.0/11.5) * Coc3;
R1a = max(1.0,R1);
R2a = max(1.0,R2);
R3a = max(1.0,R3);
R4a = max(1.0,R4);
UV1 = RotVec(R1a, RadianBase + TwoPi * 4.0/12.0);
UV2 = RotVec(R2a, RadianBase + TwoPi * 7.0/12.0);
UV3 = RotVec(R3a, RadianBase + TwoPi * 10.0/12.0);
UV4 = RotVec(R4a, RadianBase + TwoPi * 1.0/12.0);
UV1 = UVAndScreenPos.xy + UV1 * PostprocessInput0Size.zw;
UV2 = UVAndScreenPos.xy + UV2 * PostprocessInput0Size.zw;
UV3 = UVAndScreenPos.xy + UV3 * PostprocessInput0Size.zw;
UV4 = UVAndScreenPos.xy + UV4 * PostprocessInput0Size.zw;
C1 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV1, 0);
C2 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV2, 0);
C3 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV3, 0);
C4 = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV4, 0);
// Lerp to background if outside possibly smaller CoC (same as pass 2, using Coc3).
C1.rgb = lerp(C1.rgb, Background.rgb, saturate(abs(C1.a) - Coc3));
C2.rgb = lerp(C2.rgb, Background.rgb, saturate(abs(C2.a) - Coc3));
C3.rgb = lerp(C3.rgb, Background.rgb, saturate(abs(C3.a) - Coc3));
C4.rgb = lerp(C4.rgb, Background.rgb, saturate(abs(C4.a) - Coc3));
W1 = R1;
W2 = R2;
W3 = R3;
W4 = R4;
// Intersection weight: 0=sample does not intersect pixel, to 1=sample intersects.
I1 = saturate((abs(C1.a) - R1) * IFeather0);
I2 = saturate((abs(C2.a) - R2) * IFeather0);
I3 = saturate((abs(C3.a) - R3) * IFeather0);
I4 = saturate((abs(C4.a) - R4) * IFeather0);
W1 *= lerp(I1, 1.0, Ignore);
W2 *= lerp(I2, 1.0, Ignore);
W3 *= lerp(I3, 1.0, Ignore);
W4 *= lerp(I4, 1.0, Ignore);
OutColor0.rgb += C1.rgb * W1 + C2.rgb * W2 + C3.rgb * W3 + C4.rgb * W4;
Weight += W1+W2+W3+W4;
// Normalize the accumulated color by the accumulated weight.
OutColor0.rgb *= (1.0/Weight);
}
// Pixel shader that combines the full res scene with the half resolution circle DOF result.
//   PostprocessInput0: sampled at jittered offsets around the pixel; its alpha is treated as a CoC in half res units
//   PostprocessInput1: half resolution input; alpha supplies the nearest CoC, and its 2x2 neighborhood
//                      limits the full resolution result
// UVAndScreenPos: only .xy (UV) is read.
// Output: the jittered full resolution result, blended towards a copy clamped to the half res neighborhood range.
void MainCircleRecombinePS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// Circle of confusion size for the pixel.
	float PixDepth = CalcSceneDepth(UVAndScreenPos.xy);
	float PixCoc = DepthToCoc(PixDepth);

	// Grab nearest Coc.
	PixCoc = min(PixCoc, PostprocessInput1.SampleLevel(PostprocessInput1Sampler, UVAndScreenPos.xy, 0).a);

	// Transform into sample pattern.
	PixCoc = abs(PixCoc) * 2.0; // 2x because full instead of half resolution.

#if 1
	// Fetch 2 samples mirrored around the pixel
	// which is stochastically distributed to fill out the circle of confusion.
	// TODO: Fix the "random values".
	float RadianBase = NoizNorm(UVAndScreenPos.xy, 0.010 * RandomOffset.x) * 3.14159 * 2.0;
	float RadiusJitter = NoizNorm(UVAndScreenPos.xy, 0.013 * RandomOffset.x);
	float ICoc = PixCoc*sqrt(RadiusJitter);
	// RotVec takes a scalar radius, so pass ICoc directly instead of a float2 that gets truncated.
	float2 VP = RotVec(ICoc, RadianBase) * PostprocessInput0Size.zw;

	// These two samples will still have jitter induced artifacts (very limited utility).
	// These two samples will also have bleeding artifacts.
	float4 CA = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy + VP, 0);
	float4 CB = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy - VP, 0);

	// Weight the two samples to avoid foreground into background bleed.
	float IFeather0 = 1.0/4.0;
	ICoc *= 0.5; // Coc is half res units.
	float I1 = saturate((abs(CA.a) - ICoc) * IFeather0);
	float I2 = saturate((abs(CB.a) - ICoc) * IFeather0);
	// Make sure something is non-zero.
	I1 += 1.0/65536.0;
	I2 += 1.0/65536.0;
	OutColor = (CA * I1 + CB * I2) * (1.0/(I1+I2));
#else
	// Possibly higher quality option in the future.
	// Fetch 4 samples in filled disc pattern
	// which is stochastically distributed to fill out the circle of confusion.
	float RadianBase = NoizNorm(UVAndScreenPos.xy, 0.010 * RandomOffset.x) * 3.14159 * 2.0;
	float RadiusBase = NoizNorm(UVAndScreenPos.xy, 0.013 * RandomOffset.x);
	float RadiusBase2 = RadiusBase * (1.0/4.0);
	float R1 = sqrt(RadiusBase2 + 3.0/4.0) * PixCoc;
	float R2 = sqrt(RadiusBase2 + 2.0/4.0) * PixCoc;
	float R3 = sqrt(RadiusBase2 + 1.0/4.0) * PixCoc;
	float R4 = sqrt(RadiusBase2 + 0.0/4.0) * PixCoc;
	float TwoPi = 3.14159 * 2.0;
	float2 UV1 = RotVec(R1, RadianBase + TwoPi * 0.0/4.0);
	float2 UV2 = RotVec(R2, RadianBase + TwoPi * 2.0/4.0);
	float2 UV3 = RotVec(R3, RadianBase + TwoPi * 1.0/4.0);
	float2 UV4 = RotVec(R4, RadianBase + TwoPi * 3.0/4.0);
	UV1 = UVAndScreenPos.xy + UV1 * PostprocessInput0Size.zw;
	UV2 = UVAndScreenPos.xy + UV2 * PostprocessInput0Size.zw;
	UV3 = UVAndScreenPos.xy + UV3 * PostprocessInput0Size.zw;
	UV4 = UVAndScreenPos.xy + UV4 * PostprocessInput0Size.zw;
	float4 CA = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV1, 0);
	float4 CB = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV2, 0);
	float4 CC = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV3, 0);
	float4 CD = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UV4, 0);

	// Weight the four samples to avoid foreground into background bleed.
	float IFeather0 = 1.0/4.0;
	float ICoc = PixCoc * 0.5; // Coc is half res units.
	float I1 = saturate((abs(CA.a) - ICoc) * IFeather0);
	float I2 = saturate((abs(CB.a) - ICoc) * IFeather0);
	float I3 = saturate((abs(CC.a) - ICoc) * IFeather0);
	float I4 = saturate((abs(CD.a) - ICoc) * IFeather0);
	// Make sure something is non-zero.
	I1 += 1.0/65536.0;
	I2 += 1.0/65536.0;
	I3 += 1.0/65536.0;
	I4 += 1.0/65536.0;
	OutColor = (CA * I1 + CB * I2 + CC * I3 + CD * I4) * (1.0/(I1+I2+I3+I4));
#endif

	// Grab the half resolution neighborhood to remove the artifacts from the full resolution output.
	// Nearest location.
#if 1
	// This has higher in-focus contrast, but possibly lower noise reduction later.
	// Snap to a texel center of PostprocessInput1 so the four loads below form a 2x2 block.
	float2 HUVBase = UVAndScreenPos.xy * PostprocessInput1Size.xy - 0.5;
	float2 HUV = (trunc(HUVBase) + 0.5) * PostprocessInput1Size.zw;
#else
	// This makes the mostly in-focus transition bad (too blurry).
	float2 HUV = UVAndScreenPos.xy - 0.5 * PostprocessInput1Size.zw;
#endif

	// Load four nearest samples.
	float4 H0 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, HUV, 0);
	float4 H1 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, HUV, 0, int2(1,0));
	float4 H2 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, HUV, 0, int2(0,1));
	float4 H3 = PostprocessInput1.SampleLevel(PostprocessInput1Sampler, HUV, 0, int2(1,1));

	// TODO: This would work a lot better in YUV style colorspace?
	// Limit the full resolution to remove jitter artifacts.
	float4 HMax = max(max(H0,H1),max(H2,H3));
	float4 HMin = min(min(H0,H1),min(H2,H3));

#if 1
	// Increase contrast of the limit a little to work around too strong denoise at near-in-focus.
	// The widening is full strength at PixCoc == 0 and gone once PixCoc reaches 8.
	float4 HD = HMin / 8.0;
	float Small = 1.0 - saturate(PixCoc*PixCoc*(1.0/64.0));
	HMax += HD * Small;
	HMin -= HD * Small;
#endif

	// Blend in the limited version quickly to remove HDR jitter artifacts and noise.
	// The blend reaches the fully limited value once PixCoc is 0.5 or more.
	float4 OutLimited = min(max(OutColor,HMin),HMax);
	OutColor = lerp(OutColor, OutLimited, saturate(PixCoc*PixCoc*4.0));
}