Files
UnrealEngineUWP/Engine/Shaders/PostProcessBokehDOF.usf
2014-03-14 14:13:41 -04:00

482 lines
16 KiB
Plaintext

// Copyright 1998-2014 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessBokehDOF.usf: PostProcessing Bokeh depth of field.
=============================================================================*/
#include "Common.usf"
#include "PostProcessCommon.usf"
#include "DepthOfFieldCommon.usf"
// for the BokehDOF vertex shader, from postprocessing settings
// .x: color threshold (compared against the summed rgb range of the 2x2 samples),
// .y: size threshold (compared against the circle of confusion), .zw: unused
float4 DepthOfFieldThresholds;
// .xy: tile count, .zw: tile size
uint4 TileCountAndSize;
// .xy: size in pixels (scales the circle of confusion to the sprite size),
// .zw: LeftTop of the original viewport in render target scaled coordinates
float4 KernelSize;
// small Bokeh like texture, multiplied onto each sprite in MainPS
Texture2D LensTexture;
SamplerState LensTextureSampler;
// Computes how much a given depth contributes to the two out of focus layers.
// @param Depth scene depth, compared against View.DepthOfFieldFocalDistance
// @return x:layer in front of focal plane, y: layer behind focal plane 1-x-y:layer in focus
float2 ComputeLayerContributions(float Depth)
{
	// both transition regions are clamped to a small positive value so a region of 0
	// gives a hard transition instead of a division by zero (0/0 exactly at the focal distance)
	float NearTransitionRegion = max(View.DepthOfFieldNearTransitionRegion, 0.0001f);
	float FarTransitionRegion = max(View.DepthOfFieldFarTransitionRegion, 0.0001f);

	float Front = saturate((View.DepthOfFieldFocalDistance - Depth) / NearTransitionRegion);
	float Back = saturate((Depth - View.DepthOfFieldFocalDistance - View.DepthOfFieldFocalRegion) / FarTransitionRegion);
	return float2(Front, Back);
}
// Converts a pixel position to a screen position by undoing the bias (.zw) and
// the scale (.xy) stored in ScreenPosToPixel.
// can be optimized
float2 PixelToScreenPos(float2 PixelPos)
{
	float2 Scale = ScreenPosToPixel.xy;
	float2 Bias = ScreenPosToPixel.zw;
	float2 Unbiased = PixelPos - Bias;
	return Unbiased / Scale;
}
#if DOF_METHOD == 0
// vertex shader: emits one corner of a Bokeh sprite, one sprite per tile of PostprocessInput0
// (color in rgb, depth in a). The sprite is sized by the circle of confusion of the tile.
void MainVS(
	uint VId : SV_VertexID,
	uint IId : SV_InstanceID,
	out float2 OutTexCoord : TEXCOORD0,
	out nointerpolation float4 OutColor : TEXCOORD1,
	out float4 OutPosition : SV_POSITION
	)
{
	uint2 TileCount = TileCountAndSize.xy;
	uint2 TileSize = TileCountAndSize.zw;

	// needs to be the same on C++ side (faster on NVIDIA and AMD)
	uint QuadsPerInstance = 8;

#if DOF_INDEX_STYLE == 1
	// 6 vertices per quad: split the incoming ids into a global quad index and a corner index
	uint QuadIndex = IId * QuadsPerInstance + (VId / 6);
	uint CornerIndex = VId % 6;
	// triangle A: 0:left top, 1:right top, 2: left bottom
	// triangle B: 3:right bottom, 4:left bottom, 5: right top
	float2 LocalPos = float2(CornerIndex % 2, CornerIndex > 1 && CornerIndex < 5);
#else // DOF_INDEX_STYLE == 1
	// 4 vertices per quad: split the incoming ids into a global quad index and a corner index
	uint QuadIndex = IId * QuadsPerInstance + (VId / 4);
	uint CornerIndex = VId % 4;
	// 0:left top, 1:right top, 2:left bottom, 3:right bottom
	float2 LocalPos = float2(CornerIndex % 2, CornerIndex / 2);
#endif // DOF_INDEX_STYLE == 1

	// the quads are laid out row by row over the tile grid
	float2 TilePos = float2(QuadIndex % TileCount.x, QuadIndex / TileCount.x);

	float2 TexelSize = PostprocessInput0Size.zw;
	float2 PixelPos = TilePos * TileSize + KernelSize.zw;
	float2 InputTexCoord = PixelPos * TexelSize;

	// the 2x2 samples around the tile position
	float4 Sample00 = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2(-0.5f, -0.5f) * TexelSize, 0);
	float4 Sample10 = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2( 0.5f, -0.5f) * TexelSize, 0);
	float4 Sample01 = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2(-0.5f, 0.5f) * TexelSize, 0);
	float4 Sample11 = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2( 0.5f, 0.5f) * TexelSize, 0);

	// per sample brightness (sum of rgb)
	float4 Lum = float4(dot(Sample00.rgb, 1), dot(Sample10.rgb, 1), dot(Sample01.rgb, 1), dot(Sample11.rgb, 1));
	float InvTotalLum = 1.0f / max(0.0001f, dot(Lum, 1));

	// move the sprite towards the brighter samples to capture fine details better
	// x: share of the brightness in the right column, y: share in the bottom row
	float2 SubPixel = float2(Lum.y + Lum.w, Lum.z + Lum.w) * InvTotalLum;
	PixelPos += SubPixel - 0.5f;

	// adjusting the sample position affects the brightness so we compensate for that
	float CorrectBrightness = dot(Lum, 0.25f);
	float2 LumFiltered2 = lerp(Lum.xz, Lum.yw, SubPixel.x);
	float LumFiltered = lerp(LumFiltered2.x, LumFiltered2.y, SubPixel.y);
	float BrightnessAdjustment = CorrectBrightness / max(0.001f, LumFiltered);

	// brightness weighted average of the 4 samples (color and depth)
	float4 SceneColorAndDepth = (Sample00 * Lum.x + Sample10 * Lum.y + Sample01 * Lum.z + Sample11 * Lum.w) * InvTotalLum;
	SceneColorAndDepth.rgb *= BrightnessAdjustment;

	float SceneDepth = SceneColorAndDepth.a;

	// ---------------------------------------------

	// 0..1
	float CircleOfConfusion = ComputeCircleOfConfusion(SceneDepth);

	// tweak values, currently neutral
	float OcclusionTweakFactor2 = 1.0f;
	float OpacityAdjustment = 1;

	// todo: max(1.0 looks wrong
	OutColor = float4(SceneColorAndDepth.rgb, 1) / max(1.0f, pow(CircleOfConfusion, OcclusionTweakFactor2));
	OutColor.a *= OpacityAdjustment;

	// added bias to get some content even from a very small radius
	float2 CoCPixelSize = CircleOfConfusion * KernelSize.xy + 2.0f;

	// offset in half res pixels to put two views in one texture with safety border
	// (only content in front of the focal distance is offset)
	float YOffset = 0;
	if(SceneDepth < View.DepthOfFieldFocalDistance)
	{
		YOffset = DepthOfFieldParams[1].w;
	}

	// offset the corners
	float2 CornerPixelPos = PixelPos + (LocalPos - 0.5f) * CoCPixelSize + float2(0, YOffset);

	OutPosition = float4(PixelToScreenPos(CornerPixelPos), 0, 1);
	OutTexCoord = LocalPos;
}
#else // DOF_METHOD == 0
// vertex shader (adaptive): a group of 4 quads shares the same 2x2 samples of PostprocessInput0
// (color in rgb, depth in a). If the 4 samples are similar enough only the first quad of the
// group is rendered (using the averaged sample) and the other 3 are collapsed to a point,
// otherwise each quad renders its own sample at a quarter of the intensity.
void MainVS(
	uint VId : SV_VertexID,
	uint IId : SV_InstanceID,
	out float2 OutTexCoord : TEXCOORD0,
	out nointerpolation float4 OutColor : TEXCOORD1,
	out float4 OutPosition : SV_POSITION
	)
{
	uint2 TileCount = TileCountAndSize.xy;
	uint2 TileSize = TileCountAndSize.zw;

	// one instance covers 4x2 tiles (two groups of 4 quads),
	// so the C++ side needs to submit 8 quads per instance
	uint QuarterTileCountX = TileCount.x / 4;

#if DOF_INDEX_STYLE == 1
	// 6 vertices per quad: split the vertex id into the quad id and the corner id
	uint QuadId = VId / 6;
	uint VertexId = VId % 6;
	// triangle A: 0:left top, 1:right top, 2: left bottom
	// triangle B: 3:right bottom, 4:left bottom, 5: right top
	float2 LocalPos = float2(VertexId % 2, VertexId > 1 && VertexId < 5);
#else // DOF_INDEX_STYLE == 1
	// 4 vertices per quad: split the vertex id into the quad id and the corner id
	uint QuadId = VId / 4;
	uint VertexId = VId % 4;
	// 0:left top, 1:right top, 2:left bottom, 3:right bottom
	float2 LocalPos = float2(VertexId % 2, VertexId / 2);
#endif // DOF_INDEX_STYLE == 1

	// index of the quad within its group of 4
	uint QuadIdIndex = QuadId % 4;

	float2 TilePos = float2(IId % QuarterTileCountX, IId / QuarterTileCountX) * uint2(4,2) + uint2(QuadId/4,0)*2;

	bool bRenderAll4Quads = false;

	OutPosition = float4(0, 0, 0, 1);
	OutTexCoord = LocalPos.xy;

	float2 LeftTop = KernelSize.zw;
	float2 PixelPos = TilePos * TileSize + LeftTop;

	// to not get filtered samples
	// PixelPos -= 0.5f;

	float2 InputTexCoord = PixelPos * PostprocessInput0Size.zw;

	// the 2x2 samples shared by the group of 4 quads
	float4 ColorAndDepths[4];
	ColorAndDepths[0] = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2(-0.5f, -0.5f) * PostprocessInput0Size.zw, 0);
	ColorAndDepths[1] = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2( 0.5f, -0.5f) * PostprocessInput0Size.zw, 0);
	ColorAndDepths[2] = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2(-0.5f, 0.5f) * PostprocessInput0Size.zw, 0);
	ColorAndDepths[3] = Texture2DSampleLevel(PostprocessInput0, PostprocessInput0Sampler, InputTexCoord + float2( 0.5f, 0.5f) * PostprocessInput0Size.zw, 0);

	float4 MinColorAndDepths = min(min(ColorAndDepths[0], ColorAndDepths[1]), min(ColorAndDepths[2], ColorAndDepths[3]));
	float4 MaxColorAndDepths = max(max(ColorAndDepths[0], ColorAndDepths[1]), max(ColorAndDepths[2], ColorAndDepths[3]));

	{
		float3 AbsColor = MaxColorAndDepths.rgb - MinColorAndDepths.rgb;

		if(dot(AbsColor, 1) > DepthOfFieldThresholds.x)
		{
			// disable adaptive if the colors are too different
			bRenderAll4Quads = true;
		}
	}

	{
		bool MinLayer = MinColorAndDepths.a < View.DepthOfFieldFocalDistance;
		bool MaxLayer = MaxColorAndDepths.a < View.DepthOfFieldFocalDistance + View.DepthOfFieldFocalRegion;

		if(MinLayer != MaxLayer)
		{
			// disable adaptive if we cross layers
			bRenderAll4Quads = true;
		}
	}

	// can be optimized
	{
		float AvgDepth = (MinColorAndDepths.a + MaxColorAndDepths.a) * 0.5f;
		float AvgCircleOfConfusion = ComputeCircleOfConfusion(AvgDepth);

		if(AvgCircleOfConfusion < DepthOfFieldThresholds.y)
		{
			// disable adaptive if the quads are too small
			// small quads don't cost much performance and skipping those would alias
			bRenderAll4Quads = true;
		}
	}

	// by now bRenderAll4Quads should be set --------------------------------

	float4 SceneColorAndDepth;

	if(bRenderAll4Quads)
	{
		// can be optimized?
		SceneColorAndDepth = ColorAndDepths[QuadIdIndex];
		// offset the 4 quads, each one onto its own sample
		PixelPos += float2(QuadIdIndex % 2, QuadIdIndex / 2) - 0.5f;
	}
	else
	{
		// a single quad represents the average of all 4 samples
		SceneColorAndDepth = (ColorAndDepths[0] + ColorAndDepths[1] + ColorAndDepths[2] + ColorAndDepths[3]) * 0.25f;
	}

	float SceneDepth = SceneColorAndDepth.a;

	// ---------------------------------------------

	// 0..1
	float CircleOfConfusion = ComputeCircleOfConfusion(SceneDepth);

	// added bias to get some content even from a very small radius
	float2 CoCPixelSize = CircleOfConfusion * KernelSize.xy + 2.0f;

	float OcclusionTweakFactor2 = 1.0f;

	// todo: max(1.0 looks wrong
	OutColor = float4(SceneColorAndDepth.rgb, 1) / max(1.0f, pow(CircleOfConfusion, OcclusionTweakFactor2));

	// weight by the contribution of the layer the sprite is rendered into
	float2 Layer = ComputeLayerContributions(SceneDepth);
	OutColor *= (SceneDepth < View.DepthOfFieldFocalDistance) ? Layer.r : Layer.g;

	if(!bRenderAll4Quads)
	{
		// we keep the first quad of the group and reject the other 3 (making them a point)
		if(QuadIdIndex != 0)
		{
			CoCPixelSize = 0;
		}

#if DOF_METHOD == 2
		// VisualizeAdaptiveDOF: green when is rendered fast (1 quad instead of 4 quads)
		// assigned (not multiplied) to match the red visualization below
		OutColor.rgb = dot(OutColor.rgb, 1.0f/3.0f) * float3(0, 1, 0);
#endif
	}
	else
	{
		// it's either one strong quad or 4 faint ones
		OutColor *= 0.25f;

#if DOF_METHOD == 2
		// VisualizeAdaptiveDOF: red when is rendered slow (all 4 quads)
		OutColor.rgb = dot(OutColor.rgb, 1.0f/3.0f) * float3(1, 0, 0);
#endif
	}

	// offset in half res pixels to put two views in one texture with safety border
	float YOffset = SceneDepth < View.DepthOfFieldFocalDistance ? DepthOfFieldParams[1].w : 0;

	// offset the corners
	OutPosition.xy = PixelToScreenPos(PixelPos + (LocalPos - 0.5f) * CoCPixelSize + float2(0, YOffset));
}
#endif // DOF_METHOD == 0
// Samples the 2x2 neighborhood around CenterUV and merges it using only the samples
// that belong to the dominant out of focus layer.
// @param CenterUV center of the 2x2 neighborhood in PostprocessInput0
// @param bFrontLayer out: true if the summed front layer contribution is larger than the summed back layer contribution
// @param Mask out: per sample contribution to the chosen layer (same order as the 4 samples)
// @return OutColor rgb: color, a: scene depth (mask weighted average, or the first sample if the mask is ~0)
float4 CommonDOFSetup(in float2 CenterUV, out bool bFrontLayer, out float4 Mask )
{
	float2 TexelSize = PostprocessInput0Size.zw;

	// no filtering (2x2 kernel) to get no leaking in Depth of Field
	float2 SampleUV[4];
	SampleUV[0] = CenterUV + TexelSize * float2(-0.5f, -0.5f);
	SampleUV[1] = CenterUV + TexelSize * float2( 0.5f, -0.5f);
	SampleUV[2] = CenterUV + TexelSize * float2(-0.5f, 0.5f);
	SampleUV[3] = CenterUV + TexelSize * float2( 0.5f, 0.5f);

	float4 Samples[4];
	float2 SampleLayers[4];

	FLATTEN for(uint SampleIndex = 0; SampleIndex < 4; ++SampleIndex)
	{
		float3 SampleColor = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, SampleUV[SampleIndex]).rgb;

		// clamping to a small number fixes black dots appearing (denorms?, 0 doesn't fix it)
		Samples[SampleIndex].rgb = max(float3(0.0001f, 0.0001f, 0.0001f), SampleColor);
		Samples[SampleIndex].a = CalcSceneDepth(SampleUV[SampleIndex]);

		SampleLayers[SampleIndex] = ComputeLayerContributions(Samples[SampleIndex].a);
	}

	// x: front layer, y: back layer
	float2 LayerSum = SampleLayers[0] + SampleLayers[1] + SampleLayers[2] + SampleLayers[3];

	bFrontLayer = LayerSum.x > LayerSum.y;

	if(bFrontLayer)
	{
		Mask = float4(SampleLayers[0].x, SampleLayers[1].x, SampleLayers[2].x, SampleLayers[3].x);
	}
	else
	{
		Mask = float4(SampleLayers[0].y, SampleLayers[1].y, SampleLayers[2].y, SampleLayers[3].y);
	}

	float SumMask = dot(Mask, 1);

	// fallback if (almost) no sample contributes to the chosen layer
	float4 OutColor = Samples[0];

	FLATTEN if(SumMask > 0.001f)
	{
		OutColor = (
			Samples[0] * Mask.x +
			Samples[1] * Mask.y +
			Samples[2] * Mask.z +
			Samples[3] * Mask.w ) / SumMask;
	}

	return OutColor;
}
// downsample to half res, put depth in alpha
void MainSetupPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// outputs of CommonDOFSetup() that are unused for this pass
	bool bUnusedFrontLayer;
	float4 UnusedMask;

	float4 ColorAndDepth = CommonDOFSetup(UVAndScreenPos.xy, bUnusedFrontLayer, UnusedMask);

	// clamp to avoid artifacts from exceeding fp16 through framebuffer blending of multiple very bright lights
	const float MaxColor = 256 * 256;

	OutColor.rgb = min(float3(MaxColor, MaxColor, MaxColor), ColorAndDepth.rgb);
	OutColor.a = ColorAndDepth.a;
}
// render in fullres to visualize the half res DOF input from the setup pass
// green: front layer dominates, blue: back layer dominates, brightness: summed layer mask
void VisualizeDOFPS(in float4 UVAndScreenPos : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	bool bFrontLayer;
	float4 Mask;

	// only the layer classification is needed, the merged color is ignored
	CommonDOFSetup(UVAndScreenPos.xy, bFrontLayer, Mask);

	float Intensity = dot(Mask, 1) * 0.8f;

	OutColor = float4(0, 0, 0, 0);

	if(bFrontLayer)
	{
		OutColor.g = Intensity;
	}
	else
	{
		OutColor.b = Intensity;
	}
}
// pixel shader to accumulate the Bokeh shaped elements
// @param TexCoord position within the sprite, used to look up the Bokeh shape
// @param InColor per sprite color and weight from the vertex shader (not interpolated)
void MainPS(
	float2 TexCoord : TEXCOORD0,
	nointerpolation float4 InColor : TEXCOORD1,
	out float4 OutColor : SV_Target0
	)
{
	// to make sure the quad is centered around the content
	// watch out: the texture can be offset
	// (debug: replace the lookup with float4(1,0,0,0.5f))
	OutColor = InColor * Texture2DSample(LensTexture, LensTextureSampler, TexCoord);
}
// pixel shader to combine the full res scene and the blurred images behind and in front of the focal plane
// RECOMBINE_METHOD 1 or 3: blends in the Bokeh DOF layers from PostprocessInput1
// RECOMBINE_METHOD 2 or 3: applies the separate translucency from PostprocessInput2
void MainRecombinePS(
	in float4 UVAndScreenPos : TEXCOORD0,
	out float4 OutColor : SV_Target0
	)
{
	// SceneColor in full res
	float2 PixelPosCenter = UVAndScreenPos.zw * ScreenPosToPixel.xy + ScreenPosToPixel.zw + 0.5f;
	float2 FullResUV = PixelPosCenter * PostprocessInput0Size.zw;

	float3 FullResSceneColor = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, FullResUV).rgb;
	float FullResSceneDepth = CalcSceneDepth(FullResUV);

	// without any recombine method the scene color is passed through
	OutColor = float4(FullResSceneColor, 0);

#if RECOMBINE_METHOD == 1 || RECOMBINE_METHOD == 3
	// BokehDOF in half res
	float2 BokehViewportUV = UVAndScreenPos.xy;

	// behind focal plane
	float4 BackLayerAccum = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, BokehViewportUV);
	// in front of focal plane, stored in the same texture at a vertical offset
	float4 FrontLayerAccum = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, BokehViewportUV + float2(0.0f, DepthOfFieldParams[1].y));

	float OcclusionTweakFactor1 = DepthOfFieldParams[0].w;

	float BackLayerMask = saturate(BackLayerAccum.a * OcclusionTweakFactor1);
	float FocusLayerMask = 1.0f - saturate(ComputeCircleOfConfusion(FullResSceneDepth) * 5); // todo: expose 5 as tweak value
	float FrontLayerMask = saturate(FrontLayerAccum.a * OcclusionTweakFactor1);

	// the accumulated colors are normalized by the accumulated weight in alpha
	float3 BackLayerColor = BackLayerAccum.rgb / max(BackLayerAccum.a, 0.001f);
	float3 FrontLayerColor = FrontLayerAccum.rgb / max(FrontLayerAccum.a, 0.001f);

	// 3 layers merged back to front
	// RGB color, A how much the full resolution shows through

	// Layer 1: half res background
	float4 LayerMerger = float4(lerp(FullResSceneColor, BackLayerColor, BackLayerMask), 0);
	// Layer 2: then we add the focused scene to fill the empty areas
	LayerMerger = lerp(LayerMerger, float4(FullResSceneColor, 1), FocusLayerMask);
	// Layer 3: on top of that blend the front half res layer
	LayerMerger = lerp(LayerMerger, float4(FrontLayerColor, 0), FrontLayerMask);

	// blend in full resolution where we are most in focus
	half FocusedWeight = LayerMerger.a;

	OutColor.rgb = lerp(LayerMerger.rgb, OutColor.rgb, FocusedWeight);
#endif

#if RECOMBINE_METHOD == 2 || RECOMBINE_METHOD == 3
	float2 SeparateTranslucencyUV = PixelPosCenter * PostprocessInput2Size.zw;
	float4 SeparateTranslucency = Texture2DSample(PostprocessInput2, PostprocessInput2Sampler, SeparateTranslucencyUV);

	// add RGB, darken by A (this allows to represent translucent and additive blending)
	OutColor.rgb = OutColor.rgb * SeparateTranslucency.a + SeparateTranslucency.rgb;
#endif
}