UnrealEngineUWP/Engine/Shaders/PostProcessTemporalAA.usf

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.

/*=============================================================================
	PostProcessTemporalAA.usf: Temporal AA
=============================================================================*/

#include "Common.usf"
#include "PostProcessCommon.usf"

// Temporal AA is enabled by default; define ENABLE_TEMPORAL_AA before this point to override.
// When it evaluates to 0 the pixel shaders in this file take their #else branch and just
// output a plain sample of PostprocessInput0.
#ifndef ENABLE_TEMPORAL_AA
#define ENABLE_TEMPORAL_AA	1
#endif

// Shader parameters. None of these are referenced by the code visible in this file;
// presumably they are consumed by PostProcessTemporalCommon.usf -- TODO confirm.
float SampleWeights[9];
float LowpassWeights[9];
float PlusWeights[5];
float2 RandomOffset;

// TODO: This can be removed.
// NOTE(review): the identifier is spelled "Feeback"; renaming it presumably requires
// updating whatever binds this parameter by name -- confirm before changing.
float ClampFeebackMix;

// Converts RGB to an unnormalized YCoCg triple (integer weights, no division):
//   Y  =  R + 2G +  B
//   Co = 2R      - 2B
//   Cg = -R + 2G -  B
// YCoCgToRGB() applies the matching 0.25 scale on the way back.
float3 RGBToYCoCg( float3 RGB )
{
	const float3 WeightsY  = float3(  1, 2,  1 );
	const float3 WeightsCo = float3(  2, 0, -2 );
	const float3 WeightsCg = float3( -1, 2, -1 );

	return float3( dot( RGB, WeightsY ), dot( RGB, WeightsCo ), dot( RGB, WeightsCg ) );
}

// Inverse of RGBToYCoCg(): takes the unnormalized (Y, Co, Cg) triple and returns RGB.
float3 YCoCgToRGB( float3 YCoCg )
{
	// Remove the factor of 4 carried by the forward transform.
	float3 Scaled = YCoCg * 0.25;

	float ScaledY  = Scaled.x;
	float ScaledCo = Scaled.y;
	float ScaledCg = Scaled.z;

	return float3(
		ScaledY + ScaledCo - ScaledCg,
		ScaledY + ScaledCg,
		ScaledY - ScaledCo - ScaledCg );
}

// Cheap luma approximation using (1, 2, 1) weights on (R, G, B).
// The weights sum to 4, so the result is 4x a normalized luma.
float Luma4(float3 Color)
{
	float GreenTwice = Color.g * 2.0;
	float RedPlusBlue = Color.r + Color.b;
	return GreenTwice + RedPlusBlue;
}

// HDR weight for a color: 1 / (Luma4(Color) * Exposure + 4).
// The +4 matches the 4x scale of Luma4().
float HdrWeight4(float3 Color, float Exposure)
{
	float ExposedLuma4 = Luma4(Color) * Exposure;
	return rcp(ExposedLuma4 + 4.0);
}

// HDR weight for a scalar luma value: 1 / (Color * Exposure + 1).
float HdrWeightY(float Color, float Exposure)
{
	float Denominator = Color * Exposure + 1.0;
	return rcp(Denominator);
}

// HDR weight using the green channel as the luma estimate: 1 / (Color.g * Exposure + 1).
float HdrWeightG(float3 Color, float Exposure)
{
	float Green = Color.g;
	float Denominator = Green * Exposure + 1.0;
	return rcp(Denominator);
}

// Scalar form of HdrWeightG(): takes the green/luma value directly and
// returns 1 / (Color * Exposure + 1).
float HdrWeightG_(float Color, float Exposure)
{
	float Denominator = Color * Exposure + 1.0;
	return rcp(Denominator);
}


// Scalar form of HdrWeight4(): takes a 4x-scaled luma value directly and
// returns 1 / (Color * Exposure + 4).
float HdrWeight4_(float Color, float Exposure)
{
	float Denominator = Color * Exposure + 4.0;
	return rcp(Denominator);
}

// Inverse of HdrWeight4(): returns 4 / (1 - Luma4(Color) * Exposure).
// Color here is the already-weighted color; multiplying it by this value undoes
// the HdrWeight4() scaling for the same Exposure.
float HdrWeightInv4(float3 Color, float Exposure)
{
	float Denominator = Luma4(Color) * (-Exposure) + 1.0;
	return 4.0 * rcp(Denominator);
}

// Inverse of HdrWeightG(): returns 1 / (1 - Color.g * Exposure), where Color is
// the already-weighted color.
float HdrWeightInvG(float3 Color, float Exposure)
{
	float Green = Color.g;
	float Denominator = Green * (-Exposure) + 1.0;
	return rcp(Denominator);
}

// Inverse of HdrWeightY(): returns 1 / (1 - Color * Exposure), where Color is
// the already-weighted scalar luma.
float HdrWeightInvY(float Color, float Exposure)
{
	float Denominator = Color * (-Exposure) + 1.0;
	return rcp(Denominator);
}

// Scalar form of HdrWeightInv4(): takes the weighted 4x-luma value directly and
// returns 4 / (1 - Color * Exposure).
float HdrWeightInv4_(float Color, float Exposure)
{
	float Denominator = Color * (-Exposure) + 1.0;
	return 4.0 * rcp(Denominator);
}

// Scalar form of HdrWeightInvG(): takes the weighted green/luma value directly and
// returns 1 / (1 - Color * Exposure).
float HdrWeightInvG_(float Color, float Exposure)
{
	float Denominator = Color * (-Exposure) + 1.0;
	return rcp(Denominator);
}


// Converts a value produced by PerceptualLuma4() back to linear luma by multiplying
// it with its inverse HDR weight (same Exposure as used for the forward mapping).
float LinearLuma4(float Channel, float Exposure)
{
	float InverseWeight = HdrWeightInv4_(Channel, Exposure);
	return Channel * InverseWeight;
}

// Converts a value produced by PerceptualLumaG() back to linear luma by multiplying
// it with its inverse HDR weight (same Exposure as used for the forward mapping).
float LinearLumaG(float Channel, float Exposure)
{
	float InverseWeight = HdrWeightInvG_(Channel, Exposure);
	return Channel * InverseWeight;
}


// HDR-weighted luma of a color: L / (L * Exposure + 4) with L = Luma4(Color).
// LinearLuma4() maps the result back.
float PerceptualLuma4(float3 Color, float Exposure)
{
	float ScaledLuma = Luma4(Color);
	float Weight = HdrWeight4_(ScaledLuma, Exposure);
	return ScaledLuma * Weight;
}

// HDR-weighted luma using green as the luma estimate: G / (G * Exposure + 1).
// LinearLumaG() maps the result back.
float PerceptualLumaG(float3 Color, float Exposure)
{
	float Green = Color.g;
	float Weight = HdrWeightG_(Green, Exposure);
	return Green * Weight;
}


// Weighted-sum luma of an RGB color. The weight set is chosen at compile time.
float Luma(float3 Color)
{
	#if 1
		// CCIR 601 luma weights.
		// This seems to work better (less same luma ghost trails).
		const float3 LumaWeights = float3(0.299, 0.587, 0.114);
	#else
		// Rec 709 luma weights.
		const float3 LumaWeights = float3(0.2126, 0.7152, 0.0722);
	#endif
	return dot(Color, LumaWeights);
}

// Compresses a value with x / (1 + x). HighlightDecompression() is the inverse.
float HighlightCompression(float Channel)
{
	float InvOnePlus = rcp(1.0 + Channel);
	return Channel * InvOnePlus;
}

// Expands a value with x / (1 - x), the inverse of HighlightCompression().
float HighlightDecompression(float Channel)
{
	float InvOneMinus = rcp(1.0 - Channel);
	return Channel * InvOneMinus;
}

// Perceptual luma of a color: the exposure-scaled luma is highlight-compressed and
// then square-rooted. LinearLuma() maps the result back.
float PerceptualLuma(float3 Color, float Exposure)
{
	float ExposedLuma = Luma(Color) * Exposure;
	float Compressed = HighlightCompression(ExposedLuma);
	return sqrt(Compressed);
}

// Inverse of PerceptualLuma(): squares the value and undoes the highlight compression,
// giving back the exposure normalized linear luma.
float LinearLuma(float Channel)
{
	float Squared = Channel * Channel;
	return HighlightDecompression(Squared);
}

// Ray vs. axis-aligned box intersection; the caller guarantees there is an intersection.
//   Dir = Ray direction.
//   Org = Ray start.
//   Box = Half-size of a box centered at {0,0,0} (its faces are at +Box and -Box).
// Returns the ray parameter of the hit: per axis the smaller of the two face
// distances is taken, and the largest of those three is returned.
float IntersectAABB(float3 Dir, float3 Org, float3 Box)
{
	#if PS4_PROFILE
		// This causes flicker, it should only be used on PS4 until proper fix is in.
		float SmallestAbsDir = min(min(abs(Dir.x), abs(Dir.y)), abs(Dir.z));
		if(SmallestAbsDir < (1.0/65536.0))
		{
			return 1.0;
		}
	#endif
	float3 InvDir = rcp(Dir);
	float3 TFaceA = (  Box  - Org) * InvDir;
	float3 TFaceB = ((-Box) - Org) * InvDir;
	float3 TNearest = min(TFaceA, TFaceB);
	return max(max(TNearest.x, TNearest.y), TNearest.z);
}

// Returns a factor in [0,1]: the saturated distance along the ray from History
// towards Filtered at which it meets the box spanned by Filtered and the
// neighborhood min/max (see IntersectAABB()).
float HistoryClamp(float3 History, float3 Filtered, float3 NeighborMin, float3 NeighborMax)
{
	// Bounds of the box, extended so it always contains Filtered.
	float3 BoxLo = min(Filtered, min(NeighborMin, NeighborMax));
	float3 BoxHi = max(Filtered, max(NeighborMin, NeighborMax));

	// IntersectAABB() expects a box centered at the origin, so shift everything by the center.
	float3 BoxCenter = (BoxHi + BoxLo) * 0.5;
	float3 RayDir = Filtered - History;
	float3 RayOrg = History - BoxCenter;
	float3 BoxHalfSize = BoxHi - BoxCenter;

	float HitDistance = IntersectAABB(RayDir, RayOrg, BoxHalfSize);
	return saturate(HitDistance);
}

// HDR weight for a color: 1 / max(Luma(Color) * Exposure, 1).
// Colors whose exposed luma is at most 1 get weight 1.
float HdrWeight(float3 Color, float Exposure)
{
	float ExposedLuma = Luma(Color) * Exposure;
	return rcp(max(ExposedLuma, 1.0));
}

// Blends ColorA towards ColorB by Blend, with each side additionally weighted by
// HdrWeight() of its rgb. The two weights are renormalized to sum to 1.
float4 HdrLerp(float4 ColorA, float4 ColorB, float Blend, float Exposure)
{
	float WeightA = (1.0 - Blend) * HdrWeight(ColorA.rgb, Exposure);
	float WeightB =        Blend  * HdrWeight(ColorB.rgb, Exposure);

	float InvWeightSum = rcp(WeightA + WeightB);
	float NormalizedA = WeightA * InvWeightSum;
	float NormalizedB = WeightB * InvWeightSum;

	return ColorA * NormalizedA + ColorB * NormalizedB;
}

// Computes per-axis sample positions and weights for a Catmull-Rom filter
// reduced to three taps per axis.
//   UV     = Sample position in normalized texture coordinates.
//   Size   = Texture size in texels.
//   Sample = Out: three UV positions (x and y are independent axes).
//   Weight = Out: the matching three weights per axis.
// The two inner Catmull-Rom taps are merged into the single middle tap, whose position
// is shifted between them by w2 / (w1 + w2) -- presumably so that one bilinear-filtered
// fetch stands in for both; confirm against the caller's sampler.
void Bicubic2DCatmullRom( in float2 UV, in float2 Size, out float2 Sample[3], out float2 Weight[3] )
{
	const float2 TexelSize = 1.0 / Size;

	// Work in texel units.
	float2 TexelPos = UV * Size;

	// Center of the texel at or just before the sample position, and the fraction past it.
	float2 BaseCenter = floor( TexelPos - 0.5 ) + 0.5;
	float2 t = TexelPos - BaseCenter;
	float2 t2 = t * t;
	float2 t3 = t2 * t;

	// Catmull-Rom weights of the four taps; the third is derived so all four sum to 1.
	float2 Tap0 = t2 - 0.5 * (t3 + t);
	float2 Tap1 = 1.5 * t3 - 2.5 * t2 + 1;
	float2 Tap3 = 0.5 * (t3 - t2);
	float2 Tap2 = 1 - Tap0 - Tap1 - Tap3;

	float2 InnerWeight = Tap1 + Tap2;

	Weight[0] = Tap0;
	Weight[1] = InnerWeight;
	Weight[2] = Tap3;

	// Positions in texel units, converted back to UV space.
	Sample[0] = ( BaseCenter - 1 ) * TexelSize;
	Sample[1] = ( BaseCenter + Tap2 / InnerWeight ) * TexelSize;
	Sample[2] = ( BaseCenter + 2 ) * TexelSize;
}

// Cheap per-pixel pseudo-random value in [0,1) (the result of frac()).
//   PixelPos = Integer pixel position used as the seed.
//   Magic    = Multiplier used in both hashing rounds.
float RandFast( uint2 PixelPos, float Magic = 3571.0 )
{
	float2 Hash = ( 1.0 / 4320.0 ) * PixelPos + float2( 0.25, 0.0 );

	// Two identical rounds of square / dot / frac.
	// Hash is deliberately kept as a float2: the scalar result of each round is
	// written back to both components and both feed the next dot().
	float2 Squared = Hash * Hash;
	Hash = frac( dot( Squared, Magic ) );

	Squared = Hash * Hash;
	Hash = frac( dot( Squared, Magic ) );

	return Hash.x;
}

// Temporal AA pixel shader entry point, "SSR" configuration.
// The filter itself is in PostProcessTemporalCommon.usf (not visible here). It is
// specialized through the AA_* macros defined right before the #include and is expected
// to read InExposureScale and write OutColor -- TODO confirm against that file.
//   UVAndScreenPos          = Only .xy is used directly in this function (fallback sample UV).
//   InExposureScaleVignette = .x is taken as the exposure scale.
//   OutColor                = Result, written to render target 0.
void SSRTemporalAAPS( float4 UVAndScreenPos : TEXCOORD0, float3 InExposureScaleVignette : TEXCOORD1, out float4 OutColor : SV_Target0 )
{
#if ENABLE_TEMPORAL_AA
	float InExposureScale = InExposureScaleVignette.x;
	// Configuration for the included implementation; the meaning of each value is
	// defined by PostProcessTemporalCommon.usf.
	#define AA_ALPHA 0
	#define AA_CROSS 0
	#define AA_DYNAMIC 0
	#define AA_LERP 8
	#define AA_NAN 1
	#include "PostProcessTemporalCommon.usf"
#else
	// Fallback when temporal AA is disabled (e.g. on broken platforms): at least draw
	// something, by outputting the input sampled at mip level 0 without any AA.
	OutColor = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy, 0);
#endif
}

// Temporal AA pixel shader entry point, "DOF" configuration.
// The filter itself is in PostProcessTemporalCommon.usf (not visible here). It is
// specialized through the AA_* macros defined right before the #include and is expected
// to read InExposureScale and write OutColor -- TODO confirm against that file.
//   UVAndScreenPos          = Only .xy is used directly in this function (fallback sample UV).
//   InExposureScaleVignette = .x is taken as the exposure scale.
//   OutColor                = Result, written to render target 0.
void DOFTemporalAAPS( float4 UVAndScreenPos : TEXCOORD0, float3 InExposureScaleVignette : TEXCOORD1, out float4 OutColor : SV_Target0 )
{
#if ENABLE_TEMPORAL_AA
	float InExposureScale = InExposureScaleVignette.x;
	// Configuration for the included implementation; the meaning of each value is
	// defined by PostProcessTemporalCommon.usf.
	#define AA_FILTERED 0
	#define AA_VELOCITY_WEIGHTING 0
	#define AA_ALPHA 0
	#define AA_CROSS 4
	#define AA_DYNAMIC 1
	#define AA_NAN 1
	#define AA_BORDER 1
	#define AA_FORCE_ALPHA_CLAMP 1
	#define AA_GREEN_AS_LUMA 1
	#define AA_YCOCG 1
	#define AA_DOF 1
	#include "PostProcessTemporalCommon.usf"
#else
	// Fallback when temporal AA is disabled: output the input sampled at mip level 0.
	OutColor = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy, 0);
#endif
}

// Temporal AA pixel shader entry point, "LightShaft" configuration.
// The filter itself is in PostProcessTemporalCommon.usf (not visible here). It is
// specialized through the AA_* macros defined right before the #include and is expected
// to read InExposureScale and write OutColor -- TODO confirm against that file.
// In this file the configuration differs from SSRTemporalAAPS only in AA_LERP (64 vs. 8).
//   UVAndScreenPos          = Only .xy is used directly in this function (fallback sample UV).
//   InExposureScaleVignette = .x is taken as the exposure scale.
//   OutColor                = Result, written to render target 0.
void LightShaftTemporalAAPS( float4 UVAndScreenPos : TEXCOORD0, float3 InExposureScaleVignette : TEXCOORD1, out float4 OutColor : SV_Target0 )
{
#if ENABLE_TEMPORAL_AA
	float InExposureScale = InExposureScaleVignette.x;
	// Configuration for the included implementation; the meaning of each value is
	// defined by PostProcessTemporalCommon.usf.
	#define AA_ALPHA 0
	#define AA_CROSS 0
	#define AA_DYNAMIC 0
	#define AA_LERP 64
	#define AA_NAN 1
	#include "PostProcessTemporalCommon.usf"
#else
	// Fallback when temporal AA is disabled: output the input sampled at mip level 0.
	OutColor = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy, 0);
#endif
}

// Main temporal AA pixel shader entry point.
// The filter itself is in PostProcessTemporalCommon.usf (not visible here). It is
// specialized through the AA_* macros defined right before the #include and is expected
// to read InExposureScale and write OutColor -- TODO confirm against that file.
// After the filter, a small per-pixel pseudo-random offset is added to OutColor.
//   UVAndScreenPos          = Only .xy is used directly in this function (fallback sample UV).
//   InExposureScaleVignette = .x is taken as the exposure scale.
//   Unused1, Unused2        = Not read; present only to match the vertex shader outputs.
//   SvPosition              = Pixel position; .xy seeds the random offset.
//   OutColor                = Result, written to render target 0.
void MainTemporalAAPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	in float3 InExposureScaleVignette : TEXCOORD1,
	// Note: these are needed to match up with the vertex shader outputs which is FPostProcessTonemapVS, otherwise a d3d debug error is issued
	in float4 Unused1 : TEXCOORD2,
	in float4 Unused2 : TEXCOORD3,
	in float4 SvPosition : SV_Position,
	out float4 OutColor : SV_Target0 )
{
#if ENABLE_TEMPORAL_AA
	float InExposureScale = InExposureScaleVignette.x;
	// Configuration for the included implementation; the meaning of each value is
	// defined by PostProcessTemporalCommon.usf.
	#define AA_FILTERED 1
	#define AA_BORDER 1
	#define AA_ALPHA 0
	#define AA_CROSS 2
	#define AA_GREEN_AS_LUMA 1
	#define AA_AABB 1
	#define AA_LOWPASS 0
	#define AA_DEBUG 0
	#define AA_VELOCITY_WEIGHTING 0
	#define AA_YCOCG 1
	#define AA_BICUBIC 1
	#include "PostProcessTemporalCommon.usf"

	// Shift the pixel position by a frame-dependent offset (low two bits of the frame
	// number in x, the next two bits in y), so the random pattern cycles over 16 frames.
	uint2 PixelPos = SvPosition.xy + uint2( View.FrameNumber & 3, (View.FrameNumber / 4) & 3 );
	float Random = RandFast( PixelPos );
	// Add an offset in [-1/2048, 1/2048) divided by the exposure scale to all four
	// channels -- presumably dithering against quantization; confirm intent.
	OutColor += (Random - 0.5) * (1.0 / 1024.0) / InExposureScale;
#else
	// Fallback when temporal AA is disabled: output the input sampled at mip level 0.
	OutColor = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy, 0);
#endif
}

// "Fast" variant of the main temporal AA pixel shader entry point.
// The filter itself is in PostProcessTemporalCommon.usf (not visible here). It is
// specialized through the AA_* macros defined right before the #include and is expected
// to read InExposureScale and write OutColor -- TODO confirm against that file.
// In this file the configuration differs from MainTemporalAAPS in AA_GREEN_AS_LUMA (0)
// and AA_BICUBIC (0), and the signature has no Unused1/Unused2 inputs.
// After the filter, a small per-pixel pseudo-random offset is added to OutColor.
//   UVAndScreenPos          = Only .xy is used directly in this function (fallback sample UV).
//   InExposureScaleVignette = .x is taken as the exposure scale.
//   SvPosition              = Pixel position; .xy seeds the random offset.
//   OutColor                = Result, written to render target 0.
void MainFastTemporalAAPS(
	in float4 UVAndScreenPos : TEXCOORD0,
	in float3 InExposureScaleVignette : TEXCOORD1,
	in float4 SvPosition : SV_Position,
	out float4 OutColor : SV_Target0 )
{
#if ENABLE_TEMPORAL_AA
	float InExposureScale = InExposureScaleVignette.x;
	// Configuration for the included implementation; the meaning of each value is
	// defined by PostProcessTemporalCommon.usf.
	#define AA_FILTERED 1
	#define AA_BORDER 1
	#define AA_ALPHA 0
	#define AA_CROSS 2
	#define AA_GREEN_AS_LUMA 0
	#define AA_AABB 1
	#define AA_LOWPASS 0
	#define AA_DEBUG 0
	#define AA_VELOCITY_WEIGHTING 0
	#define AA_YCOCG 1
	#define AA_BICUBIC 0
	#include "PostProcessTemporalCommon.usf"

	// Shift the pixel position by a frame-dependent offset (low two bits of the frame
	// number in x, the next two bits in y), so the random pattern cycles over 16 frames.
	uint2 PixelPos = SvPosition.xy + uint2( View.FrameNumber & 3, (View.FrameNumber / 4) & 3 );
	float Random = RandFast( PixelPos );
	// Add an offset in [-1/2048, 1/2048) divided by the exposure scale to all four
	// channels -- presumably dithering against quantization; confirm intent.
	OutColor += (Random - 0.5) * (1.0 / 1024.0) / InExposureScale;
#else
	// Fallback when temporal AA is disabled: output the input sampled at mip level 0.
	OutColor = PostprocessInput0.SampleLevel(PostprocessInput0Sampler, UVAndScreenPos.xy, 0);
#endif
}