Files
UnrealEngineUWP/Engine/Shaders/PostProcessTonemap.usf
Matt Kuhlenschmidt a8bb5a623a Fixed gamma not being accurate in Slate. We now use the same gamma correction scheme as in the tone mapper, which uses the more accurate sRGB standard (e.g., Slate UI should look the same as in Photoshop, assuming it was imported correctly).
- Consolidated gamma correction functions into their own shader file so they can be shared.

[CL 2473803 by Matt Kuhlenschmidt in Main branch]
2015-03-10 11:05:16 -04:00

536 lines
18 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessTonemap.usf: PostProcessing tone mapping
=============================================================================*/
#include "Common.usf"
#include "PostProcessCommon.usf"
#include "TonemapCommon.usf"
#include "GammaCorrectionCommon.usf"
//
// 64x64 tiling noise texture, optimized to have mostly high frequency content
Texture2D NoiseTexture;
SamplerState NoiseTextureSampler;
// xyz:SceneColorTint.rgb, w:unused
float4 ColorScale0;
// xyz:Bloom1Tint.rgb, w:unused
float4 ColorScale1;
// to scale UV (with border) to NormalizedUV (without border)
// (not referenced in the visible part of this file)
float2 TexScale;
// from the postprocess settings
// rgb: vignette color (only read with USE_VIGNETTE_COLOR), a: intensity handed to ComputeVignetteMask()
float4 VignetteColorIntensity;
// Fine film grain
// xy: offset added to the UV to form the grain seed, z: when > 0 MainVS_ES2 switches to full viewport UVs
float3 GrainRandomFull;
// x: grain intensity scale, y: grain intensity bias, z: grain jitter amount
float3 GrainScaleBiasJitter;
// rgb: LDR color blended over the tonemapped result, a: blend weight (0 keeps the tonemapped color)
float4 OverlayColor;
// Cheap hash that turns a 2D coordinate into a film grain value.
// @param GrainUV coordinate to hash (the shaders pass the UV plus a random offset)
// @return fractional part of a scaled sine of the coordinate
half GrainFromUV(float2 GrainUV)
{
	// Fold both axes into one phase; the large multiplier scrambles the sine before frac().
	float Phase = GrainUV.x + GrainUV.y * 543.31;
	return frac(sin(Phase) * 493013.0);
}
// Color LUT for color grading.
// With USE_VOLUME_LUT == 1 it is bound as the volume texture declared here;
// otherwise ColorLookupTable() reads a 256x16 unwrapped LUT from PostprocessInput3.
#if USE_VOLUME_LUT == 1
Texture3D ColorGradingLUT;
SamplerState ColorGradingLUTSampler;
#endif
// Runs an LDR color through the color grading LUT.
// @param InLDRColor in gamma space, has to be in 0..1 range
// @return the graded color read from the LUT
half3 ColorLookupTable(half3 InLDRColor)
{
#if USE_VOLUME_LUT == 1
	// Scale and bias so 0..1 lands on the texel centers of a 16 texel wide axis.
	return Texture3DSample(ColorGradingLUT, ColorGradingLUTSampler, InLDRColor * (15.0f / 16.0f) + (0.5f / 16.0f)).xyz;
#else
	// The LUT is a 16x16x16 volume unwrapped into a 256x16 2d texture (16 slices side by side along U).
	// Blue picks the slice, red/green address inside the slice. Can be optimized by using a volume texture.
	float2 HalfTexel = float2(0.5f / 256.0f, 0.5f / 16.0f);
	float TexelSpan = 15.0f / 16.0f;

	// Start of the lower blue slice in U, and how far blue sits between that slice and the next one.
	float SliceStartU = floor(InLDRColor.b * 14.9999f) / 16.0f;
	half SliceBlend = InLDRColor.b * 15.0f - SliceStartU * 16.0f;

	float SliceU = SliceStartU + InLDRColor.r * TexelSpan / 16.0f;
	float SliceV = InLDRColor.g * TexelSpan;

	// Fetch the same red/green location in two neighboring slices and blend along blue by hand.
	half3 LowerSlice = Texture2DSample(PostprocessInput3, PostprocessInput3Sampler, HalfTexel + float2(SliceU, SliceV)).rgb;
	half3 UpperSlice = Texture2DSample(PostprocessInput3, PostprocessInput3Sampler, HalfTexel + float2(SliceU + 1.0f / 16.0f, SliceV)).rgb;
	return lerp(LowerSlice, UpperSlice, SliceBlend);
#endif
}
// Texture based pseudo random value.
// Prefer PseudoRandom() when ALU performance is left over, and this function
// when TEX performance is left over.
// @param xy should be an integer position (e.g. pixel position on the screen)
// @return red channel of the noise texture at that position
float PseudoRandomTex(float2 xy)
{
	// The noise texture is 64x64, so dividing by 64 maps one position step to one texel.
	float2 NoiseUV = xy / 64.0f;
	return Texture2DSample(NoiseTexture, NoiseTextureSampler, NoiseUV).r;
}
// Shader parameter; not referenced by any function in the visible part of this file.
float SwitchVerticalAxis;
// Converts a screen position into a UV of PostprocessInput0. Can be optimized.
// @param ScreenPos screen position as produced by UVToScreenPos()
// @return UV for sampling PostprocessInput0
float2 ScreenPosToUV(float2 ScreenPos)
{
	// Screen position -> pixel position -> UV.
	// NOTE(review): assumes PostprocessInput0Size.zw holds the reciprocal of the texture size - confirm.
	float2 PixelPos = ScreenPos * ScreenPosToPixel.xy + ScreenPosToPixel.zw;
	return PixelPos * PostprocessInput0Size.zw;
}
// Converts a UV of PostprocessInput0 into a screen position; undoes the
// scale and bias that ScreenPosToUV() applies.
// @param UV texture coordinate in PostprocessInput0
// @return matching screen position
float2 UVToScreenPos(float2 UV)
{
	// UV -> pixel position -> screen position.
	// NOTE(review): assumes PostprocessInput0Size.xy holds the texture size in pixels - confirm.
	float2 PixelPos = UV * PostprocessInput0Size.xy;
	return (PixelPos - ScreenPosToPixel.zw) / ScreenPosToPixel.xy;
}
// Color fringe: r and g are the screen position scale factors MainVS uses to build
// the two extra lookup UVs (OutFringe.xy and OutFringe.zw); b and a are not read in the visible code.
float4 FringeUVParams;
// Vertex shader entry point of the tonemapper pass.
// @param InPosition rectangle vertex position, forwarded to DrawRectangle()
// @param InTexCoord rectangle vertex UV, forwarded to DrawRectangle()
// @param OutTexCoord xy: UV for the scene color, zw: output position xy
// @param OutExposureScaleVignette x: exposure scale, yz: vignette coordinates
// @param OutGrainUV xy: UV shifted by half a texel (grain jitter target), zw: grain seed
// @param OutFringe xy/zw: UVs for the two extra color fringe lookups
// @param OutPosition position written by DrawRectangle()
void MainVS(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	out float4 OutTexCoord : TEXCOORD0,
	out float3 OutExposureScaleVignette : TEXCOORD1,
	out float4 OutGrainUV : TEXCOORD2,
	out float4 OutFringe : TEXCOORD3,
	out float4 OutPosition : SV_POSITION
	)
{
	float2 QuadUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, QuadUV);
	OutTexCoord = float4(QuadUV, OutPosition.xy);

	// Exposure scale
#if FEATURE_LEVEL >= FEATURE_LEVEL_SM5
	// The texture can be GWhiteTexture which is 1x1, so only texel (0,0) is safe to read.
	OutExposureScaleVignette.x = EyeAdaptation.Load(int3(0, 0, 0)).r;
#else
	// Eye adaptation is not yet supported
	OutExposureScaleVignette.x = 1.0f;
#endif

	// Scale vignette to always be a circle with consistent corner intensity.
	OutExposureScaleVignette.yz = VignetteSpace(OutPosition.xy);

	// Grain: a half texel shifted UV to jitter towards, and a randomized seed for GrainFromUV().
	float2 JitterUV = QuadUV + PostprocessInput0Size.zw * float2(-0.5,0.5);
	float2 GrainSeed = QuadUV + GrainRandomFull.xy;
	OutGrainUV = float4(JitterUV, GrainSeed);

	// Fringe: scale the screen position by two different factors and convert back to UVs.
	float2 ScreenPos = UVToScreenPos(QuadUV);
	float2 FringeUV0 = ScreenPosToUV(ScreenPos * FringeUVParams.r);
	float2 FringeUV1 = ScreenPosToUV(ScreenPos * FringeUVParams.g);
	OutFringe = float4(FringeUV0, FringeUV1);
}
// Function graphing
// First curve plotted by Graph() (red).
// @return x scaled by a ramp that is 0 up to x = 0.5 and reaches 1 at x = 2.5
float F0( float x )
{
	float Ramp = saturate( (x - 0.5)/2 );
	return x * Ramp;
}
// Second curve plotted by Graph() (green); currently the identity y = x.
float F1( float x )
{
return x;
}
// Placeholder curve (identity); only used by a commented-out line in Graph().
float F2( float x )
{
return x;
}
// Placeholder curve (identity); only used by a commented-out line in Graph().
float F3( float x )
{
return x;
}
// Coverage of a plotted curve at one sample point.
// @param fx function value at the sample's x
// @param y the sample's y
// @param dydx slope of the function at the sample's x
// @param LineWidth distance at which the line has fully faded out
// @return 1 within half a LineWidth of the curve, smoothly falling to 0 at LineWidth
float LineShade( float fx, float y, float dydx, float LineWidth )
{
	// Dividing the vertical distance by sqrt(1 + slope^2) approximates the distance
	// perpendicular to the curve, which keeps steep sections from looking thicker.
	float CurveDistance = abs( fx - y ) / sqrt( 1 + Square( dydx ) );
	float Fade = smoothstep( 0.5 * LineWidth, LineWidth, CurveDistance );
	return 1 - Fade;
}
// Debug helper that plots F0 (red) and F1 (green) as lines.
// @param ScreenSpacePos position in -1..1, remapped to 0..1 before plotting
// @return color of the plot at that position (black where no curve passes)
float3 Graph( float2 ScreenSpacePos )
{
	float2 WindowMin = float2( 0, 0 );
	float2 WindowMax = float2( 1, 1 );
	float2 WindowSize = WindowMax - WindowMin;

	// NOTE(review): with the default 0..1 window this is just the -1..1 to 0..1 remap;
	// confirm the intended mapping before changing the window.
	float2 p = ( (ScreenSpacePos + 1) * 0.5 - WindowMin ) * WindowSize;
	float LineWidth = dot( WindowSize, 0.0005 );

	// Slopes by central difference, one line width to either side.
	float Span = 2 * LineWidth;
	float SlopeF0 = ( F0(p.x + LineWidth) - F0(p.x - LineWidth) ) / Span;
	float SlopeF1 = ( F1(p.x + LineWidth) - F1(p.x - LineWidth) ) / Span;

	float3 Color = float3( 1, 0, 0 ) * LineShade( F0(p.x), p.y, SlopeF0, LineWidth );
	Color += float3( 0, 1, 0 ) * LineShade( F1(p.x), p.y, SlopeF1, LineWidth );
	// Disabled extra curves:
	//Color += float3( 1, 1, 0 ) * LineShade( F2(p.x), p.y, ( F2(p.x + LineWidth) - F2(p.x - LineWidth) ) / Span, LineWidth );
	//Color += float3( 0, 1, 1 ) * LineShade( F3(p.x), p.y, ( F3(p.x + LineWidth) - F3(p.x - LineWidth) ) / Span, LineWidth );
	return Color;
}
// pixel shader entry point
// Turns the scene color into the final display color: optional grain jitter and color fringe on the
// lookup, then tint, bloom, exposure, vignette, grain, film curve, output transfer curve,
// overlay blend, grain quantization and color grading.
// @param UVAndScreenPos xy: UV for the scene color, zw: screen position
// @param InExposureScaleVignette x: exposure scale, yz: vignette coordinates
// @param GrainUV xy: UV to jitter towards, zw: seed for GrainFromUV()
// @param FringeUV xy: UV of the green fringe lookup, zw: UV of the red fringe lookup
// @param OutColor rgb: display color, a: luminance for post process AA (stays 0 with USE_GAMMA_ONLY)
void MainPS(
in float4 UVAndScreenPos : TEXCOORD0,
in float3 InExposureScaleVignette : TEXCOORD1,
in float4 GrainUV : TEXCOORD2,
in float4 FringeUV : TEXCOORD3,
out float4 OutColor : SV_Target0
)
{
// Initialize everything so the USE_GAMMA_ONLY path, which only writes rgb, leaves alpha defined.
OutColor = 0;
float2 UV = UVAndScreenPos.xy;
float2 ScreenSpacePos = UVAndScreenPos.zw;
#if USE_GRAIN_JITTER || USE_GRAIN_INTENSITY || USE_GRAIN_QUANTIZATION
// One grain value per pixel, shared by all grain features.
half Grain = GrainFromUV(GrainUV.zw);
#endif
float2 SceneUV = UV.xy;
#if USE_GRAIN_JITTER
// Move the lookup towards the shifted UV; low grain values jitter the most.
SceneUV = lerp(UV.xy, GrainUV.xy, (1.0 - Grain*Grain) * GrainScaleBiasJitter.z);
#endif
#if USE_COLOR_FRINGE
// Each channel comes from its own lookup: red at FringeUV.zw, green at FringeUV.xy, blue at SceneUV.
float2 SceneUVJitter = float2(0.0, 0.0);
#if USE_GRAIN_JITTER
// Carry the grain jitter over to the fringe lookups.
SceneUVJitter = SceneUV.xy - UV.xy;
#endif
half3 SceneColor = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, FringeUV.zw + SceneUVJitter.xy).rgb;
half SceneColorG = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, FringeUV.xy + SceneUVJitter.xy).g;
half SceneColorB = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, SceneUV).b;
SceneColor.g = SceneColorG;
SceneColor.b = SceneColorB;
#else
half3 SceneColor = Texture2DSample(PostprocessInput0, PostprocessInput0Sampler, SceneUV).rgb;
#endif
#if USE_GAMMA_ONLY
// Gamma only: no tint, bloom, exposure, tonemapping or grading, just a power curve.
OutColor.rgb = pow(SceneColor, InverseGamma.x);
#else
// Scene color tint.
half3 LinearColor = SceneColor * ColorScale0.rgb;
#if USE_BLOOM
// Add bloom, weighted by the bloom tint plus the tinted dirt mask.
float4 CombinedBloom = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, UV);
float3 BloomDirtMaskColor = Texture2DSample(BloomDirtMask.Mask, BloomDirtMask.MaskSampler, ScreenSpacePos * float2(0.5, -0.5f) + 0.5f).rgb * BloomDirtMask.Tint.rgb;
LinearColor += CombinedBloom.rgb * (ColorScale1.rgb + BloomDirtMaskColor);
#endif
float ExposureScale = InExposureScaleVignette.x;
#if NO_EYEADAPTATION_EXPOSURE_FIX
// Take the exposure from the w component of the dirt mask tint instead of the vertex shader value.
ExposureScale = BloomDirtMask.Tint.w;
#endif
LinearColor *= ExposureScale;
#if USE_VIGNETTE
#if USE_VIGNETTE_COLOR
// Colored vignette: fade towards the vignette color where the mask goes to 0.
LinearColor.rgb = lerp(VignetteColorIntensity.rgb, LinearColor.rgb, ComputeVignetteMask(InExposureScaleVignette.yz, VignetteColorIntensity.a));
#else
// Plain vignette: darken by the mask.
LinearColor.rgb *= ComputeVignetteMask(InExposureScaleVignette.yz, VignetteColorIntensity.a);
#endif
#endif
#if USE_GRAIN_INTENSITY
// Needs to go before tonemapping.
half GrainMul = Grain * GrainScaleBiasJitter.x + GrainScaleBiasJitter.y;
LinearColor.rgb *= GrainMul;
#endif
// Film curve (tonemapper).
#if USE_PHOTO
half3 FilmColor = FilmPostProcessPhoto(LinearColor.rgb);
#else
half3 FilmColor = FilmPostProcess(LinearColor.rgb);
#endif
// Apply "gamma" curve adjustment.
FilmColor.rgb = pow(FilmColor.rgb, InverseGamma.y);
// Output transfer curve: exactly one of the branches below declares TonemappedColor.
#if MAC
// Note, MacOSX native output is raw gamma 2.2 not sRGB!
half3 TonemappedColor = pow(FilmColor.rgb, 1.0/2.2);
#else
#if USE_709
// Didn't profile yet if the branching version would be faster (different linear segment).
half3 TonemappedColor = LinearTo709Branchless(FilmColor);
#else
#if USE_GAMMA
// This is different than the prior "gamma" curve adjustment (but reusing the variable).
// For displays set to a gamma colorspace.
half3 TonemappedColor = pow(FilmColor.rgb, InverseGamma.z);
#else
// Apply conversion to sRGB (this must be an exact sRGB conversion else darks are bad).
// Branching is faster than branchless on AMD on PC.
half3 TonemappedColor = LinearToSrgbBranching(FilmColor);
#endif
#endif
#endif
// Disabled experiments: alternative tonemapping curves and the Graph() debug plot.
//TonemappedColor = rsqrt( ( 0.8 + 0.8 * LinearColor ) / LinearColor ); // 1 vmad, 3 rcp, 1 vmul, 3 rsqrt
//TonemappedColor = sqrt( 1 - exp( -LinearColor ) ); // 1 vmul, 3 exp2, 1 vadd, 3 sqrt
//TonemappedColor = ( LinearColor * (3.85 * LinearColor + 0.128) ) / ( LinearColor * (3.74 * LinearColor + 1.14) + 0.02 ); // 3 rcp, 3 vmads, 2 vmuls
//TonemappedColor = pow( TonemappedColor, 2.0 / 2.2 );
//TonemappedColor = Graph( ScreenSpacePos );
#if USE_PHOTO
// Blend with custom LDR color, used for Fade track in Matinee.
// This is the 101% wrong way to do this,
// - It adds an extra redundant lerp.
// - It is not going to work with the future-forward ES3 fast path of sRGB output.
// - And it does super ugly non-linear blending.
// The right way is to adjust exposure instead.
TonemappedColor = lerp(TonemappedColor.rgb, OverlayColor.rgb, OverlayColor.a);
#endif
// Luminance is taken here, i.e. after the overlay blend but before grain quantization and color grading.
half LuminanceForPostProcessAA = dot(TonemappedColor, half3(0.299f, 0.587f, 0.114f));
#if USE_GRAIN_QUANTIZATION
// Needs to go after tonemapping.
// Adds grain in the range of -0.5/256 .. +0.5/256.
half GrainQuantization = 1.0/256.0;
half GrainAdd = (Grain * GrainQuantization) + (-0.5 * GrainQuantization);
TonemappedColor.rgb += GrainAdd;
#endif
#if USE_COLOR_GRADING
// apply color grading
TonemappedColor = ColorLookupTable(TonemappedColor);
#endif
// RETURN_COLOR not needed unless writing to SceneColor
OutColor = float4(TonemappedColor, LuminanceForPostProcessAA);
#endif
}
// ES2 version
// TODO: Convert float to half.
// Vertex shader entry point of the ES2 tonemapper pass.
// @param InPosition rectangle vertex position, forwarded to DrawRectangle()
// @param InTexCoord rectangle vertex UV, forwarded to DrawRectangle()
// @param OutTexCoord xy: UV for the scene color, zw: output position xy
// @param OutFineDofGrain xy: scene UV shifted by half a texel (fine DOF / grain jitter), zw: grain seed
// @param OutFullViewUV xy: UV covering the full viewport, zw: UV for the DOF lookup
// @param OutVignette vignette coordinates
// @param OutTexCoords scene UVs of the north, east, west and south neighbor texels
// @param OutPosition position written by DrawRectangle()
void MainVS_ES2(
	in float4 InPosition : ATTRIBUTE0,
	in float2 InTexCoord : ATTRIBUTE1,
	out float4 OutTexCoord : TEXCOORD0,
	out float4 OutFineDofGrain : TEXCOORD1,
	out float4 OutFullViewUV : TEXCOORD2,
	out float2 OutVignette : TEXCOORD3,
	out float2 OutTexCoords[4] : TEXCOORD4,
	out float4 OutPosition : SV_POSITION
	)
{
	float2 SceneUV;
	DrawRectangle(InPosition, InTexCoord, OutPosition, SceneUV);

	// Want grain and a second UV based on the knowledge that the source texture has a full viewport.
	float2 FullUV = OutPosition.xy * float2(0.5,-0.5) + 0.5;

	// Avoiding a permutation.
	if(GrainRandomFull.z > 0.0)
	{
		// Framebuffer fetch hardware uses the standard possibly non-full rectangle.
		// Other hardware gets a full texture source.
		SceneUV = FullUV;
	}

#if ES2_PROFILE && COMPILER_GLSL_ES2
	// This is currently the last pass, so flip the texture on V to match D3D
	SceneUV.y = 1.0 - SceneUV.y;
	FullUV.y = 1.0 - FullUV.y;
#endif

	float2 TexelSize = PostprocessInput0Size.zw;

	OutTexCoord = float4(SceneUV, OutPosition.xy);

	// xy: fine adjustment inside the possible non-full viewport in the full resolution texture.
	// zw: randomized seed for GrainFromUV().
	OutFineDofGrain = float4(SceneUV + TexelSize * float2(-0.5,0.5), FullUV + GrainRandomFull.xy);

	// For DOF attempt to undo sampling bias for the first transition region.
	// This is better for the fine transition, breaks down for the larger bokeh.
	// This is the best compromise for mobile using 4 bilinear taps only.
	OutFullViewUV = float4(FullUV, FullUV + PostprocessInput2Size.zw * float2(0.25,-0.5));

	// NEWS
	OutTexCoords[0] = SceneUV + TexelSize * float2( 0,-1);
	OutTexCoords[1] = SceneUV + TexelSize * float2( 1, 0);
	OutTexCoords[2] = SceneUV + TexelSize * float2(-1, 0);
	OutTexCoords[3] = SceneUV + TexelSize * float2( 0, 1);

	// Scale vignette to always be a circle with consistent corner intensity.
	// NOTE(review): this feeds InPosition.xy while MainVS feeds the output position - confirm this is intended.
	OutVignette.xy = VignetteSpace(InPosition.xy);
}
// Constants for DOF blend in.
// @return reciprocal of the length of the largest DOF offset, (-2.125,-0.50)*2.0
half CocMaxRadiusInPixelsRcp()
{
	half2 MaxOffset = half2(-2.125,-0.50)*2.0;
	half RadiusSquared = dot(MaxOffset, MaxOffset);
	return rcp(sqrt(RadiusSquared));
}
// Scale and bias that turn a circle of confusion into the coarse DOF blend factor.
// saturate(Coc * x + y) is 0 at Start and 1 at End, where Start and End are 0.25 and 1.0
// times CocMaxRadiusInPixelsRcp().
// @return x: scale, y: bias
half2 CocBlendScaleBias()
{
	// Scalars on purpose: as half2 they were implicitly truncated when written into ScaleBias.x.
	half Start = 0.25 * CocMaxRadiusInPixelsRcp();
	half End = 1.0 * CocMaxRadiusInPixelsRcp();
	half2 ScaleBias;
	ScaleBias.x = 1.0/(End-Start);
	ScaleBias.y = (-Start)*ScaleBias.x;
	return ScaleBias;
}
// Scale and bias that turn a circle of confusion into the fine DOF blend factor.
// saturate(Coc * x + y) is 0 at Start and 1 at End, where Start and End are 0.0 and 0.5
// times CocMaxRadiusInPixelsRcp(), so the bias comes out as zero.
// @return x: scale, y: bias
half2 CocBlendScaleBiasFine()
{
	// Scalars on purpose: as half2 they were implicitly truncated when written into ScaleBias.x.
	half Start = 0.0 * CocMaxRadiusInPixelsRcp();
	half End = 0.5 * CocMaxRadiusInPixelsRcp();
	half2 ScaleBias;
	ScaleBias.x = 1.0/(End-Start);
	ScaleBias.y = (-Start)*ScaleBias.x;
	return ScaleBias;
}
// Pixel shader entry point of the ES2 tonemapper pass.
// Optionally blends in depth of field, decodes MSAA / HDR mosaic input, applies
// bloom/sun/vignette, grain, the film curve and the output transfer curve, then the overlay color.
// @param UVAndScreenPos xy: UV for the scene color, zw: screen position (not used by the body)
// @param FineDofGrain xy: UV of the fine DOF / grain jitter lookup, zw: seed for GrainFromUV()
// @param FullViewUV xy: UV of the bloom/sun/vignette lookup, zw: UV of the coarse DOF lookup
// @param InVignette vignette coordinates (only read with USE_HDR_MOSAIC)
// @param InTexCoords UVs of the north, east, west and south neighbors (only read with USE_HDR_MOSAIC)
// @param SvPosition pixel position for the demosaic (only present with USE_HDR_MOSAIC)
// @param OutColor rgb: display color, a: circle of confusion with USE_DOF, otherwise 0
void MainPS_ES2(
in float4 UVAndScreenPos : TEXCOORD0,
in float4 FineDofGrain : TEXCOORD1,
in float4 FullViewUV : TEXCOORD2,
in float2 InVignette : TEXCOORD3,
in float2 InTexCoords[4] : TEXCOORD4,
#if USE_HDR_MOSAIC
in float4 SvPosition : SV_Position,
#endif
out half4 OutColor : SV_Target0
)
{
float2 UV = UVAndScreenPos.xy;
float2 ScreenSpacePos = UVAndScreenPos.zw;
half4 SceneColor = PostprocessInput0.Sample(PostprocessInput0Sampler, UV);
#if USE_GAMMA_ONLY
// Gamma only: optional MSAA decode followed by a square root as the gamma curve.
// NOTE(review): this path writes OutColor.rgb only; OutColor.a is never assigned here - confirm that is acceptable.
#if USE_MSAA
// Decode: scale the color by 1 / (1 - luma), with luma weights 0.299/0.587/0.114.
SceneColor.rgb *= rcp(SceneColor.r*(-0.299) + SceneColor.g*(-0.587) + SceneColor.b*(-0.114) + 1.0);
#endif
OutColor.rgb = sqrt(SceneColor.rgb);
#else
#if USE_GRAIN_JITTER || USE_GRAIN_INTENSITY || USE_GRAIN_QUANTIZATION
// One grain value per pixel, shared by all grain features.
half Grain = GrainFromUV(FineDofGrain.zw);
#endif
#if USE_DOF
// Fine DOF comes from the half texel shifted scene color, coarse DOF from PostprocessInput2.
half4 DofFine = PostprocessInput0.Sample(PostprocessInput0Sampler, FineDofGrain.xy);
half4 Dof = PostprocessInput2.Sample(PostprocessInput2Sampler, FullViewUV.zw);
// Convert alpha back into circle of confusion.
SceneColor.a = max(Dof.a, abs(SceneColor.a * 2.0 - 1.0));
// Convert circle of confusion into blend factors.
half2 ScaleBias = CocBlendScaleBias(); // Constant.
half DofAmount = saturate(SceneColor.a * ScaleBias.x + ScaleBias.y);
half2 ScaleBias2 = CocBlendScaleBiasFine(); // Constant.
half DofAmountFine = saturate(SceneColor.a * ScaleBias2.x + ScaleBias2.y);
#if USE_GRAIN_JITTER
// Grain can increase fine DOF.
DofAmountFine = max((1.0-Grain*Grain) * GrainScaleBiasJitter.z, DofAmountFine);
#endif
// Blend in fine DOF.
SceneColor.rgb = lerp(SceneColor.rgb, DofFine.rgb, DofAmountFine);
// Blend in coarse DOF.
SceneColor.rgb = lerp(SceneColor.rgb, Dof.rgb, DofAmount);
#else
// Set so temporal AA shader knows everything is in focus.
SceneColor.a = 0.0;
#if USE_GRAIN_JITTER
#if USE_HDR_MOSAIC
// This has grain jitter off.
#else
// Do jitter for grain.
half4 DofFine = PostprocessInput0.Sample(PostprocessInput0Sampler, FineDofGrain.xy);
// Grain jitter.
SceneColor.rgb = lerp(SceneColor.rgb, DofFine.rgb, (1.0-Grain*Grain) * GrainScaleBiasJitter.z);
#endif
#endif
#endif
#if USE_MSAA
// Do after jitter for grain as an optimization.
// Decode: scale the color by 1 / (1 - luma), with luma weights 0.299/0.587/0.114.
SceneColor.rgb *= rcp(SceneColor.r*(-0.299) + SceneColor.g*(-0.587) + SceneColor.b*(-0.114) + 1.0);
#endif
#if USE_HDR_MOSAIC
// TODO: Support odd frame inversion of mosaic pattern?
#if 0
// Disabled cheaper path: demosaic against the east neighbor only.
half3 SceneColorE = PostprocessInput0.Sample(PostprocessInput0Sampler, InTexCoords[1].xy).rgb;
SceneColor.rgb = HdrDemosaic(SceneColor.rgb, SceneColorE, SvPosition.xy);
#endif
#if 1
// Higher quality path.
half3 SceneColorN = PostprocessInput0.Sample(PostprocessInput0Sampler, InTexCoords[0].xy).rgb;
half3 SceneColorE = PostprocessInput0.Sample(PostprocessInput0Sampler, InTexCoords[1].xy).rgb;
half3 SceneColorW = PostprocessInput0.Sample(PostprocessInput0Sampler, InTexCoords[2].xy).rgb;
half3 SceneColorS = PostprocessInput0.Sample(PostprocessInput0Sampler, InTexCoords[3].xy).rgb;
half3 SceneColorV = SceneColorN * 0.5 + SceneColorS * 0.5;
half3 SceneColorH = SceneColorW * 0.5 + SceneColorE * 0.5;
// Use the neighbor pair whose green channels differ less; SceneColorH holds the chosen average.
if(abs(SceneColorN.g - SceneColorS.g) < abs(SceneColorW.g - SceneColorE.g))
{
SceneColorH = SceneColorV;
}
SceneColor.rgb = HdrDemosaic(SceneColor.rgb, SceneColorH, SvPosition.xy);
#endif
#endif
// Match PC naming.
half3 LinearColor = SceneColor.rgb;
#if USE_HDR_MOSAIC
// With HDR mosaic the vignette is computed here because the lookup further down is compiled out.
#if USE_VIGNETTE
#if USE_VIGNETTE_COLOR
LinearColor.rgb = lerp(VignetteColorIntensity.rgb, LinearColor.rgb, ComputeVignetteMask(InVignette.xy, VignetteColorIntensity.a));
#else
LinearColor.rgb *= ComputeVignetteMask(InVignette.xy, VignetteColorIntensity.a);
#endif
#endif
#endif
// It is faster to do vignette as a texture lookup + mad because this is an ALU bound shader.
#if (!USE_HDR_MOSAIC) && (USE_BLOOM || USE_LIGHT_SHAFTS || USE_VIGNETTE || USE_VIGNETTE_COLOR)
// One lookup covers all of them: alpha scales the scene color and rgb is added on top.
half4 CombinedBloomSunVignette = Texture2DSample(PostprocessInput1, PostprocessInput1Sampler, FullViewUV.xy);
LinearColor.rgb = LinearColor.rgb * CombinedBloomSunVignette.a + CombinedBloomSunVignette.rgb;
#endif
#if USE_GRAIN_INTENSITY
// Needs to go before tonemapping.
half GrainMul = Grain * GrainScaleBiasJitter.x + GrainScaleBiasJitter.y;
LinearColor.rgb *= GrainMul;
#endif
// Film curve (tonemapper).
half3 FilmColor = FilmPostProcess(LinearColor.rgb);
#if IOS
// Note, iOS native output is raw gamma 2.2 not sRGB!
half3 TonemappedColor = pow(FilmColor.rgb, 1.0/2.2);
#else
// Apply conversion to sRGB (this must be an exact sRGB conversion else darks are bad).
half3 TonemappedColor = LinearToSrgbBranchless(FilmColor);
#endif
// Blend with custom LDR color, used for Fade track in Matinee.
// This is the 101% wrong way to do this,
// - It adds an extra redundant lerp.
// - It is not going to work with the future-forward ES3 fast path of sRGB output.
// - And it does super ugly non-linear blending.
// The right way is to adjust exposure instead.
TonemappedColor = lerp(TonemappedColor.rgb, OverlayColor.rgb, OverlayColor.a);
#if USE_GRAIN_QUANTIZATION
// Needs to go after tonemapping.
// Adds grain in the range of -0.5/256 .. +0.5/256.
half GrainQuantization = 1.0/256.0;
half GrainAdd = (Grain * GrainQuantization) + (-0.5 * GrainQuantization);
TonemappedColor.rgb += GrainAdd;
#endif
// Alpha carries the circle of confusion (or 0 without DOF) set up earlier.
OutColor = half4(TonemappedColor, SceneColor.a);
#endif
}