Files
UnrealEngineUWP/Engine/Shaders/PostProcessCombineLUTs.usf
Martin Mittring 7b0621b84e Merging
//depot/UE4-Orion/Engine/Shaders/...

to //depot/UE4-Orion/Engine/Shaders/...

[CL 2691152 by Martin Mittring in Main branch]
2015-09-14 19:37:33 -04:00

278 lines
8.6 KiB
Plaintext

// Copyright 1998-2015 Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessCombineLUTs.usf (formerly LUTBlender.usf): Pixel shader that builds the combined color grading LUT.
=============================================================================*/
// Feature switches, defined ahead of the includes below.
// NOTE(review): presumably these are read by the included common headers (e.g. TonemapCommon.usf) - confirm there.
#define USE_COLOR_MATRIX 1
#define USE_SHADOW_TINT 1
#define USE_CONTRAST 1
#include "Common.usf"
#include "PostProcessCommon.usf"
#include "TonemapCommon.usf"
#include "GammaCorrectionCommon.usf"
// ---------------------------------------------------
// Source LUT textures that MainPS blends together when BLENDCOUNT > 1.
// Texture0 is the neutral one and is computed in the shader
Texture2D Texture1;
SamplerState Texture1Sampler;
Texture2D Texture2;
SamplerState Texture2Sampler;
Texture2D Texture3;
SamplerState Texture3Sampler;
Texture2D Texture4;
SamplerState Texture4Sampler;
// Blend weight per LUT:
// 0 is for neutral, 1 for Texture1, 2 for ...
float LUTWeights[5];
// Multiplier applied to the film color before the overlay blend in MainPS.
half3 ColorScale;
// Overlay color: rgb is the blend target, a is the blend amount (see the lerp in MainPS).
half4 OverlayColor;
// Edge length of the LUT; used by MainPS to turn the pixel position into the neutral color.
static const float LUTSize = 32;
// Chromaticity of a black body radiator of the given temperature.
// A rational polynomial fit yields (u,v), which is then converted to (x,y).
// Accurate for 1000K < Temp < 15000K
// [Krystek 1985, "An algorithm to calculate correlated colour temperature"]
// in: color temperature
// out: (x,y) chromaticity
float2 PlanckianLocusChromaticity( float Temp )
{
	float2 Locus;
	Locus.x = ( 0.860117757f + 1.54118254e-4f * Temp + 1.28641212e-7f * Temp*Temp ) / ( 1.0f + 8.42420235e-4f * Temp + 7.08145163e-7f * Temp*Temp );
	Locus.y = ( 0.317398726f + 4.22806245e-5f * Temp + 4.20481691e-8f * Temp*Temp ) / ( 1.0f - 2.89741816e-5f * Temp + 1.61456053e-7f * Temp*Temp );

	// (u,v) -> (x,y): both components share the same denominator
	float Denominator = 2*Locus.x - 8*Locus.y + 4;
	return float2( 3*Locus.x / Denominator, 2*Locus.y / Denominator );
}
// Chromaticity of a D series (daylight) illuminant.
// Accurate for 4000K < Temp < 25000K
// in: correlated color temperature
// out: CIE 1931 chromaticity
float2 D_IlluminantChromaticity( float Temp )
{
	// Correct for revision of Planck's law
	// This makes 6500 == D65
	Temp *= 1.4388 / 1.438;

	// Two polynomial fits in 1/Temp, split at 7000K
	float x;
	if( Temp <= 7000 )
	{
		x = 0.244063 + ( 0.09911e3 + ( 2.9678e6 - 4.6070e9 / Temp ) / Temp ) / Temp;
	}
	else
	{
		x = 0.237040 + ( 0.24748e3 + ( 1.9018e6 - 2.0064e9 / Temp ) / Temp ) / Temp;
	}

	// y follows from x through a quadratic
	float y = -3 * x*x + 2.87 * x - 0.275;

	return float2( x, y );
}
// Find closest color temperature to chromaticity
// [McCamy 1992, "Correlated color temperature as an explicit function of chromaticity coordinates"]
// in: CIE 1931 chromaticity (x,y)
// out: correlated color temperature in Kelvin
// Note: the result is undefined for y == 0.1858 (division by zero).
float CorrelatedColorTemperature( float x, float y )
{
	// Inverse slope of the line from the epicenter (0.3320, 0.1858) to (x,y).
	// The sign of the denominator has to match the polynomial below: the
	// alternating-sign coefficients go with (y - 0.1858). Using (0.1858 - y)
	// here flips the odd terms, e.g. D65 (0.3127, 0.3290) would come out as
	// ~4664K instead of ~6505K.
	float n = (x - 0.3320) / (y - 0.1858);
	return -449 * n*n*n + 3525 * n*n - 6823.3 * n + 5520.33;
}
// Chromaticity of the Planckian locus point for Temp, pushed sideways by Tint.
// in: color temperature, tint (scaled by 0.05 in (u,v))
// out: (x,y) chromaticity; with Tint == 0 this matches PlanckianLocusChromaticity( Temp )
float2 PlanckianIsothermal( float Temp, float Tint )
{
	// Same rational fit as PlanckianLocusChromaticity
	float u = ( 0.860117757f + 1.54118254e-4f * Temp + 1.28641212e-7f * Temp*Temp ) / ( 1.0f + 8.42420235e-4f * Temp + 7.08145163e-7f * Temp*Temp );
	float v = ( 0.317398726f + 4.22806245e-5f * Temp + 4.20481691e-8f * Temp*Temp ) / ( 1.0f - 2.89741816e-5f * Temp + 1.61456053e-7f * Temp*Temp );

	// NOTE(review): the offset direction is perpendicular to the (u,v) position
	// vector, not to the tangent of the locus (du/dTemp, dv/dTemp). This function
	// used to compute those derivatives but never read them; they were removed as
	// dead code. Switching to the tangent changes (and near 6500K roughly inverts)
	// the tint direction, so confirm the intent before changing the math.
	float2 uvd = normalize( float2( u, v ) );

	// Correlated color temperature is meaningful within +/- 0.05
	u += -uvd.y * Tint * 0.05;
	v +=  uvd.x * Tint * 0.05;

	// (u,v) -> (x,y)
	float x = 3*u / ( 2*u - 8*v + 4 );
	float y = 2*v / ( 2*u - 8*v + 4 );

	return float2( x, y );
}
// White point controls read by WhiteBalance().
float WhiteTemp;
float WhiteTint;

// Adapts LinearColor from the white point described by WhiteTemp / WhiteTint to D65.
// The adaptation is built in XYZ and wrapped with the sRGB <-> XYZ matrices.
float3 WhiteBalance( float3 LinearColor )
{
	const float2 D65White = float2( 0.31270, 0.32900 );

	float2 DaylightWhite = D_IlluminantChromaticity( WhiteTemp );
	float2 PlanckianWhite = PlanckianLocusChromaticity( WhiteTemp );

	// Black body locus below 4000K, daylight locus otherwise
	float2 SrcWhite = DaylightWhite;
	if( WhiteTemp < 4000 )
	{
		SrcWhite = PlanckianWhite;
	}

	// Offset along isotherm
	SrcWhite += PlanckianIsothermal( WhiteTemp, WhiteTint ) - PlanckianWhite;

	float3x3 AdaptationXYZ = ChromaticAdaptation( SrcWhite, D65White );
	float3x3 AdaptationRGB = mul( XYZ_2_sRGB_MAT, mul( AdaptationXYZ, sRGB_2_XYZ_MAT ) );

	return mul( AdaptationRGB, LinearColor );
}
// Per channel grading controls read by ColorGrade().
float3 ColorSaturation;
float3 ColorContrast;
float3 ColorGamma;
float3 ColorGain;
float3 ColorOffset;

// Applies saturation, contrast, gamma, gain and offset (in that order) to a linear color.
// The math runs in the AP1 working space; input and output go through the sRGB <-> AP1 matrices.
float3 ColorGrade( float3 LinearColor )
{
	// ACEScg working space
	const float3x3 sRGB_2_AP1 = mul( XYZ_2_AP1_MAT, mul( D65_2_D60_CAT, sRGB_2_XYZ_MAT ) );
	const float3x3 AP1_2_sRGB = mul( XYZ_2_sRGB_MAT, mul( D60_2_D65_CAT, AP1_2_XYZ_MAT ) );

	float3 AP1Color = mul( sRGB_2_AP1, LinearColor );

	// TODO optimize
	float Luma = dot( AP1Color, AP1_RGB2Y );

	// Saturation: blend between luma and the color, clamped to non negative
	float3 Saturated = max( 0, lerp( Luma.xxx, AP1Color, ColorSaturation ) );

	// Contrast: power curve pivoting around 0.18
	float3 Contrasted = pow( Saturated * (1.0 / 0.18), ColorContrast ) * 0.18;

	// Gamma
	float3 GammaAdjusted = pow( Contrasted, 1.0 / ColorGamma );

	// Gain and offset
	float3 Result = GammaAdjusted * ColorGain + ColorOffset;

#if 0
	{
		// Split tone
		float ShadowWeight = 1 - smoothstep( 0, 0.18, Luma );
		float HighlightWeight = smoothstep( 0.5, 1, Luma );
		float MidtoneWeight = 1 - ShadowWeight - HighlightWeight;
		Result = ShadowWeight * ShadowColor + MidtoneWeight * GradedColor + HighlightWeight * HighlightColor;
	}
#endif

	return mul( AP1_2_sRGB, Result );
}
// Selects the output encoding at the end of MainPS:
// 0 = LinearToSrgbBranching, 1 = LinearTo709Branchless, anything else = pow() with InverseGamma.z
uint OutputDevice;

// todo: Weight[0] should be used for neutral, Texture* name should start with 1, color correction should apply on top of that

// Pixel shader that computes one entry of the combined LUT.
// The pixel position is turned into a neutral (log encoded) color, which then runs through
// white balance, color grading, tone mapping, the optional legacy LUT blend, color correction,
// the overlay blend and finally the output device encoding.
// out: OutColor.rgb = encoded color / 1.05, OutColor.a = 0
#if USE_VOLUME_LUT == 1
void MainPS(FWriteToSliceGeometryOutput Input, out float4 OutColor : SV_Target0)
{
	// construct the neutral color from a 3d position volume texture
	float4 Neutral;
	{
		// remove the half texel offset, then rescale so the last texel maps to 1
		float2 UV = Input.Vertex.UV - float2(0.5f / LUTSize, 0.5f / LUTSize);

		Neutral = float4(UV * LUTSize / (LUTSize - 1), Input.LayerIndex / (LUTSize - 1), 0);
	}
#else
void MainPS(noperspective float4 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	// construct the neutral color from a 2d position in the unwrapped LUT
	// (LUTSize * LUTSize texels wide, LUTSize texels high)
	float4 Neutral;
	{
		float2 UV = InUV.xy;

		// 0.49999f instead of 0.5f to avoid getting into negative values
		UV -= float2(0.49999f / (LUTSize * LUTSize), 0.49999f / LUTSize);

		float Scale = LUTSize / (LUTSize - 1);

		float3 RGB;

		// red: position inside the slice, blue: slice index / LUTSize, green: row
		RGB.r = frac(UV.x * LUTSize);
		RGB.b = UV.x - RGB.r / LUTSize;
		RGB.g = UV.y;

		Neutral = float4(RGB * Scale, 0);
	}
#endif

	// The LUT is indexed with a log encoded color, decode it to linear
	float3 LogColor = Neutral.rgb;

	const float LinearRange = 14;
	const float LinearGrey = 0.18;
	const float ExposureGrey = 444;

	float3 LinearColor = exp2( ( LogColor - ExposureGrey / 1023.0 ) * LinearRange ) * LinearGrey;
	//float3 LinearColor = 2 * ( pow(10.0, ((LogColor - 0.616596 - 0.03) / 0.432699)) - 0.037584 );	// SLog
	//float3 LinearColor = ( pow( 10, ( 1023 * LogColor - 685 ) / 300) - .0108 ) / (1 - .0108);	// Cineon
	//LinearColor = max( 0, LinearColor );

	float3 BalancedColor = WhiteBalance( LinearColor );
	float3 GradedColor = ColorGrade( BalancedColor );

	float3 FilmColor = FilmToneMap( GradedColor );

	BRANCH
	if( ColorShadow_Tint2.a == 0 )
	{
		// Legacy tone mapper
		FilmColor = FilmPostProcess( GradedColor );
	}

#if BLENDCOUNT > 1
	{
		// Legacy LDR LUT color grading

		// FIXME LUTs are in sRGB space
		half3 GammaColor = LinearToSrgbBranching( saturate( FilmColor ) );

		// the legacy LUTs are sampled with size 16: scale and bias to texel centers
		float3 UVW = GammaColor * (15.0 / 16.0) + (0.5f / 16.0);

		GammaColor = LUTWeights[0] * GammaColor;

		// BLENDCOUNT is the number of LUT that are blended together including the neutral one
#if BLENDCOUNT >= 2
		GammaColor += LUTWeights[1] * UnwrappedTexture3DSample( Texture1, Texture1Sampler, UVW, 16 ).rgb;
#endif

#if BLENDCOUNT >= 3
		GammaColor += LUTWeights[2] * UnwrappedTexture3DSample( Texture2, Texture2Sampler, UVW, 16 ).rgb;
#endif

#if BLENDCOUNT >= 4
		GammaColor += LUTWeights[3] * UnwrappedTexture3DSample( Texture3, Texture3Sampler, UVW, 16 ).rgb;
#endif

#if BLENDCOUNT >= 5
		GammaColor += LUTWeights[4] * UnwrappedTexture3DSample( Texture4, Texture4Sampler, UVW, 16 ).rgb;
#endif

		// Back to linear space
		FilmColor = sRGBToLinear( GammaColor );
	}
#endif

	// apply math color correction on top to texture based solution
	FilmColor = ColorCorrection( FilmColor );

	// blend with custom LDR color, used for Fade track in Matinee
	FilmColor = lerp( FilmColor * ColorScale, OverlayColor.rgb, OverlayColor.a );

	// Apply "gamma" curve adjustment.
	FilmColor = pow( FilmColor, InverseGamma.y );

	half3 OutDeviceColor = 0;

	BRANCH
	if( OutputDevice == 0 )
	{
		// Apply conversion to sRGB (this must be an exact sRGB conversion else darks are bad).
		// Branching is faster than branchless on AMD on PC.
		OutDeviceColor = LinearToSrgbBranching( FilmColor );
	}
	else if( OutputDevice == 1 )
	{
		// Didn't profile yet if the branching version would be faster (different linear segment).
		OutDeviceColor = LinearTo709Branchless( FilmColor );
	}
	else
	{
		// This is different than the prior "gamma" curve adjustment (but reusing the variable).
		// For displays set to a gamma colorspace.
		// Note, MacOSX native output is raw gamma 2.2 not sRGB!
		OutDeviceColor = pow( FilmColor, InverseGamma.z );
	}

	// Better to saturate(lerp(a,b,t)) than lerp(saturate(a),saturate(b),t)
	// NOTE(review): the division by 1.05 presumably leaves headroom that the consumer of the LUT undoes - confirm.
	OutColor.rgb = OutDeviceColor / 1.05;

	// An out parameter has to be written completely; alpha carries no data here
	OutColor.a = 0;
}