Files
UnrealEngineUWP/Engine/Shaders/Private/PostProcessSubsurface.usf
Charles deRousiers 0a30a91970 Add Strata wrap-sub-surface lighting support.
This is the initial version of wrap-lighting support for Strata, which allows legacy subsurface surfaces to be converted into Strata. This initial version is not fully correct (energy preservation is incorrect because the phase function is not normalized; this needs to be revisited).

#rb none
#jira none
#fyi sebastien.hillaire
#preflight shaders

[CL 20345473 by Charles deRousiers in ue5-main branch]
2022-05-24 04:52:51 -04:00

1040 lines
35 KiB
Plaintext

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
PostProcessSubsurface.usf: Screenspace subsurface scattering shaders.
=============================================================================*/
#include "Common.ush"
#include "Strata/Strata.ush"
#include "PostProcessCommon.ush"
#include "DeferredShadingCommon.ush"
#include "SubsurfaceProfileCommon.ush"
// for VisualizeSSS
#include "MiniFontCommon.ush"
#include "ScreenPass.ush"
// Packed pass parameters:
// x:Radius*DistanceToProjectionWindow/KernelSize*0.5, y:DistanceToProjectionWindow, z:OverrideNumSamples, w: MinGenerateMipsTileCount
// In this file .x is read by ComputeFullResLerp() and .w by ShouldGenerateMipmaps().
float4 SubsurfaceParams;
// Input textures. In this file input 0 is sampled as the scene color / lighting source and
// input 1 as the filtered subsurface result during the recombine pass; inputs 2 and 3 are
// not referenced in the visible code.
Texture2D SubsurfaceInput0_Texture;
Texture2D SubsurfaceInput1_Texture;
Texture2D SubsurfaceInput2_Texture;
Texture2D SubsurfaceInput3_Texture;
SamplerState SubsurfaceSampler0;
SamplerState SubsurfaceSampler1;
SamplerState SubsurfaceSampler2;
SamplerState SubsurfaceSampler3;
// When samplers can be bound independently of textures, every input goes through sampler 0;
// otherwise each input uses the sampler with the matching index.
#if SUPPORTS_INDEPENDENT_SAMPLERS
#define SharedSubsurfaceSampler0 SubsurfaceSampler0
#define SharedSubsurfaceSampler1 SubsurfaceSampler0
#define SharedSubsurfaceSampler2 SubsurfaceSampler0
#define SharedSubsurfaceSampler3 SubsurfaceSampler0
#else
#define SharedSubsurfaceSampler0 SubsurfaceSampler0
#define SharedSubsurfaceSampler1 SubsurfaceSampler1
#define SharedSubsurfaceSampler2 SubsurfaceSampler2
#define SharedSubsurfaceSampler3 SubsurfaceSampler3
#endif
// Viewport parameter sets. The code below uses names such as SubsurfaceInput0_ExtentInverse,
// SubsurfaceInput1_UVViewportBilinearMax and Output_ViewportMin, which are presumably
// declared by this macro (see ScreenPass.ush) - TODO confirm.
SCREEN_PASS_TEXTURE_VIEWPORT(SubsurfaceInput0)
SCREEN_PASS_TEXTURE_VIEWPORT(SubsurfaceInput1)
SCREEN_PASS_TEXTURE_VIEWPORT(Output)
// Per-pixel profile id cache, written by SetupIndirectCS when ENABLE_PROFILE_ID_CACHE is set.
RWTexture2D<uint> ProfileIdTexture;
// Tile classification; must stay in sync with FSubsurfaceTiles::ETileType on the C++ side.
// The values are used as indices into the tile count buffer.
#define SUBSURFACE_TILE_TYPE_AFIS 0
#define SUBSURFACE_TILE_TYPE_SEPARABLE 1
#define SUBSURFACE_TILE_TYPE_ALL 2
#define SUBSURFACE_TILE_TYPE_COUNT 3
// The separable filter runs as two passes: pass one is horizontal, pass two is vertical.
#define SUBSURFACE_PASS_ONE 0
#define SUBSURFACE_PASS_TWO 1
#define SUBSURFACE_DIRECTION_HORIZONTAL SUBSURFACE_PASS_ONE
#define SUBSURFACE_DIRECTION_VERTICAL SUBSURFACE_PASS_TWO
// Controls the quality (number of samples) of the blur kernel.
#define SUBSURFACE_QUALITY_LOW 0
#define SUBSURFACE_QUALITY_MEDIUM 1
#define SUBSURFACE_QUALITY_HIGH 2
// Full resolution recombine.
#define SUBSURFACE_RECOMBINE_MODE_FULLRES 0
// Half resolution recombine.
#define SUBSURFACE_RECOMBINE_MODE_HALFRES 1
// Just reconstruct the lighting (needed for scalability).
#define SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING 2
// Controls the quality of lighting reconstruction.
#define SUBSURFACE_RECOMBINE_QUALITY_LOW 0
#define SUBSURFACE_RECOMBINE_QUALITY_HIGH 1
// Default to the low reconstruction quality when the permutation does not specify one.
#ifndef SUBSURFACE_RECOMBINE_QUALITY
#define SUBSURFACE_RECOMBINE_QUALITY SUBSURFACE_RECOMBINE_QUALITY_LOW
#endif
// NOTE(review): SUBSURFACE_RECOMBINE_QUALITY_HALFRES is not defined anywhere in this file. If it
// is not defined by an include either, the preprocessor evaluates it as 0 and this test is
// equivalent to "quality == SUBSURFACE_RECOMBINE_QUALITY_LOW". The intent may have been
// SUBSURFACE_RECOMBINE_MODE == SUBSURFACE_RECOMBINE_MODE_HALFRES. Also, the rest of this file
// tests SUBSURFACE_HALF_RES, not SUBSURFACE_HALFRES - confirm whether anything reads this define.
#if SUBSURFACE_RECOMBINE_QUALITY == SUBSURFACE_RECOMBINE_QUALITY_HALFRES
#define SUBSURFACE_HALFRES 1
#endif
//=============================================================================
// setup for "SeparableSSS.ush"
//=============================================================================
// Select which of the three kernels is used: SSSS_N_KERNELWEIGHTCOUNT is its sample count and
// SSSS_N_KERNELWEIGHTOFFSET the offset of its first entry (used in this file as the sample
// index passed to GetSubsurfaceProfileKernel). The SSSS_KERNEL*_SIZE / _OFFSET values are not
// defined in this file (presumably they come from SubsurfaceProfileCommon.ush - TODO confirm).
// Low quality maps to kernel 2, medium to kernel 1 and high (or anything else) to kernel 0.
#if SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_LOW
#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL2_SIZE
#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL2_OFFSET
#elif SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_MEDIUM
#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL1_SIZE
#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL1_OFFSET
#else // SUBSURFACE_QUALITY == SUBSURFACE_QUALITY_HIGH
#define SSSS_N_KERNELWEIGHTCOUNT SSSS_KERNEL0_SIZE
#define SSSS_N_KERNELWEIGHTOFFSET SSSS_KERNEL0_OFFSET
#endif
// 0: faster
// 1: no color bleeding in z direction
#define SSSS_FOLLOW_SURFACE 1
// Converts an alpha channel that carries depth into a binary mask:
// 1 where the stored value is strictly positive, 0 otherwise.
float GetMaskFromDepthInAlpha(float Alpha)
{
	return (Alpha > 0) ? 1.0f : 0.0f;
}
#if STRATA_ENABLED
// Loads the full Strata subsurface data for the pixel addressed by a buffer UV.
// The UV is scaled by the view buffer size to obtain the pixel position passed to the loader.
FStrataSubsurfaceData LoadStrataSSSData(float2 UV)
{
	const float2 BufferSize = View.BufferSizeAndInvSize.xy;
	const float2 PixelCoord = UV.xy * BufferSize;
	return StrataLoadSubsurfaceData(Strata.SSSTexture, PixelCoord);
}
// Loads only the Strata subsurface header for the pixel addressed by a buffer UV.
// The UV is scaled by the view buffer size to obtain the pixel position passed to the loader.
FStrataSubsurfaceHeader LoadStrataSSSHeader(float2 UV)
{
	const float2 BufferSize = View.BufferSizeAndInvSize.xy;
	const float2 PixelCoord = UV.xy * BufferSize;
	return StrataLoadSubsurfaceHeader(Strata.SSSTexture, PixelCoord);
}
#endif
// Returns the per-pixel subsurface radius scale at UV, multiplied by a 0/1 mask that is
// 1 only when the pixel takes part in subsurface diffusion.
// (can be optimized)
float GetSubsurfaceStrength(float2 UV)
{
#if STRATA_ENABLED
	const FStrataSubsurfaceHeader Header = LoadStrataSSSHeader(UV);
	const bool bScatters = StrataSubSurfaceHeaderGetUseDiffusion(Header);
	const float Strength = StrataSubSurfaceHeaderGetProfileRadiusScale(Header);
#else
	FScreenSpaceData PixelData = GetScreenSpaceData(UV);
	const bool bScatters = UseSubsurfaceProfile(PixelData.GBuffer.ShadingModelID);
	const float Strength = PixelData.GBuffer.CustomData.a;
#endif
	const float ScatterMask = bScatters ? 1.0f : 0.0f;
	return ScatterMask * Strength;
}
// Fetches one kernel entry of a subsurface profile from the profile texture.
// @param SampleIndex column (entry index) in the profile texture
// @param SubsurfaceProfileInt row (profile id) in the profile texture
// @return .rgb is the weight for color channel, .a is the sample location
float4 GetSubsurfaceProfileKernel(uint SampleIndex, uint SubsurfaceProfileInt)
{
#if STRATA_ENABLED
	// Per-pixel Strata subsurface has no profile row: return a neutral entry.
	if (SubsurfaceProfileInt == SSS_PROFILE_ID_PERPIXEL)
	{
		return float4(1, 1, 1, 1);
	}
#endif
	const float4 EncodedEntry = View.SSProfilesTexture.Load(int3(SampleIndex, SubsurfaceProfileInt, 0));
	// Only the alpha channel (sample location) is rescaled by the kernel size.
	const float4 DecodeScale = float4(1, 1, 1, SUBSURFACE_KERNEL_SIZE);
	return EncodedEntry * DecodeScale;
}
// Returns the GBuffer world normal at BufferUV, after clamping the UV to the
// bilinear-safe viewport range of input 0.
float3 GetSubsurfaceProfileNormal(float2 BufferUV)
{
	const float2 ClampedUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);
	FGBufferData PixelGBuffer = GetGBufferData(ClampedUV);
	return PixelGBuffer.WorldNormal;
}
// Returns the subsurface profile id stored for the pixel at BufferUV, after clamping
// the UV to the bilinear-safe viewport range of input 0.
uint GetSubsurfaceProfileId(float2 BufferUV)
{
	const float2 ClampedUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);
#if STRATA_ENABLED
	const FStrataSubsurfaceHeader Header = LoadStrataSSSHeader(ClampedUV);
	return StrataSubSurfaceHeaderGetProfileId(Header);
#else
	FGBufferData PixelGBuffer = GetGBufferData(ClampedUV);
	return ExtractSubsurfaceProfileInt(PixelGBuffer);
#endif
}
// Returns the boundary color bleed of a profile, read from the profile entry at
// SSSS_BOUNDARY_COLOR_BLEED_OFFSET. Per-pixel Strata subsurface yields white.
float3 GetSubsurfaceProfileBoundaryColorBleed(uint SubsurfaceProfileInt)
{
#if STRATA_ENABLED
	if (SubsurfaceProfileInt == SSS_PROFILE_ID_PERPIXEL)
	{
		return float3(1.f, 1.f, 1.f);
	}
#endif
	const float4 BleedEntry = GetSubsurfaceProfileKernel(SSSS_BOUNDARY_COLOR_BLEED_OFFSET, SubsurfaceProfileInt);
	return BleedEntry.rgb;
}
// Samples input 0 at mip 0, with the UV clamped to the bilinear-safe viewport range
// so the fetch never reads outside the viewport.
float4 GetSceneColor(float2 BufferUV)
{
	const float2 ClampedUV = clamp(BufferUV, SubsurfaceInput0_UVViewportBilinearMin, SubsurfaceInput0_UVViewportBilinearMax);
	return Texture2DSampleLevel(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, ClampedUV, 0);
}
#include "BurleyNormalizedSSSCommon.ush"
#if STRATA_ENABLED
// Calculate the Subsurface profile kernel on the fly using integration instead of LUT.
// @param SampleIndex index of the kernel sample; 0 is the center sample
// @param D per-channel parameter forwarded to GetCDF3D (declared outside this file;
//          presumably the Burley shape/diffusion parameter - TODO confirm)
// @param ScatterRadiusInMM scatter radius in millimeters, clamped below to 1
// @param WorldUnitScale extra scale applied to the returned sample location
// @return .rgb is the normalized weight of the sample, .a is the sample location
half4 GetSubsurfaceProfileKernel(int SampleIndex, float3 D, float ScatterRadiusInMM, float WorldUnitScale)
{
// See notes in SubsurfaceProfile.cpp. Scatter radius is in mm instead of cm here.
ScatterRadiusInMM = max(ScatterRadiusInMM,1.0f);
// Use a wider range when the mirrored kernel (2 * count - 1 samples) has more than 20 samples.
const float Range = ((2 * SSSS_N_KERNELWEIGHTCOUNT - 1) > 20) ? 3.0f : 2.0f;
// r is the position of this sample in [0, Range]; r0 / r1 are half a step before / after it.
const float r = Range * float(SampleIndex) / (SSSS_N_KERNELWEIGHTCOUNT - 1);
const float r0 = Range * (float(SampleIndex) - 0.5f) / (SSSS_N_KERNELWEIGHTCOUNT - 1);
const float r1 = Range * (float(SampleIndex) + 0.5f) / (SSSS_N_KERNELWEIGHTCOUNT - 1);
// Quadratic remap of the positions (squared, then divided by Range).
const float x = r * r / (Range );
const float x0 = r0 * r0 / (Range );
const float x1 = r1 * r1 / (Range );
// Normalization term shared by all samples.
const float3 MaxCDF = GetCDF3D(D, ScatterRadiusInMM * Range * (1.0f + 0.5f / (SSSS_N_KERNELWEIGHTCOUNT - 1)));
half4 Kernel = (half4)0.0f;
BRANCH
if (SampleIndex == 0)
{
// Center sample: CDF up to the upper bound x1, located at offset 0.
Kernel.rgb = GetCDF3D(D, ScatterRadiusInMM * x1) / MaxCDF;
Kernel.a = 0.0f;
}
else
{
// Need to multiply by 0.5f, and modify MaxCDF to match as close as offline LUT. Here favors integration
// instead of discrete approximation.
// Weight is the CDF difference over the interval [x0, x1] around the sample.
Kernel.rgb = (GetCDF3D(D, ScatterRadiusInMM * x1) - GetCDF3D(D, ScatterRadiusInMM * x0)) / MaxCDF;
Kernel.a = 2.0 * x; // 2.0f is consistent to the scale in ComputeMirroredBSSSKernel of BurleyNormalizedSSS.cpp
}
// Scale to match offline encoding
// const float TableMaxA = SUBSURFACE_KERNEL_SIZE;
// const FLinearColor TableColorScale = FLinearColor(
// 1.0f / TableMaxRGB,
// 1.0f / TableMaxRGB,
// 1.0f / TableMaxRGB,
// 1.0f / TableMaxA);
// C *= TableColorScale
// C.W *= Data.ScatterRadius / SUBSURFACE_RADIUS_SCALE;
// SUBSURFACE_KERNEL_SIZE and TableMaxA cancels out
// The radius is converted from mm to cm before scaling the sample location.
Kernel.a *= (ScatterRadiusInMM * BURLEY_MM_2_CM) * WorldUnitScale /** SUBSURFACE_KERNEL_SIZE*/ /(/*TableMaxA **/ SUBSURFACE_RADIUS_SCALE);
return Kernel;
}
#endif
// fetch surface albedo and diffuse mean free path
// The diffuse mean free path is modulated by world scale and a custom term passed in
// which is the opacity texture.
#if STRATA_ENABLED
// Builds the Burley parameters for a Strata pixel.
// - Profile pixels read albedo, diffuse mean free path and world unit scale from the
//   subsurface profile and use the profile radius scale as surface opacity.
// - Per-pixel diffusion pixels derive them from the stored MFP and base color.
// - Any other pixel gets a zero-initialized result.
FBurleyParameter GetBurleyParameters(FStrataSubsurfaceData SSSData)
{
	FBurleyParameter Result = (FBurleyParameter)0;

	if (StrataSubSurfaceHeaderGetIsProfile(SSSData.Header))
	{
		const uint Profile = StrataSubSurfaceHeaderGetProfileId(SSSData.Header);
		const float RadiusScale = StrataSubSurfaceHeaderGetProfileRadiusScale(SSSData.Header);

		Result.SurfaceAlbedo = GetSubsurfaceProfileSurfaceAlbedo(Profile);
		Result.DiffuseMeanFreePath = DecodeDiffuseMeanFreePath(GetSubsurfaceProfileDiffuseMeanFreePath(Profile));
		Result.WorldUnitScale = DecodeWorldUnitScale(GetSubsurfaceProfileWorldUnitScale(Profile)) * BURLEY_CM_2_MM;
		Result.SurfaceOpacity = RadiusScale;
		Result.DiffuseMeanFreePath *= Result.SurfaceOpacity;
	}
	else if (StrataSubSurfaceHeaderGetUseDiffusion(SSSData.Header))
	{
		const float3 StoredMFP = StrataSubSurfaceHeaderGetMFP(SSSData.Header);
		const float3 BaseColor = StrataSubsurfaceExtrasGetBaseColor(SSSData.Extras);

		// Stored MFP is in cm, converted into mm.
		const float3 MFPInMM = StoredMFP * BURLEY_CM_2_MM;

		// Channel with the largest MFP; it fills the fourth component below.
		const float LargestMFP = max3(MFPInMM.x, MFPInMM.y, MFPInMM.z);
		uint DominantChannel = 2;
		if (MFPInMM.r == LargestMFP)
		{
			DominantChannel = 0;
		}
		else if (MFPInMM.g == LargestMFP)
		{
			DominantChannel = 1;
		}

		const float3 DiffuseMFP = GetDMFPFromMFPApprox(BaseColor, MFPInMM);

		Result.SurfaceAlbedo = float4(BaseColor, BaseColor[DominantChannel]);
		Result.DiffuseMeanFreePath = float4(DiffuseMFP, DiffuseMFP[DominantChannel]);
		Result.WorldUnitScale = 1.0f; // no world unit scale change.

		// STRATA_TODO smooth transition based on pixel footprint w.r.t. MFPRadius and pixel footprint
		Result.SurfaceOpacity = 1.0f;
		Result.DiffuseMeanFreePath *= Result.SurfaceOpacity;
	}

	return Result;
}
#else
// Builds the Burley parameters for a legacy (GBuffer) pixel from its subsurface profile.
// The diffuse mean free path is modulated by the surface opacity, which is the GBuffer
// custom data alpha for subsurface-profile pixels and 0 for any other shading model.
FBurleyParameter GetBurleyParameters(uint SubsurfaceProfileInt, FGBufferData GBuffer)
{
	FBurleyParameter Result = (FBurleyParameter)0;

	Result.SurfaceAlbedo = GetSubsurfaceProfileSurfaceAlbedo(SubsurfaceProfileInt);
	Result.DiffuseMeanFreePath = DecodeDiffuseMeanFreePath(GetSubsurfaceProfileDiffuseMeanFreePath(SubsurfaceProfileInt));
	Result.WorldUnitScale = DecodeWorldUnitScale(GetSubsurfaceProfileWorldUnitScale(SubsurfaceProfileInt)) * BURLEY_CM_2_MM;

	// Mask should always be true, we can probably ignore it?
	Result.SurfaceOpacity = 0.0f;
	if (UseSubsurfaceProfile(GBuffer.ShadingModelID))
	{
		Result.SurfaceOpacity = GBuffer.CustomData.a;
	}
	Result.DiffuseMeanFreePath *= Result.SurfaceOpacity;

	return Result;
}
#endif
// from https://github.com/iryoku/separable-sss/tree/master/Demo
// Jorge Jimenez http://www.iryoku.com/
// http://www.iryoku.com/translucency/downloads/Real-Time-Realistic-Skin-Translucency.pdf
#include "SeparableSSS.ush"
//=============================================================================
// Returns true when UV lies inside the half-open unit square [0, 1) x [0, 1).
// The upper bound of both components is checked; the previous version tested UV.y < 1
// twice and therefore never rejected UV.x >= 1.
bool InUnitBox(float2 UV)
{
	return UV.x >= 0 && UV.y >= 0 && UV.x < 1 && UV.y < 1;
}
// Computes how much of the scattered result should be blended in, based on the
// size of the scatter radius in pixels at the depth of the given pixel.
// @param ProfileRadiusScale radius scale coming from the subsurface profile
// @param PixelRadiusScale per-pixel radius scale (opacity)
// @param UVSceneColor buffer UV used to look up the scene depth
// @param FullResInputSizeInverse inverse size of the input; only .x is used
// @return 0=don't blend in, 1:fully blend in
float ComputeFullResLerp(float ProfileRadiusScale, float PixelRadiusScale, float2 UVSceneColor, float2 FullResInputSizeInverse)
{
	// tweaked for skin, a more flat kernel might need a smaller value, around 2 seems reasonable because we do half res
	const float MinRadiusInPixels = 4.0f;
	const float RadiusScaler = SUBSURFACE_RADIUS_SCALE;

	// Step used to fetch the surrounding pixels, scaled by depth and by the profile.
	const float DepthScale = SubsurfaceParams.x / CalcSceneDepth(UVSceneColor);
	float KernelStep = DepthScale * RadiusScaler;
	KernelStep *= ProfileRadiusScale;

	const float RadiusInPixels = KernelStep / (FullResInputSizeInverse.x * 0.5f);
	const float RadiusFade = saturate(RadiusInPixels - MinRadiusInPixels);

	// opacity allows to scale the radius - at some point we should fade in the full resolution, we don't have a masking other than that.
	const float OpacityFade = saturate(PixelRadiusScale * 10);

	// todo: Subsurface has some non scatter contribution - all that should come from the Full res
	return RadiusFade * OpacityFade;
}
// Maps a profile id to a mini-font character for the visualization:
// ids 0..9 map to the digits, the following ids map to the letters _A_.._Z_,
// and anything beyond that maps to a question mark.
uint MiniFontCharFromSubsurfaceProfile(uint SubsurfaceProfileInt)
{
	if (SubsurfaceProfileInt <= 9)
	{
		return _0_ + SubsurfaceProfileInt;
	}

	const uint LetterIndex = SubsurfaceProfileInt - 10;
	if (LetterIndex <= _Z_ - _A_)
	{
		return _A_ + LetterIndex;
	}

	return _QUESTIONMARK_;
}
// visualization (doesn't have to be fast)
// Outputs input 0 unchanged, except inside the area where the remapped viewport UV falls
// into the unit box: there the pixel is gray by default, and subsurface-profile pixels get a
// color and a character identifying the profile id, faded by ComputeFullResLerp().
void VisualizePS(in noperspective float4 UVAndScreenPos : TEXCOORD0, float4 SvPosition : SV_POSITION, out float4 OutColor : SV_Target0)
{
	const float2 SceneUV = UVAndScreenPos.xy;
	OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, SceneUV);

	int2 PixelPos = (int2)SvPosition.xy;
	float2 ViewLocalUV = (PixelPos - SubsurfaceInput0_ViewportMin) * SubsurfaceInput0_ViewportSizeInverse;
	float2 IDAreaLocalUV = ViewLocalUV * 2 - 1.0f;

	if (!InUnitBox(IDAreaLocalUV))
	{
		return;
	}

	// Buffer UV of the pixel that is shown at this position of the ID area.
	const float2 IDAreaBufferUV = SubsurfaceInput0_ViewportMin * SubsurfaceInput0_ExtentInverse + IDAreaLocalUV * (SubsurfaceInput0_ViewportSize * SubsurfaceInput0_ExtentInverse);

#if STRATA_ENABLED
	const FStrataSubsurfaceHeader Header = LoadStrataSSSHeader(IDAreaBufferUV);
	const uint ProfileId = StrataSubSurfaceHeaderGetProfileId(Header);
	const bool bIsProfile = StrataSubSurfaceHeaderGetIsProfile(Header);
	const float PixelRadiusScale = StrataSubSurfaceHeaderGetProfileRadiusScale(Header);
#else
	const FScreenSpaceData PixelData = GetScreenSpaceData(IDAreaBufferUV);
	const uint ProfileId = ExtractSubsurfaceProfileInt(PixelData.GBuffer);
	const bool bIsProfile = UseSubsurfaceProfile(PixelData.GBuffer.ShadingModelID);
	const float PixelRadiusScale = PixelData.GBuffer.CustomData.a;
#endif

	const float ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, ProfileId).a;

	OutColor = float4(0.5f, 0.5f, 0.5f, 0);

	BRANCH if (bIsProfile)
	{
		// Fixed colors for a few well-known ids; every other id keeps the gray default.
		switch (ProfileId)
		{
		case 0:
			// default (no Profile)
			OutColor = float4(0.8f, 0.7f, 0.6f, 0);
			break;
		case 1:
			OutColor = float4(1, 0, 0, 0) * 0.5f;
			break;
		case 2:
			OutColor = float4(0, 1, 0, 0) * 0.5f;
			break;
		case 3:
			OutColor = float4(0, 0, 1, 0) * 0.5f;
			break;
		case 4:
			OutColor = float4(1, 0, 1, 0) * 0.5f;
			break;
		case 5:
			OutColor = float4(0, 1, 1, 0) * 0.5f;
			break;
		case 6:
			OutColor = float4(1, 1, 0, 0) * 0.5f;
			break;
		case 100:
			OutColor = float4(0, 0.2f, 0, 0);
			break;
		case 255:
			OutColor = float4(1, 1, 1, 0);
			break;
		default:
			break;
		}

		// One character per 8x8 pixel cell.
		int2 CellLeftTop = (PixelPos / 8) * 8;
		PrintCharacter(PixelPos, OutColor.rgb, float3(1, 1, 1), CellLeftTop, MiniFontCharFromSubsurfaceProfile(ProfileId));

		OutColor.rgb *= ComputeFullResLerp(ProfileRadiusScale, PixelRadiusScale, IDAreaBufferUV, SubsurfaceInput0_ExtentInverse);
	}
}
// Lighting split into its two components, as returned by ReconstructLighting().
struct SDiffuseAndSpecular
{
// Diffuse lighting; this is the part SetupSubsurfaceForOnePixel() feeds into scattering.
float3 Diffuse;
// Specular lighting; SubsurfaceRecombinePS() adds it back as the non-subsurface part.
float3 Specular;
};
// can be moved/shared
// Reads the RGB of input 0 at mip 0, at SceneUV displaced by an integer texel offset.
half3 LookupSceneColor(float2 SceneUV, int2 PixelOffset)
{
	// Using the sampler's built-in texel offset is the faster path.
	const float4 Texel = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, SceneUV, 0, PixelOffset);
	return Texel.rgb;
}
// Splits the lighting stored in input 0 into its diffuse and specular parts.
// Three encodings are handled, selected at compile time:
// - STRATA_OPAQUE_ROUGH_REFRACTION_ENABLED: the input holds diffuse lighting only.
// - checkerboard: each pixel holds one of the two parts, the other part is reconstructed
//   from the neighboring pixels.
// - otherwise: RGB holds the combined lighting and alpha is used as the diffuse luminance.
// @param UVSceneColor for the full res rendertarget (BufferSize) e.g. SceneColor or GBuffers
// @param ReconstructMethod 0/1/2/3 (should be a literal constant to allow compiler optimizations)
SDiffuseAndSpecular ReconstructLighting(float2 UVSceneColor, uint ReconstructMethod)
{
SDiffuseAndSpecular Ret;
#if STRATA_OPAQUE_ROUGH_REFRACTION_ENABLED
// In this mode, we only read isolated diffuse lighting.
// The early return makes the code below unreachable for this permutation.
float3 CenterSample = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0).rgb;
Ret.Diffuse = CenterSample;
Ret.Specular = 0.0f;
return Ret;
#endif
// If SUBSURFACE_CHANNEL_MODE is 0, checkerboard is forced on
#if SUBSURFACE_PROFILE_CHECKERBOARD || SUBSURFACE_CHANNEL_MODE == 0
{
bool bChecker = CheckerFromSceneColorUV(UVSceneColor);
// todo: We could alternate the diagonal with TemporalAA or even only 1 sample for low spec or 4 for high spec
// Quant0 is the value stored at this pixel, Quant1 the value reconstructed from its neighbors.
float3 Quant0 = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0).rgb;
// todo: expose as scalability setting (can be evaluate best without TemporalAA)
// 0:fast but pattern can appear, 1:better, 2: even better, 3: best but expensive
// NOTE(review): Quant1 stays uninitialized if ReconstructMethod is not one of 0..3.
float3 Quant1;
if(ReconstructMethod == 0)
{
// cheap, crappy
Quant1 = LookupSceneColor(UVSceneColor, int2(1, 0));
}
else if(ReconstructMethod == 1)
{
// acceptable but not perfect
Quant1 = 0.5f * (
LookupSceneColor(UVSceneColor, int2( 1, 0)) +
LookupSceneColor(UVSceneColor, int2(-1, 0)));
}
else if(ReconstructMethod == 2)
{
// almost same as 1?
Quant1 = 0.25f * (
LookupSceneColor(UVSceneColor, int2( 1, 0)) +
LookupSceneColor(UVSceneColor, int2( 0, 1)) +
LookupSceneColor(UVSceneColor, int2(-1, 0)) +
LookupSceneColor(UVSceneColor, int2( 0, -1)));
}
else if(ReconstructMethod == 3)
{
// very good
float3 A = LookupSceneColor(UVSceneColor, int2( 1, 0));
float3 B = LookupSceneColor(UVSceneColor, int2(-1, 0));
float3 C = LookupSceneColor(UVSceneColor, int2( 0, 1));
float3 D = LookupSceneColor(UVSceneColor, int2( 0, -1));
// Luminance could be green channel only
float a = Luminance(A);
float b = Luminance(B);
float c = Luminance(C);
float d = Luminance(D);
float ab = abs(a - b);
float cd = abs(c - d);
// take the average in the direction that avoids dither pattern
// (the pair with the smaller luminance difference is used)
Quant1 = 0.5f * lerp(A + B, C + D, ab > cd);
}
// bChecker selects which of the two values is diffuse and which is specular.
Ret.Diffuse = lerp(Quant1, Quant0, bChecker);
Ret.Specular = lerp(Quant0, Quant1, bChecker);
}
#else // SUBSURFACE_PROFILE_CHECKERBOARD
{
// If we're not doing checkerboard encoding, we just need to read a single pixel and decode (combined diffuse/spec in RGB)
float4 CenterSample = SubsurfaceInput0_Texture.SampleLevel(SharedSubsurfaceSampler0, UVSceneColor, 0);
float3 CombinedColor = CenterSample.rgb;
float DiffuseLuminance = CenterSample.a;
float CombinedLuminance = Luminance(CombinedColor);
// Fraction of the combined luminance that is diffuse; the remainder is specular.
// NOTE(review): CombinedLuminance can be 0 for a black pixel - confirm the division is benign there.
float DiffuseFactor = saturate(DiffuseLuminance / CombinedLuminance);
float SpecularFactor = 1.0f - DiffuseFactor;
Ret.Diffuse = CombinedColor * DiffuseFactor;
Ret.Specular = CombinedColor * SpecularFactor;
}
#endif // !SUBSURFACE_PROFILE_CHECKERBOARD
return Ret;
}
// Extracts the lighting that should be scattered for a single pixel.
// @param UVSceneColor for the full res rendertarget (BufferSize) e.g. SceneColor or GBuffers
// @return .RGB Color that should be scattered, .A:1 for subsurface scattering material, 0 for not
float4 SetupSubsurfaceForOnePixel(float2 UVSceneColor)
{
#if STRATA_ENABLED
	const FStrataSubsurfaceHeader Header = LoadStrataSSSHeader(UVSceneColor);
	const bool bScatters = StrataSubSurfaceHeaderGetUseDiffusion(Header);
#else
	FScreenSpaceData PixelData = GetScreenSpaceData(UVSceneColor);
	const bool bScatters = UseSubsurfaceProfile(PixelData.GBuffer.ShadingModelID);
#endif

	float3 ScatterColor = 0;
	float ValidSample = 0;

	BRANCH if (bScatters)
	{
		// '1' is lower quality but that is acceptable here
		SDiffuseAndSpecular Lighting = ReconstructLighting(UVSceneColor, 1);
		ScatterColor = Lighting.Diffuse;
		// it's a valid sample
		ValidSample = 1;
	}

	return float4(ScatterColor, ValidSample);
}
// Number of tiles recorded per tile type, indexed by SUBSURFACE_TILE_TYPE_*.
Buffer<uint> TileTypeCountBuffer;

// Mipmaps are only generated for the AFIS tile type, and only when its tile count
// reaches the threshold stored in SubsurfaceParams.w.
bool ShouldGenerateMipmaps(uint TileType)
{
	if (TileType != SUBSURFACE_TILE_TYPE_AFIS)
	{
		return false;
	}

	const uint NumTiles = TileTypeCountBuffer[TileType];
	return NumTiles >= SubsurfaceParams.w;
}
// Ref: https://graphics.pixar.com/library/ApproxBSSRDF/paper.pdf
#include "SubsurfaceBurleyNormalized.ush"
#if SUBSURFACE_BURLEY_COMPUTE
//-----------------------------------------------------------------------------------
// Indirect pass
// Indirect dispatch parameters
// Per tile type lists of thread group coordinates, stored as (x, y) pairs.
// They are filled by AddSeparableGroup / AddBurleyGroup / AddAllGroup.
RWBuffer<uint> RWSeparableGroupBuffer;
RWBuffer<uint> RWBurleyGroupBuffer;
RWBuffer<uint> RWAllGroupBuffer;
// Indirect dispatch (3 uints per tile type) and indirect draw (4 uints per tile type)
// arguments, written by BuildIndirectDispatchArgsCS.
RWBuffer<uint> RWIndirectDispatchArgsBuffer;
RWBuffer<uint> RWIndirectDrawArgsBuffer;
// Number of groups recorded per tile type; cleared by InitValueBufferCS and incremented
// atomically while groups are added.
RWBuffer<uint> RWTileTypeCountBuffer;
// Read-only group list consumed by MainIndirectDispatchCS.
Buffer<uint> GroupBuffer;
// Resets the per tile type counters to zero before the tiles are classified.
[numthreads(1, 1, 1)]
void InitValueBufferCS()
{
	UNROLL
	for (uint TileType = 0; TileType != SUBSURFACE_TILE_TYPE_COUNT; TileType++)
	{
		RWTileTypeCountBuffer[TileType] = 0;
	}
}
// Minimum tile count required for the condition to be set.
uint MinGenerateMipsTileCount;
// Index of the entry to test; used for both the tile count and the condition buffer.
uint Offset;
RWBuffer<uint> RWMipsConditionBuffer;

// Writes 1 into the condition buffer when the tile count at Offset reaches
// MinGenerateMipsTileCount, and 0 otherwise.
[numthreads(1, 1, 1)]
void FillMipsConditionBufferCS()
{
	const bool bEnoughTiles = TileTypeCountBuffer[Offset] >= MinGenerateMipsTileCount;
	RWMipsConditionBuffer[Offset] = bEnoughTiles ? 1 : 0;
}
// Converts the per tile type counters into indirect draw and indirect dispatch arguments.
// One thread handles one tile type; surplus threads do nothing.
[numthreads(8, 1, 1)]
void BuildIndirectDispatchArgsCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
	const bool bHandlesTileType = (DispatchThreadId.y == 0) && (DispatchThreadId.x < SUBSURFACE_TILE_TYPE_COUNT);
	if (!bHandlesTileType)
	{
		return;
	}

	const uint TileType = DispatchThreadId.x;
	const uint NumTiles = TileTypeCountBuffer[TileType];

	// Indirect draw: one instance per tile, 4 vertices for rect primitives and 6 otherwise.
	const uint DrawArgsBase = TileType * 4;
	RWIndirectDrawArgsBuffer[DrawArgsBase + 0] = SubsurfaceUniformParameters.bRectPrimitive > 0 ? 4 : 6; // VertexCountPerInstance
	RWIndirectDrawArgsBuffer[DrawArgsBase + 1] = NumTiles; // InstanceCount
	RWIndirectDrawArgsBuffer[DrawArgsBase + 2] = 0; // StartVertexLocation
	RWIndirectDrawArgsBuffer[DrawArgsBase + 3] = 0; // StartInstanceLocation

	// Indirect dispatch: one thread group per tile.
	const uint DispatchArgsBase = TileType * 3;
	RWIndirectDispatchArgsBuffer[DispatchArgsBase + 0] = NumTiles;
	RWIndirectDispatchArgsBuffer[DispatchArgsBase + 1] = 1;
	RWIndirectDispatchArgsBuffer[DispatchArgsBase + 2] = 1;
}
// Bit flags accumulated per thread group in SubsurfaceTypeFlag.
#define TYPE_SEPARABLE 0x1
#define TYPE_BURLEY 0x2
// Set by SetupIndirectCS for threads whose pixel lies inside the output viewport.
#define TYPE_IN_VIEWPORT 0x4
// Meaning of the two low bits:
// 0, no subsurface
// 1, Separable
// 2, Burley
// 3, Both
// OR of the flags of all threads in the group (see AddSubsurfaceComputeGroup).
groupshared uint SubsurfaceTypeFlag;
// Returns the filter type flag of a profile: TYPE_BURLEY when the profile uses Burley,
// TYPE_SEPARABLE otherwise.
uint GetSubsurfaceType(uint ProfileId)
{
	if (GetSubsurfaceProfileUseBurley(ProfileId))
	{
		return TYPE_BURLEY;
	}
	return TYPE_SEPARABLE;
}
// Appends a thread group coordinate to the separable group list.
// The slot is reserved by atomically incrementing the separable tile counter; the counter
// is incremented even when the list is full, in which case nothing is stored.
void AddSeparableGroup(uint2 GroupXY)
{
	uint Slot = 0;
	InterlockedAdd(RWTileTypeCountBuffer[SUBSURFACE_TILE_TYPE_SEPARABLE], 1, Slot);

	if (Slot < SubsurfaceUniformParameters.MaxGroupCount)
	{
		const uint Base = 2 * Slot;
		RWSeparableGroupBuffer[Base + 0] = GroupXY.x;
		RWSeparableGroupBuffer[Base + 1] = GroupXY.y;
	}
}
// Appends a thread group coordinate to the Burley group list.
// The slot is reserved by atomically incrementing the AFIS tile counter; the counter
// is incremented even when the list is full, in which case nothing is stored.
void AddBurleyGroup(uint2 GroupXY)
{
	uint Slot = 0;
	InterlockedAdd(RWTileTypeCountBuffer[SUBSURFACE_TILE_TYPE_AFIS], 1, Slot);

	if (Slot < SubsurfaceUniformParameters.MaxGroupCount)
	{
		const uint Base = 2 * Slot;
		RWBurleyGroupBuffer[Base + 0] = GroupXY.x;
		RWBurleyGroupBuffer[Base + 1] = GroupXY.y;
	}
}
// Appends a thread group coordinate to the list of all subsurface groups.
// The slot is reserved by atomically incrementing the "all" tile counter; the counter
// is incremented even when the list is full, in which case nothing is stored.
void AddAllGroup(uint2 GroupXY)
{
	uint Slot = 0;
	InterlockedAdd(RWTileTypeCountBuffer[SUBSURFACE_TILE_TYPE_ALL], 1, Slot);

	if (Slot < SubsurfaceUniformParameters.MaxGroupCount)
	{
		const uint Base = 2 * Slot;
		RWAllGroupBuffer[Base + 0] = GroupXY.x;
		RWAllGroupBuffer[Base + 1] = GroupXY.y;
	}
}
// Merges the type flags of every thread of the group and lets the first thread register
// the group in the matching lists. Must be called by all threads of the group, because it
// contains a group barrier.
// @param GI flattened thread index inside the group
// @param Type TYPE_* flags of the calling thread
// @param GroupXY coordinate of the thread group
void AddSubsurfaceComputeGroup(uint GI, uint Type, uint2 GroupXY)
{
	InterlockedOr(SubsurfaceTypeFlag, Type);
	GroupMemoryBarrierWithGroupSync();

	// After the barrier the flag holds the union of all threads' types.
	const uint GroupFlags = SubsurfaceTypeFlag;
	const bool bInViewport = (GroupFlags & TYPE_IN_VIEWPORT) != 0;

	if (GI == 0 && bInViewport)
	{
		const bool bHasSeparable = (GroupFlags & TYPE_SEPARABLE) != 0;
		const bool bHasBurley = (GroupFlags & TYPE_BURLEY) != 0;

		if (bHasSeparable)
		{
			AddSeparableGroup(GroupXY);
		}
		if (bHasBurley)
		{
			AddBurleyGroup(GroupXY);
		}
		if (bHasSeparable || bHasBurley)
		{
			AddAllGroup(GroupXY);
		}
	}
}
// Output of the setup pass: RGB is the lighting to scatter, A carries depth.
RWTexture2D<float4> SetupTexture;
// Not referenced in the visible part of this file.
RWTexture2D<float4> ProfileEdgeMask;
// Setup pass. For every output pixel it extracts the lighting to scatter and the depth into
// SetupTexture, optionally caches the profile id, determines which filter type the pixel
// needs, and finally registers the thread group in the tile lists.
[numthreads(THREAD_SIZE_X, THREAD_SIZE_Y, 1)]
void SetupIndirectCS(uint3 DT_ID : SV_DispatchThreadID,
uint3 G_ID : SV_GroupID,
uint3 GT_ID : SV_GroupThreadID,
uint GI : SV_GroupIndex)
{
// initialize the group shared variable
if (GI == 0)
{
SubsurfaceTypeFlag = 0;
}
GroupMemoryBarrierWithGroupSync();
uint2 Pos = DT_ID.xy + Output_ViewportMin;
float2 BufferUV = ConvertGridPos2UV(Pos);
uint Type = 0;
float4 OutColor = 0;
float4 OutColor2 = 0;
#if SUBSURFACE_HALF_RES
// Half resolution: combine the four full resolution pixels covered by this output pixel.
// order aligned with Gather() hardware implementation
// RGB: color*A, A:weight 0 if no subsurface scattering
float4 A = SetupSubsurfaceForOnePixel(min(BufferUV + float2(-0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
float4 B = SetupSubsurfaceForOnePixel(min(BufferUV + float2(0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
float4 C = SetupSubsurfaceForOnePixel(min(BufferUV + float2(0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
float4 D = SetupSubsurfaceForOnePixel(min(BufferUV + float2(-0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax));
float4 Sum = (A + B) + (C + D);
// Each alpha value of A/B/C/D should be either 1.0f or 0.0, so a check of 0.5 should be safe
bool bHasSubsurface = (Sum.a >= .50f);
// Sum.a should either be one of {0,1,2,3,4}, add a max of 1 to avoid floating point specials. If Sum.a is 0, Sum.rgb should be 0 as well so it
// shouldn't matter what Div is.
float Div = 1.0f / max(Sum.a, 1.0f);
OutColor.rgb = Sum.rgb * Div;
float4 FourDepth = GatherSceneDepth(BufferUV, SubsurfaceInput0_ExtentInverse);
// average all valid depth values to a single one
float SingleDepth = dot(FourDepth, float4(A.a, B.a, C.a, D.a)) * Div;
OutColor.a = SingleDepth;
if (OutColor.a > 0)
{
bHasSubsurface = true;
}
//BufferUV += min(float2(-0.25, -0.25f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
#else // SUBSURFACE_HALF_RES
bool bHasSubsurface = false;
OutColor = SetupSubsurfaceForOnePixel(BufferUV);
if (OutColor.a > 0)
{
bHasSubsurface = true;
// Replace the validity flag in alpha by the scene depth, dithered with per-pixel noise.
float SourceDepth = CalcSceneDepth(BufferUV);
float Noise = InterleavedGradientNoise(float2(Pos.x,Pos.y), View.StateFrameIndexMod8);
// Divide by 1024 because the mantissa is 10 bits.
OutColor.a = SourceDepth + Noise * (SourceDepth / 1024.0f);
//@TODO: sparkling
//suppress the edge color lighting information at edge.
//with firefly elimination
}
#endif // SUBSURFACE_HALF_RES
#if !SUBSURFACE_FORCE_SEPARABLE && ENABLE_PROFILE_ID_CACHE
// Read for every pixel, independent of bHasSubsurface; written to ProfileIdTexture below.
uint ProfileId = GetSubsurfaceProfileId(BufferUV);
#endif
// setup the Subsurface type
uint SelectedProfile = 0;
if (bHasSubsurface)
{
#if SUBSURFACE_HALF_RES || SUBSURFACE_FORCE_SEPARABLE
#if 0
// the code here is designed for half resolution burley.
float2 UVA = min(BufferUV + float2(-0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
uint PidA = GetSubsurfaceProfileId(UVA);
float2 UVB = min(BufferUV + float2(0.5, 0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
uint PidB = GetSubsurfaceProfileId(UVB);
float2 UVC = min(BufferUV + float2(0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
uint PidC = GetSubsurfaceProfileId(UVC);
float2 UVD = min(BufferUV + float2(-0.5, -0.5f) * SubsurfaceInput0_ExtentInverse, SubsurfaceInput0_UVViewportBilinearMax);
uint PidD = GetSubsurfaceProfileId(UVD);
// Gather available and use the last subsurface type for the current pixel.
SelectedProfile = lerp(SelectedProfile, PidA, A.a);
SelectedProfile = lerp(SelectedProfile, PidB, B.a);
SelectedProfile = lerp(SelectedProfile, PidC, C.a);
SelectedProfile = lerp(SelectedProfile, PidD, D.a);
// record the uv of this pixel
BufferUV = lerp(BufferUV, UVA, A.a);
BufferUV = lerp(BufferUV, UVB, B.a);
BufferUV = lerp(BufferUV, UVC, C.a);
BufferUV = lerp(BufferUV, UVD, D.a);
SelectedProfile = GetSubsurfaceProfileId(BufferUV);
Type = GetSubsurfaceType(SelectedProfile);
#else
Type = TYPE_SEPARABLE; // use separable for half resolution
#endif
#else
// Full resolution: the profile decides between Burley and separable.
SelectedProfile = GetSubsurfaceProfileId(BufferUV);
Type = GetSubsurfaceType(SelectedProfile);
#endif
}
// Only pixels inside the output viewport write results and mark the group as in-viewport.
if (all(Pos < Output_ViewportMax))
{
Type |= TYPE_IN_VIEWPORT;
SetupTexture[Pos] = OutColor;
#if !SUBSURFACE_FORCE_SEPARABLE && ENABLE_PROFILE_ID_CACHE
ProfileIdTexture[Pos] = ProfileId;
#endif
}
// Called by every thread (it contains a group barrier); thread 0 registers the group.
AddSubsurfaceComputeGroup(GI, Type, G_ID.xy);
}
// Main filter pass, launched indirectly with one thread group per recorded tile.
// Each group looks up the tile coordinate it stands for in GroupBuffer, then runs either
// the Burley filter or one pass of the separable filter, depending on the permutation.
[numthreads(THREAD_SIZE_X*THREAD_SIZE_Y, 1, 1)]
void MainIndirectDispatchCS(
uint3 IndirectG_ID : SV_GroupID,
uint GI : SV_GroupIndex
)
{
// The first element is used for counting
// NOTE(review): the comment above looks stale - the group list is indexed from element 0 here.
// Recover the original 2D group id, then the 2D thread id from the flattened group index.
uint2 G_ID = uint2(GroupBuffer[2 * IndirectG_ID.x + 0], GroupBuffer[2 * IndirectG_ID.x + 1]);
uint2 DT_ID = G_ID * float2(THREAD_SIZE_X, THREAD_SIZE_Y) + float2(GI % THREAD_SIZE_X, GI / THREAD_SIZE_X);
//1. Call function based on which function is defined and in which phase
#if SUBSURFACE_TYPE == SSS_TYPE_BURLEY && !SUBSURFACE_FORCE_SEPARABLE
BurleyComputeMain(DT_ID, G_ID, GI);
#elif SUBSURFACE_TYPE == SSS_TYPE_SSSS
uint2 Pos = DT_ID.xy*SUBSURFACE_GROUP_SIZE / THREAD_SIZE_1D + Output_ViewportMin;
float2 BufferUV = ConvertGridPos2UV(Pos);
uint2 BufferPos = Pos;
#if STRATA_ENABLED
// Profile pixels follow the profile's Burley setting; other pixels count as Burley when they use diffusion.
const FStrataSubsurfaceHeader SSSHeader = LoadStrataSSSHeader(BufferUV);
const uint bUseBurley = StrataSubSurfaceHeaderGetIsProfile(SSSHeader) ? GetSubsurfaceProfileUseBurley(StrataSubSurfaceHeaderGetProfileId(SSSHeader)) : StrataSubSurfaceHeaderGetUseDiffusion(SSSHeader);
#else
const FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(BufferUV);
const uint SubsurfaceProfileInt = ExtractSubsurfaceProfileInt(ScreenSpaceData.GBuffer);
const bool bUseBurley = GetSubsurfaceProfileUseBurley(SubsurfaceProfileInt);
#endif
// We need to check Burley here because the previous pass might write burley samplings here unless we have SUBSURFACE_FORCE_SEPARABLE to overwrite the setting.
#if !SUBSURFACE_FORCE_SEPARABLE
BRANCH if (bUseBurley)
{
return;
}
#endif
// pass one and pass two
#if SUBSURFACE_PASS == SUBSURFACE_PASS_ONE
// horizontal
float2 ViewportDirectionUV = float2(1, 0) * SUBSURFACE_RADIUS_SCALE;
#elif SUBSURFACE_PASS == SUBSURFACE_PASS_TWO
// vertical
// The extra factor is the extent aspect ratio (width / height).
float2 ViewportDirectionUV = float2(0, 1) * SUBSURFACE_RADIUS_SCALE * (SubsurfaceInput0_Extent.x * SubsurfaceInput0_ExtentInverse.y);
#endif
// Scale by the fraction of the extent width that the viewport covers.
ViewportDirectionUV *= (SubsurfaceInput0_ViewportSize.x * SubsurfaceInput0_ExtentInverse.x);
float4 OutColor = SSSSBlurPS(BufferPos, BufferUV, ViewportDirectionUV, false, SubsurfaceInput0_Extent);
#if SUBSURFACE_PASS == SUBSURFACE_PASS_TWO
// second pass prepares the setup from the recombine pass which doesn't need depth but wants to reconstruct the color
OutColor.a = GetMaskFromDepthInAlpha(OutColor.a);
#endif
// Only pixels inside the output viewport are written. SSSColorUAV is declared outside this file.
if (all(Pos < Output_ViewportMax))
{
SSSColorUAV[Pos] = OutColor;
}
#endif
}
#endif
// Recombines the half res Subsurface filtered lighting contribution (upsampled and renormalized with the alpha)
// with the SceneColor.
// Pixels that do not use subsurface diffusion are passed through from input 0 unchanged.
// For the others the output is lerp(diffuse, scattered, tint * fade) * base color + specular.
void SubsurfaceRecombinePS(in FScreenVertexOutput Input, out float4 OutColor : SV_Target0)
{
const float2 BufferUV = Input.UV.xy;
#if STRATA_ENABLED
FStrataSubsurfaceData SSSData = LoadStrataSSSData(BufferUV);
const bool bUseSSSDiffusion = StrataSubSurfaceHeaderGetUseDiffusion(SSSData.Header);
#else
FScreenSpaceData ScreenSpaceData = GetScreenSpaceData(BufferUV);
const bool bUseSSSDiffusion = UseSubsurfaceProfile(ScreenSpaceData.GBuffer.ShadingModelID);
#endif
// Early out: nothing to recombine for pixels without subsurface diffusion.
if (!bUseSSSDiffusion)
{
OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, BufferUV);
return;
}
#if STRATA_ENABLED
const uint ProfileId = StrataSubSurfaceHeaderGetProfileId(SSSData.Header);
// we multiply the base color later in to get more crisp human skin textures (scanned data always has Subsurface included)
const float3 StoredBaseColor = StrataSubsurfaceExtrasGetBaseColor(SSSData.Extras);
const float PixelRadiusScale = StrataSubSurfaceHeaderGetProfileRadiusScale(SSSData.Header);
// asset specific color
// Tint stays 0 (no scattered contribution) when the pixel is neither a profile nor per-pixel.
float3 Tint = 0.0f;
float ProfileRadiusScale = 1.0f;
if (StrataSubSurfaceHeaderGetIsProfile(SSSData.Header))
{
Tint = GetSubsurfaceProfileKernel(SSSS_TINT_SCALE_OFFSET, ProfileId).rgb;
ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, ProfileId).a;
}
else if (ProfileId == SSS_PROFILE_ID_PERPIXEL)
{
// This color is used to lerp 'incident light' into 'diffused light', i.e., controlling the sharpness of the lighting.
// In case of direct DMFP control, this is not needed.
Tint = float3(1, 1, 1);
}
#else
const uint ProfileId = ExtractSubsurfaceProfileInt(ScreenSpaceData.GBuffer);
// we multiply the base color later in to get more crisp human skin textures (scanned data always has Subsurface included)
const float3 StoredBaseColor = ScreenSpaceData.GBuffer.StoredBaseColor;
const float PixelRadiusScale = ScreenSpaceData.GBuffer.CustomData.a;
// asset specific color
const float3 Tint = GetSubsurfaceProfileKernel(SSSS_TINT_SCALE_OFFSET, ProfileId).rgb;
// The sample location (.a) of the last kernel entry is used as the profile radius scale.
const float ProfileRadiusScale = GetSubsurfaceProfileKernel(SSSS_N_KERNELWEIGHTOFFSET + SSSS_N_KERNELWEIGHTCOUNT - 1, ProfileId).a;
#endif
float3 SSSColor = float3(0, 0, 0);
float LerpFactor = 1;
#if SUBSURFACE_RECOMBINE_MODE != SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING
#if SUBSURFACE_HALF_RES
// fade out subsurface scattering if radius is too small to be more crisp (not blend with half resolution)
// minor quality improvement (faces are more detailed in distance)
LerpFactor = ComputeFullResLerp(ProfileRadiusScale, PixelRadiusScale, BufferUV, SubsurfaceInput1_ExtentInverse);
#endif // SUBSURFACE_HALF_RES
{
// subsurface scattering buffer has garbage data outside of viewport bilinear min/max.
float2 ClampedBufferUV = clamp(BufferUV, SubsurfaceInput1_UVViewportBilinearMin, SubsurfaceInput1_UVViewportBilinearMax);
float4 SSSColorWithAlpha = Texture2DSampleLevel(SubsurfaceInput1_Texture, SharedSubsurfaceSampler1, ClampedBufferUV, 0);
// renormalize to dilate RGB to fix half res upsampling artifacts
// (the max() guards against a division by zero)
SSSColor = SSSColorWithAlpha.rgb / max(SSSColorWithAlpha.a, 0.00001f);
}
#else // SUBSURFACE_RECOMBINE_MODE == SUBSURFACE_RECOMBINE_MODE_NO_SCATTERING
// Scalability requests no Scatter, but we still need to reconstruct a color
LerpFactor = 0;
#endif // SUBSURFACE_RECOMBINE_MODE
float3 DirectColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, BufferUV).rgb;
// bIsHalfResUpsample is hard-coded to false, so the Burley center sample blend below never runs.
bool bIsHalfResUpsample = false;
if (bIsHalfResUpsample && GetSubsurfaceProfileUseBurley(ProfileId))
{
#if STRATA_ENABLED
FBurleyParameter BurleyParameter = GetBurleyParameters(SSSData);
#else
FBurleyParameter BurleyParameter = GetBurleyParameters(ProfileId, ScreenSpaceData.GBuffer);
#endif
float Depth = CalcSceneDepth(BufferUV);
float3 CenterSampleWeight = CalculateCenterSampleWeight(Depth, BurleyParameter);
SSSColor = lerp(SSSColor,DirectColor,CenterSampleWeight);
}
// High recombine quality uses reconstruction method 3, otherwise method 1.
uint ReconstructMethod = (SUBSURFACE_RECOMBINE_QUALITY == SUBSURFACE_RECOMBINE_QUALITY_HIGH) ? 3 : 1;
SDiffuseAndSpecular DiffuseAndSpecular = ReconstructLighting(BufferUV, ReconstructMethod);
float3 ExtractedNonSubsurface = DiffuseAndSpecular.Specular;
float3 FadedTint = Tint * LerpFactor;
// combine potentially half res with full res
float3 SubsurfaceLighting = lerp(DiffuseAndSpecular.Diffuse, SSSColor, FadedTint);
// Base color is applied here; specular is added back unmodified. Alpha is written as 0.
OutColor = float4(SubsurfaceLighting * StoredBaseColor + ExtractedNonSubsurface, 0.0f);
}
// Plain copy of input 0 at mip 0, without any recombine work.
void SubsurfaceRecombineCopyPS(in FScreenVertexOutput Input, out float4 OutColor : SV_Target0)
{
	OutColor = Texture2DSampleLevel(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, Input.UV.xy, 0);
}
// Plain copy of input 0, sampled at the interpolated UV.
void SubsurfaceViewportCopyPS(noperspective float4 InUV : TEXCOORD0, out float4 OutColor : SV_Target0)
{
	const float2 SourceUV = InUV.xy;
	OutColor = Texture2DSample(SubsurfaceInput0_Texture, SharedSubsurfaceSampler0, SourceUV);
}