You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
The basic approach is to add HLSL types FLWCScalar, FLWCMatrix, FLWCVector, etc. Inside shaders, absolute world space position values should be represented as FLWCVector3. Matrices that transform *into* absolute world space become FLWCMatrix. Matrices that transform *from* world space become FLWCInverseMatrix. Generally LWC values work by extending the regular 'float' value with an additional tile coordinate. The final tile size will be a trade-off between scale/accuracy; I'm using 256k for now, but it may need to be adjusted. The value represented by an FLWCVector thus becomes V.Tile * TileSize + V.Offset. Most operations can be performed directly on LWC values. There are HLSL functions like LWCAdd, LWCSub, LWCMultiply, LWCDivide (operator overloading would be really nice here). The goal is to stay with LWC values for as long as needed, then convert to regular float values when possible. One thing that comes up a lot is working in translated (rather than absolute) world space. WorldSpace + View.PrevPreViewTranslation = TranslatedWorldSpace. Except 'View.PrevPreViewTranslation' is now an FLWCVector3, and WorldSpace quantities should be as well. So that becomes LWCAdd(WorldSpace, View.PrevPreViewTranslation) = TranslatedWorldSpace. Assuming that we're talking about a position that's "reasonably close" to the camera, it should be safe to convert the translated WS value to float. The 'tile' coordinates of the 2 LWC values should cancel out when added together in this case. I've done some work throughout the shader code to do this. Materials fully support LWC values as well. Projective texturing and vertex animation materials that I've tested work correctly even when positioned "far away" from the origin. Lots of work remains to fully convert all of our shader code. There's a function LWCHackToFloat(), which is a simple wrapper for LWCToFloat(). 
The idea of HackToFloat is to mark places that need further attention, where I'm simply converting absolute WS positions to float, to get shaders to compile. Shaders converted in this way should continue to work for all existing content (without LWC-scale values), but they will break if positions get too large. General overview of changed files: LargeWorldCoordinates.ush - This defines the FLWC types and operations GPUScene.cpp, SceneData.ush - Primitives add an extra 'float3' tile coordinate. Instance data is unchanged, so instances need to stay within single-precision range of the primitive origin. Could potentially split instances behind the scenes (I think) if we don't want this limitation HLSLMaterialDerivativeAutogen.cpp, HLSLMaterialTranslator.cpp, Preshader.cpp - Translated materials to use LWC values SceneView.cpp, SceneRelativeViewMatrices.cpp, ShaderCompiler.cpp, InstancedStereo.ush - View uniform buffer includes LWC values where appropriate #jira UE-117101 #rb arne.schober, Michael.Galetzka [CL 17787435 by Ben Ingram in ue5-main branch]
397 lines
12 KiB
Plaintext
397 lines
12 KiB
Plaintext
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "Common.ush"
|
|
#include "ScreenPass.ush"
|
|
#include "PostProcessCommon.ush"
|
|
|
|
// Default to a single sample unless the compiling code overrides the MSAA count.
#ifndef MSAA_SAMPLE_COUNT
	#define MSAA_SAMPLE_COUNT 1
#endif

// Number of depth neighborhood samples used by the temporal upsample:
// 5 = plus shape, 6 = plus shape + nearest diagonal, 9 = full 3x3 square.
#define CONFIG_SAMPLES 6

// Offsets of the 3x3 neighborhood around the nearest input pixel K, indexed
// row-major top-left to bottom-right:
//
//     0 | 1 | 2
//    ---+---+---
//     3 | K | 5      (K = center of the nearest input pixel; the output pixel
//    ---+---+---      center O lies somewhere inside K's texel)
//     6 | 7 | 8
//
static const int2 kOffsets3x3[9] =
{
	int2(-1, -1), int2( 0, -1), int2( 1, -1),
	int2(-1,  0), int2( 0,  0), int2( 1,  0), // middle entry is K
	int2(-1,  1), int2( 0,  1), int2( 1,  1),
};

// Offsets of the 2x2 quad whose top-left pixel T is nearest to the output
// pixel center O:
//
//     T | .
//    ---+---
//     . | .
//
static const int2 Offsets2x2[4] =
{
	int2( 0, 0), // T
	int2( 1, 0),
	int2( 0, 1),
	int2( 1, 1),
};

// Sampling order over kOffsets3x3 for the full square: center first, then the
// eight neighbors.
static const uint kSquareIndexes3x3[9] = { 4u, 0u, 1u, 2u, 3u, 5u, 6u, 7u, 8u };

// Sampling order over kOffsets3x3 for the plus (+) shape: center, then the four
// edge-adjacent neighbors.
static const uint kPlusIndexes3x3[5] = { 4u, 1u, 3u, 5u, 7u };
|
|
|
|
// Editor primitives are rendered into dedicated color/depth targets; when MSAA is
// enabled they are multi-sampled and resolved manually in the composite pass below.
#if MSAA_SAMPLE_COUNT > 1
Texture2DMS<float4, MSAA_SAMPLE_COUNT> EditorPrimitivesColor;
Texture2DMS<float, MSAA_SAMPLE_COUNT> EditorPrimitivesDepth;
#else
Texture2D EditorPrimitivesColor;
Texture2D<float> EditorPrimitivesDepth;
#endif

// Scene color that the editor primitives get composited over.
Texture2D ColorTexture;
SamplerState ColorSampler;

// Scene depth used to occlusion-test editor primitives.
Texture2D DepthTexture;
SamplerState DepthSampler;

// Scene velocity; only referenced by the compiled-out (#if 0) per-pixel
// reprojection path in MainTemporalUpsampleEditorDepthPS.
Texture2D VelocityTexture;
SamplerState VelocitySampler;

// Previous frame's temporally-upsampled editor depth history.
Texture2D PrevHistoryTexture;
SamplerState PrevHistorySampler;

// Viewport/extent constant sets for each texture domain (Min/Size/ExtentInverse/...).
SCREEN_PASS_TEXTURE_VIEWPORT(Color)
SCREEN_PASS_TEXTURE_VIEWPORT(Depth)
SCREEN_PASS_TEXTURE_VIEWPORT(PrevHistory)
SCREEN_PASS_TEXTURE_VIEWPORT(History)
SCREEN_PASS_TEXTURE_VIEWPORT(Output)
// Screen transform from color UV space into depth UV space.
FScreenTransform ColorToDepth;

// Subpixel jitter that was applied when DepthTexture was rendered; subtracted when
// sampling so lookups are de-jittered.
float2 DepthTextureJitter;
// Non-zero when the view history is invalid (camera cut) and must be refreshed.
uint bCameraCut;

// Per-MSAA-sample subpixel offsets (xy used) for the manual resolve.
float4 SampleOffsetArray[MSAA_SAMPLE_COUNT];

// Non-zero to draw editor gizmos fully opaque (disables the hidden-pattern dimming).
uint bOpaqueEditorGizmo;
// Non-zero to treat any non-null editor depth sample as fully covered (Weight = 1).
uint bCompositeAnyNonNullDepth;
|
|
|
|
// Temporally upsamples the scene depth into the editor-primitive depth history so
// that depth testing of editor gizmos stays stable under TAA/TSR jitter.
// Writes the filtered device Z of the output (history) pixel to SV_Target0.
void MainTemporalUpsampleEditorDepthPS(
	float4 SvPosition : SV_POSITION,
	out float OutDeviceZ : SV_Target0)
{
	// UV of the output pixel within the history viewport, and its screen position.
	float2 ViewportUV = (SvPosition.xy - History_ViewportMin) * History_ViewportSizeInverse;
	float2 ScreenPos = ViewportUVToScreenPos(ViewportUV);

	// Pixel coordinate of the center of output pixel O in the input viewport.
	float2 PPCo = ViewportUV * Depth_ViewportSize + DepthTextureJitter;

	// Pixel coordinate of the center of the nearest input pixel K.
	float2 PPCk = floor(PPCo) + 0.5;

	// Sample nearest depth
	float InputDeviceZ;
	{
		float2 SampleInputBufferUV = (Depth_ViewportMin + PPCk) * Depth_ExtentInverse;
		SampleInputBufferUV = clamp(SampleInputBufferUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);

		InputDeviceZ = Texture2DSampleLevel(DepthTexture, DepthSampler, SampleInputBufferUV, 0).r;
	}

	// Compute the screen position to sample in history
	float2 PrevScreenPos = ScreenPos;
	#if 1
	{
		// Reproject with camera-only motion (ClipToPrevClip). The per-pixel velocity
		// path below is compiled out.
		float4 ThisClip = float4(ScreenPos, InputDeviceZ, 1);
		float4 PrevClip = mul(ThisClip, View.ClipToPrevClip);

		PrevScreenPos = PrevClip.xy / PrevClip.w;

		#if 0
		{
			float2 SampleInputBufferUV = (Depth_ViewportMin + PPCk) * Depth_ExtentInverse;
			SampleInputBufferUV = clamp(SampleInputBufferUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);

			// Dynamic geometry writes velocity; prefer it over camera-only reprojection.
			float4 EncodedVelocity = VelocityTexture.SampleLevel(VelocitySampler, SampleInputBufferUV, 0);
			bool DynamicN = EncodedVelocity.x > 0.0;
			if (DynamicN)
			{
				PrevScreenPos = ScreenPos - DecodeVelocityFromTexture(EncodedVelocity).xy;
			}
		}
		#endif
	}
	#endif

	// Sample the history.
	float PrevHistoryDeviceZ;
	{
		float2 PrevHistoryBufferUV = (PrevHistory_ScreenPosToViewportScale * PrevScreenPos + PrevHistory_ScreenPosToViewportBias) * PrevHistory_ExtentInverse;
		PrevHistoryBufferUV = clamp(PrevHistoryBufferUV, PrevHistory_UVViewportBilinearMin, PrevHistory_UVViewportBilinearMax);

		PrevHistoryDeviceZ = Texture2DSampleLevel(PrevHistoryTexture, PrevHistorySampler, PrevHistoryBufferUV, 0).r;
	}

	// Correct the DeviceZ from the previous depth to the current depth.
	float CorrectedPrevHistoryDeviceZ;
	#if 1
	{
		float PrevHistoryDepth = ConvertFromDeviceZ(PrevHistoryDeviceZ);

		// LWC: subtract the two large-world pre-view translations while still in LWC
		// form, then convert to float — the tile coordinates cancel, so the camera
		// delta stays in single-precision range.
		const float3 PreViewTranslationOffset = LWCToFloat(LWCSubtract(PrimaryView.PreViewTranslation, PrimaryView.PrevPreViewTranslation));
		const float3 PrevHistoryTranslatedWorldPosition = mul(float4(PrevScreenPos * PrevHistoryDepth, PrevHistoryDepth, 1), View.PrevScreenToTranslatedWorld).xyz + PreViewTranslationOffset;

		// Re-project the reconstructed translated-world position into the current
		// view to get the depth it would have this frame.
		float CorrectedPrevHistoryDepth = mul(float4(PrevHistoryTranslatedWorldPosition, 1.0), View.TranslatedWorldToView).z;

		CorrectedPrevHistoryDeviceZ = ConvertToDeviceZ(CorrectedPrevHistoryDepth);
	}
	#else
	{
		CorrectedPrevHistoryDeviceZ = PrevHistoryDeviceZ;
	}
	#endif

	// Replaces NaN from history with 0.0 (a NaN operand makes min() return the other
	// argument, and the double negation preserves the sign of valid values).
	CorrectedPrevHistoryDeviceZ = -min(-CorrectedPrevHistoryDeviceZ, 0.0);

	// Sample current frame
	bool bUpdateHistory;
	float InputMinDeviceZ;
	float InputMaxDeviceZ;
	{
		// Vector in pixel between pixel K -> O.
		float2 dKO = float2(PPCo - PPCk);

		// Refresh the history when the output pixel center lands close enough to the
		// input sample center, or always on a camera cut.
		// NOTE(review): only the .x components of the viewport sizes are compared
		// for both axes, which assumes an isotropic upsample factor — confirm for
		// anamorphic viewports.
		bUpdateHistory = all(abs(dKO) < 0.5 * (Depth_ViewportSize.x * History_ViewportSizeInverse.x)) || bCameraCut != 0;

		InputMinDeviceZ = InputDeviceZ;
		InputMaxDeviceZ = InputDeviceZ;

		// Build the neighborhood min/max device Z used below to clamp (reject) the
		// history toward the current frame.
		UNROLL_N(CONFIG_SAMPLES - 1)
		for (uint SampleId = 1; SampleId < CONFIG_SAMPLES; SampleId++)
		{
			float2 PixelOffset;

			#if CONFIG_SAMPLES == 9
			{
				// Full 3x3 square neighborhood.
				const uint SampleIdx = kSquareIndexes3x3[SampleId];
				PixelOffset = kOffsets3x3[SampleIdx];
			}
			#elif CONFIG_SAMPLES == 5 || CONFIG_SAMPLES == 6
			{
				if (SampleId == 5)
				{
					// 6th sample: the diagonal neighbor on the side of the output pixel.
					PixelOffset = SignFastInt(float2(dKO));
				}
				else
				{
					// Plus-shaped neighborhood.
					const uint SampleIdx = kPlusIndexes3x3[SampleId];
					PixelOffset = kOffsets3x3[SampleIdx];
				}
			}
			#else
				#error Unknown sample count
			#endif

			float2 SampleInputBufferUV = (Depth_ViewportMin + PPCk + PixelOffset) * Depth_ExtentInverse;
			SampleInputBufferUV = clamp(SampleInputBufferUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);

			float SampleDeviceZ = Texture2DSampleLevel(DepthTexture, DepthSampler, SampleInputBufferUV, 0).r;

			InputMinDeviceZ = min(InputMinDeviceZ, SampleDeviceZ);
			InputMaxDeviceZ = max(InputMaxDeviceZ, SampleDeviceZ);
		}
	}

	// Reject history based on neighborhood
	float ClampedHistoryDeviceZ = clamp(CorrectedPrevHistoryDeviceZ, InputMinDeviceZ, InputMaxDeviceZ);

	// Output history
	float FinalHistoryDeviceZ = bUpdateHistory ? InputDeviceZ : ClampedHistoryDeviceZ;

	// Flush any remaining NaN to 0.0 before writing out (same trick as above).
	OutDeviceZ = -min(-FinalHistoryDeviceZ, 0.0);
}
|
|
|
|
// Copies the scene depth into the editor-primitive depth target (via SV_DEPTH) so
// that editor primitives can be depth tested against the scene, and clears the
// color target to zero.
void MainPopulateSceneDepthPS(
#if USE_MSAA && !COMPILER_HLSLCC
	sample noperspective float4 SvPosition : SV_POSITION,
#else
	noperspective float4 SvPosition : SV_POSITION,
#endif
	out float4 OutColor : SV_Target0,
	out float OutDepth : SV_DEPTH)
{
	// Normalized position of this pixel inside the color viewport.
	const float2 OutputViewportUV = (SvPosition.xy - Color_ViewportMin) * Color_ViewportSizeInverse;

	// Matching de-jittered texel position in the depth texture, kept inside the
	// bilinear-safe region of the depth viewport.
	float2 UnclampedDepthUV = (Depth_ViewportMin + OutputViewportUV * Depth_ViewportSize - DepthTextureJitter) * Depth_ExtentInverse;
	float2 DepthUV = clamp(UnclampedDepthUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);

	OutColor = 0.0;
	OutDepth = Texture2DSampleLevel(DepthTexture, DepthSampler, DepthUV, 0).r;
}
|
|
|
|
// Returns a [0,1] visibility mask comparing an editor-primitive depth against the
// scene depth: 1 when the primitive is in front (or within the soft bias band),
// fading to 0 as it falls behind the scene.
float ComputeDepthMask(float SceneDeviceZ, float EditorDeviceZ)
{
	// Width of the soft bias band in device-Z units. A soft comparison against
	// SceneDeviceZ gives the best quality on near-coplanar geometry.
	const float DeviceDepthFade = 0.00005f;

	// How far the editor primitive sits behind the scene, in fade-band units.
	float OcclusionAmount = (SceneDeviceZ - EditorDeviceZ) / DeviceDepthFade;

	return saturate(1.0f - OcclusionAmount);
}
|
|
|
|
// Resolves the editor-primitive color and furthest device Z for the given pixel.
// NOTE(review): as visible here this is effectively a no-op — OutColor is
// initialized to 0 and nothing is accumulated from EditorPrimitivesColor before
// the premultiply/gamma fix-up, so the result is always (0,0,0,0). The per-sample
// accumulation loop appears to be missing (possibly lost in transcription);
// confirm against the original file before relying on this function.
void ResolveEditorPrimitiveColor(int2 PixelPos, out float4 OutColor, out float OutDeviceZ)
{
	// Furthest device Z or 0 if there is none.
	OutDeviceZ = 0;
	OutColor = 0;

	// Bring out of premultiplied.
	OutColor.rgb /= max(OutColor.a, 0.0001f);
	// Fix gamma.
	OutColor.rgb = pow(OutColor.rgb, 1.0f / 2.2f);
	// Bring back to premultiplied
	OutColor.rgb *= OutColor.a;
}
|
|
|
|
// Composites the (optionally MSAA) editor primitives over the scene color.
// Primitives occluded by scene depth are dimmed and overlaid with a 2x2
// checkerboard pattern so hidden gizmo parts remain visible.
void MainCompositeEditorPrimitivesPS(
	float4 SvPosition : SV_POSITION,
	out float4 OutColor : SV_Target0)
{
	// UV of this pixel within the output viewport, then mapped into color space.
	float2 ViewportUV = (SvPosition.xy - Output_ViewportMin) * Output_ViewportSizeInverse;

	const float2 ColorUV = (ViewportUV * Color_ViewportSize + Color_ViewportMin) * Color_ExtentInverse;
	const int2 ColorPixelPos = int2(ColorUV * Color_Extent);

	float4 SceneColor = Texture2DSample(ColorTexture, ColorSampler, ColorUV);

	#if OUTPUT_SRGB_BUFFER
	// Match the editor-primitive gamma fix-up below when writing to an sRGB buffer.
	SceneColor.rgb = pow(SceneColor.rgb, 1.0f / 2.2f);
	#endif

	// Resolve editor primitive scene color and depth.
	float4 EditorPrimitiveColor;
	float DepthMask;

	#if MSAA_SAMPLE_COUNT > 1
	{
		// Manual MSAA resolve: accumulate covered samples and their occlusion masks.
		EditorPrimitiveColor = 0.0;
		DepthMask = 0.0;
		float DepthMaskWeight = 0.0;

		UNROLL_N(MSAA_SAMPLE_COUNT)
		for (uint SampleIndex = 0; SampleIndex < MSAA_SAMPLE_COUNT; ++SampleIndex)
		{
			const float2 SampleOffset = SampleOffsetArray[SampleIndex].xy;

			// Scene depth at this sample's de-jittered subpixel location.
			float2 SampleDepthUV = (ViewportUV * Depth_ViewportSize + Depth_ViewportMin + SampleOffset - DepthTextureJitter) * Depth_ExtentInverse;
			SampleDepthUV = clamp(SampleDepthUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);
			float SampleSceneDeviceZ = Texture2DSampleLevel(DepthTexture, DepthSampler, SampleDepthUV, 0).r;

			float4 Sample = EditorPrimitivesColor.Load(ColorPixelPos, SampleIndex);
			float SampleEditorDeviceZ = EditorPrimitivesDepth.Load(ColorPixelPos, SampleIndex).r;

			// Check if any color was applied to this pixel. Note: This prevents actual black pixels from being visible.
			float Weight = Sample.a;

			float SampleDepthMask = ComputeDepthMask(SampleSceneDeviceZ, SampleEditorDeviceZ);

			// Optionally treat any sample that wrote depth as fully covered.
			if (SampleEditorDeviceZ > 0.0 && bCompositeAnyNonNullDepth)
			{
				Weight = 1;
			}

			FLATTEN
			if (Weight)
			{
				EditorPrimitiveColor += float4(Sample.rgb, Weight);

				DepthMask += SampleDepthMask * Weight;
				DepthMaskWeight += Weight;
			}
		}

		// Average the premultiplied accumulation over the sample count.
		EditorPrimitiveColor.rgb /= MSAA_SAMPLE_COUNT;
		EditorPrimitiveColor.a /= MSAA_SAMPLE_COUNT;

		// Coverage-weighted average of the per-sample occlusion masks.
		DepthMask *= DepthMaskWeight > 0 ? rcp(DepthMaskWeight) : 0.0;
	}
	#else
	{
		// De-jitter the sample position and make a filtered lookup - for planes this allows to reconstruct a much less jittery depth comparison function. It doesn't fix silhouettes, however.
		float2 SampleDepthUV = (ViewportUV * Depth_ViewportSize + Depth_ViewportMin - DepthTextureJitter) * Depth_ExtentInverse;
		SampleDepthUV = clamp(SampleDepthUV, Depth_UVViewportBilinearMin, Depth_UVViewportBilinearMax);

		float SceneDeviceZ = Texture2DSampleLevel(DepthTexture, DepthSampler, SampleDepthUV, 0).r;
		float EditorDeviceZ = EditorPrimitivesDepth.Load(int3(ColorPixelPos, 0)).r;

		EditorPrimitiveColor = EditorPrimitivesColor.Load(int3(ColorPixelPos, 0));
		DepthMask = ComputeDepthMask(SceneDeviceZ, EditorDeviceZ);

		if (EditorDeviceZ > 0.0 && bCompositeAnyNonNullDepth)
		{
			EditorPrimitiveColor.a = 1;
		}
	}
	#endif

	// Fixes the gamma of editor primitives
	{
		// Bring out of premultiplied.
		EditorPrimitiveColor.rgb /= max(EditorPrimitiveColor.a, 0.0001f);
		// Fix gamma.
		EditorPrimitiveColor.rgb = pow(EditorPrimitiveColor.rgb, 1.0f / 2.2f);
		// Bring back to premultiplied
		EditorPrimitiveColor.rgb *= EditorPrimitiveColor.a;
	}

	// NOTE(review): DepthUV appears unused below — possibly leftover from an
	// earlier revision; confirm before removing.
	float2 DepthUV = ApplyScreenTransform(ColorUV, ColorToDepth);

	// Generate 2x2 square tiling pattern for foreground primitives that end up behind scene opaque primitives.
	float PatternMask = ((ColorPixelPos.x/2 + ColorPixelPos.y/2) % 2) * 0.7f;

	// The constants express the two opacity values we see when the primitive is hidden.
	float LowContrastPatternMask = lerp(0.2, 1, PatternMask);

	// Opaque gizmos skip the pattern entirely.
	LowContrastPatternMask = saturate(lerp(LowContrastPatternMask, 1, bOpaqueEditorGizmo));

	// Occluded primitives get globally dimmed (HiddenMask) and patterned (DarkenMask).
	float HiddenMask = lerp(0.7f, 1.0f, DepthMask);
	float DarkenMask = lerp(LowContrastPatternMask, 1.0f, DepthMask);

	// Blend editor primitives with scene color (premultiplied-alpha over).
	OutColor.rgb = SceneColor.rgb * (1 - EditorPrimitiveColor.a) + EditorPrimitiveColor.rgb * (DarkenMask * HiddenMask);

	#if POST_PROCESS_ALPHA
	OutColor.a = lerp(SceneColor.a, 1, EditorPrimitiveColor.a * DarkenMask * HiddenMask);
	#else
	OutColor.a = 0;
	#endif
}
|