2019-12-26 14:45:42 -05:00
// Copyright Epic Games, Inc. All Rights Reserved.
2018-12-18 21:41:17 -05:00
/*=============================================================================
ScreenSpaceDenoise . cpp : Denoise in screen space .
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = */
# include "ScreenSpaceDenoise.h"
# include "StaticBoundShaderState.h"
# include "SceneUtils.h"
# include "PostProcess/SceneRenderTargets.h"
# include "SceneRenderTargetParameters.h"
# include "ScenePrivate.h"
# include "ClearQuad.h"
# include "PipelineStateCache.h"
2019-06-11 18:27:07 -04:00
# include "SceneTextureParameters.h"
2020-07-06 18:58:26 -04:00
# include "Lumen/LumenSceneRendering.h"
2018-12-18 21:41:17 -05:00
// ---------------------------------------------------- Cvars
2019-03-07 11:25:32 -05:00
/** Upper bound on the number of samples used by the shadow denoiser's reconstruction pass. */
static TAutoConsoleVariable<int32> CVarShadowReconstructionSampleCount(
	TEXT(" r.Shadow.Denoiser.ReconstructionSamples "), 8,
	// The help text reports the real default (8); it previously claimed 16.
	TEXT(" Maximum number of samples for the reconstruction pass (default = 8). "),
	ECVF_RenderThreadSafe);

/** Number of pre-convolution passes run by the shadow denoiser. */
static TAutoConsoleVariable<int32> CVarShadowPreConvolutionCount(
	TEXT(" r.Shadow.Denoiser.PreConvolution "), 1,
	TEXT(" Number of pre-convolution passes (default = 1). "),
	ECVF_RenderThreadSafe);

/** Toggles temporal accumulation for the shadow denoiser. */
static TAutoConsoleVariable<int32> CVarShadowTemporalAccumulation(
	TEXT(" r.Shadow.Denoiser.TemporalAccumulation "), 1,
	// Help text was empty; it now uses the same wording as the other
	// *.Denoiser.TemporalAccumulation console variables in this file.
	TEXT(" Accumulates the samples over multiple frames. "),
	ECVF_RenderThreadSafe);

/** Number of samples used to convolve the shadow history over time. */
static TAutoConsoleVariable<int32> CVarShadowHistoryConvolutionSampleCount(
	TEXT(" r.Shadow.Denoiser.HistoryConvolutionSamples "), 1,
	TEXT(" Number of samples to use to convolve the history over time. "),
	ECVF_RenderThreadSafe);
2020-04-02 12:24:48 -04:00
/** Selects which denoising algorithm is applied to reflections (0 = off, 1 = renderer default, 2 = GScreenSpaceDenoiser). */
static TAutoConsoleVariable<int32> CVarUseReflectionDenoiser(
	TEXT(" r.Reflections.Denoiser "), 2,
	TEXT(" Choose the denoising algorithm. \n ")
	TEXT(" 0: Disabled; \n ")
	TEXT(" 1: Forces the default denoiser of the renderer; \n ")
	TEXT(" 2: GScreenSpaceDenoiser which may be overriden by a third party plugin (default). "),
	ECVF_RenderThreadSafe);

/** Upper bound on the number of samples used by the reflection denoiser's reconstruction pass. */
static TAutoConsoleVariable<int32> CVarReflectionReconstructionSampleCount(
	TEXT(" r.Reflections.Denoiser.ReconstructionSamples "), 8,
	TEXT(" Maximum number of samples for the reconstruction pass (default = 8). "),
	ECVF_RenderThreadSafe);

/** Number of pre-convolution passes run by the reflection denoiser. */
static TAutoConsoleVariable<int32> CVarReflectionPreConvolutionCount(
	TEXT(" r.Reflections.Denoiser.PreConvolution "), 1,
	TEXT(" Number of pre-convolution passes (default = 1). "),
	ECVF_RenderThreadSafe);

/** Toggles temporal accumulation for the reflection denoiser. */
static TAutoConsoleVariable<int32> CVarReflectionTemporalAccumulation(
	TEXT(" r.Reflections.Denoiser.TemporalAccumulation "), 1,
	TEXT(" Accumulates the samples over multiple frames. "),
	ECVF_RenderThreadSafe);
2019-01-22 19:17:17 -05:00
/** Upper bound on the number of samples used by the ambient occlusion denoiser's reconstruction pass. */
static TAutoConsoleVariable<int32> CVarAOReconstructionSampleCount(
	TEXT(" r.AmbientOcclusion.Denoiser.ReconstructionSamples "), 16,
	TEXT(" Maximum number of samples for the reconstruction pass (default = 16). "),
	ECVF_RenderThreadSafe);

/** Number of pre-convolution passes run by the ambient occlusion denoiser. */
static TAutoConsoleVariable<int32> CVarAOPreConvolutionCount(
	TEXT(" r.AmbientOcclusion.Denoiser.PreConvolution "), 2,
	// The help text reports the real default (2); it previously claimed 1.
	TEXT(" Number of pre-convolution passes (default = 2). "),
	ECVF_RenderThreadSafe);

/** Spread factor of the ambient occlusion pre-convolution passes. */
static TAutoConsoleVariable<float> CVarAOKernelSpreadFactor(
	TEXT(" r.AmbientOcclusion.Denoiser.KernelSpreadFactor "), 4,
	TEXT(" Spread factor of the preconvolution passes. "),
	ECVF_RenderThreadSafe);

/** Toggles temporal accumulation for the ambient occlusion denoiser. */
static TAutoConsoleVariable<int32> CVarAOTemporalAccumulation(
	TEXT(" r.AmbientOcclusion.Denoiser.TemporalAccumulation "), 1,
	TEXT(" Accumulates the samples over multiple frames. "),
	ECVF_RenderThreadSafe);

/** Number of samples used by the ambient occlusion history post filter. */
static TAutoConsoleVariable<int32> CVarAOHistoryConvolutionSampleCount(
	TEXT(" r.AmbientOcclusion.Denoiser.HistoryConvolution.SampleCount "), 1,
	// The help text reports the real default (1); it previously claimed 16.
	TEXT(" Number of samples to use for history post filter (default = 1). "),
	ECVF_RenderThreadSafe);

/** Multiplier applied on the kernel sample offset of the ambient occlusion history convolution. */
static TAutoConsoleVariable<float> CVarAOHistoryConvolutionKernelSpreadFactor(
	TEXT(" r.AmbientOcclusion.Denoiser.HistoryConvolution.KernelSpreadFactor "), 7,
	TEXT(" Multiplication factor applied on the kernel sample offset (default = 7). "),
	ECVF_RenderThreadSafe);
/** Upper bound on the number of samples used by the global illumination denoiser's reconstruction pass. */
static TAutoConsoleVariable<int32> CVarGIReconstructionSampleCount(
	TEXT(" r.GlobalIllumination.Denoiser.ReconstructionSamples "), 16,
	TEXT(" Maximum number of samples for the reconstruction pass (default = 16). "),
	ECVF_RenderThreadSafe);

/** Number of pre-convolution passes run by the global illumination denoiser. */
static TAutoConsoleVariable<int32> CVarGIPreConvolutionCount(
	TEXT(" r.GlobalIllumination.Denoiser.PreConvolution "), 1,
	TEXT(" Number of pre-convolution passes (default = 1). "),
	ECVF_RenderThreadSafe);

/** Toggles temporal accumulation for the global illumination denoiser. */
static TAutoConsoleVariable<int32> CVarGITemporalAccumulation(
	TEXT(" r.GlobalIllumination.Denoiser.TemporalAccumulation "), 1,
	TEXT(" Accumulates the samples over multiple frames. "),
	ECVF_RenderThreadSafe);

/** Number of samples used by the global illumination history post filter. */
static TAutoConsoleVariable<int32> CVarGIHistoryConvolutionSampleCount(
	TEXT(" r.GlobalIllumination.Denoiser.HistoryConvolution.SampleCount "), 1,
	TEXT(" Number of samples to use for history post filter (default = 1). "),
	ECVF_RenderThreadSafe);

/** Multiplier applied on the kernel sample offset of the global illumination history convolution. */
static TAutoConsoleVariable<float> CVarGIHistoryConvolutionKernelSpreadFactor(
	TEXT(" r.GlobalIllumination.Denoiser.HistoryConvolution.KernelSpreadFactor "), 3,
	TEXT(" Multiplication factor applied on the kernel sample offset (default=3). "),
	ECVF_RenderThreadSafe);
2018-12-18 21:41:17 -05:00
/** The maximum number of mip levels supported in the denoiser. */
2019-10-02 18:47:26 -04:00
// TODO(Denoiser): jump to 3 because buffer sizes are already a multiple of 4.
2019-09-14 09:45:25 -04:00
static const int32 kMaxMipLevel = 2 ;
2018-12-18 21:41:17 -05:00
2019-01-10 20:14:17 -05:00
/** Maximum number of samples per pixel supported in the Stackowiak sample set. */
static const int32 kStackowiakMaxSampleCountPerSet = 56 ;
2019-03-07 11:25:32 -05:00
/** The maximum number of buffers; aliases kMaxDenoiserBufferProcessingCount, which is defined outside this file section. */
2020-07-06 18:58:26 -04:00
static const int32 kMaxBufferProcessingCount = kMaxDenoiserBufferProcessingCount ;
2019-03-07 11:25:32 -05:00
2019-10-01 13:03:04 -04:00
/** Number of textures used to store compressed metadata. */
2020-07-06 18:58:26 -04:00
static const int32 kCompressedMetadataTextures = 2 ;
2019-10-01 13:03:04 -04:00
2019-03-07 11:25:32 -05:00
// Compile-time guard: the public batch size must never exceed the number of internal buffers.
static_assert ( IScreenSpaceDenoiser : : kMaxBatchSize < = kMaxBufferProcessingCount , " Can't batch more signal than there is internal buffer in the denoiser. " ) ;
2018-12-18 21:41:17 -05:00
// ---------------------------------------------------- Globals
// Global denoiser interface pointer, null at static initialization.
// The r.Reflections.Denoiser help text notes it may be overridden by a third party plugin;
// where it gets assigned is not visible in this part of the file.
const IScreenSpaceDenoiser * GScreenSpaceDenoiser = nullptr ;
2019-06-11 18:27:07 -04:00
// GPU stat declarations for the denoiser: reflections, shadows, ambient occlusion and diffuse indirect.
DECLARE_GPU_STAT ( ReflectionsDenoiser )
DECLARE_GPU_STAT ( ShadowsDenoiser )
DECLARE_GPU_STAT ( AmbientOcclusionDenoiser )
DECLARE_GPU_STAT ( DiffuseIndirectDenoiser )
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
// Multi-GPU builds only: FName constants naming the individual denoiser effects.
// Presumably these are the values handed to DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS — confirm at the call sites.
# if WITH_MGPU
// Declared extern ahead of its definition so that the definition below has external linkage
// (a namespace-scope const object is internal by default in C++).
// NOTE(review): presumably referenced from another translation unit — confirm.
extern const FName SSDAmbientOcclusionEffectName ;
const FName SSDShadowVisibilityMasksEffectName ( " SSDShadowVisibility " ) ;
// One effect name per entry; the array holds four names.
const FName SSDMultiPolychromaticPenumbraHarmonicsEffectNames [ 4 ] =
{
" SSDPolychromaticPenumbra0 " ,
" SSDPolychromaticPenumbra1 " ,
" SSDPolychromaticPenumbra2 " ,
" SSDPolychromaticPenumbra3 " ,
} ;
const FName SSDMultiPolychromaticPenumbraIntegralEffectName ( " SSDPolychromaticPenumbraIntegrand " ) ;
const FName SSDMultiPolychromaticPenumbraFinalEffectName ( " SSDPolychromaticPenumbraFinal " ) ;
const FName SSDReflectionsEffectName ( " SSDReflections " ) ;
const FName SSDWaterReflectionsEffectName ( " SSDWaterReflections " ) ;
const FName SSDAmbientOcclusionEffectName ( " SSDAmbientOcclusion " ) ;
const FName SSDDiffuseIndirectEffectName ( " SSDDiffuseIndirect " ) ;
const FName SSDSkyLightEffectName ( " SSDSkyLight " ) ;
const FName SSDReflectedSkyLightEffectName ( " SSDReflectedSkyLight " ) ;
const FName SSDDiffuseIndirectHarmonicEffectName ( " SSDDiffuseIndirectHarmonic " ) ;
const FName SSDScreenSpaceDiffuseIndirectEffectName ( " SSDScreenSpaceDiffuseIndirect " ) ;
const FName SSDIndirectProbeHierarchyEffectName ( " SSDIndirectProbeHierarchy " ) ;
// With MGPU: declares a local `Settings` object constructed from the given effect name.
# define DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(EFFECT_NAME) FSSDConstantPixelDensitySettings Settings(EFFECT_NAME)
# else // WITH_MGPU
// Without MGPU: declares a default-constructed `Settings` object; EFFECT_NAME is not expanded.
# define DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(EFFECT_NAME) FSSDConstantPixelDensitySettings Settings
# endif // !WITH_MGPU
2019-02-14 14:23:27 -05:00
namespace
{
// ---------------------------------------------------- Enums
2019-10-01 13:03:04 -04:00
/** Layout used for the compressed meta data read by the denoiser. */
enum class ECompressedMetadataLayout
{
	/** No compression: the signal denoiser uses the depth buffer and gbuffer directly. */
	Disabled,

	/** Scene depth and world space normal are compressed into the same render target. */
	DepthAndNormal,

	/**
	 * Scene depth and view space normal are compressed into the same render target.
	 * Keeping the normal in view space allows the much faster ScreenToView to be used
	 * instead of ScreenToTranslatedWorld, but history bilateral rejection is not supported.
	 */
	DepthAndViewNormal,

	/** Scene depth and shading model ID are in separate render targets. */
	FedDepthAndShadingModelID,

	MAX,
};
2019-02-14 14:23:27 -05:00
/** The different kinds of signal the denoiser can process. */
enum class ESignalProcessing
{
	/** Denoise a shadow mask. */
	ShadowVisibilityMask,

	/** Denoise one lighting harmonic when denoising multiple light's penumbra. */
	PolychromaticPenumbraHarmonic,

	/** Denoise first bounce specular. */
	Reflections,

	/** Denoise ambient occlusion. */
	AmbientOcclusion,

	/** Denoise first bounce diffuse and ambient occlusion. */
	DiffuseAndAmbientOcclusion,

	/** Denoise first bounce diffuse as spherical harmonic. */
	DiffuseSphericalHarmonic,

	/** Denoise SSGI. */
	ScreenSpaceDiffuseIndirect,

	/** Denoise diffuse indirect hierarchy. */
	IndirectProbeHierarchy,

	MAX,
};
2018-12-18 21:41:17 -05:00
// ---------------------------------------------------- Simple functions
/** Returns whether a light type is supported: point, directional, rect and spot lights are. */
static bool IsSupportedLightType(ELightComponentType LightType)
{
	switch (LightType)
	{
	case LightType_Point:
	case LightType_Directional:
	case LightType_Rect:
	case LightType_Spot:
		return true;

	default:
		return false;
	}
}
/**
 * Tells whether a signal is supported by the constant pixel density pass layout.
 * Every named ESignalProcessing value is accepted; anything else (e.g. MAX) is not.
 */
static bool UsesConstantPixelDensityPassLayout(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::ShadowVisibilityMask:
	case ESignalProcessing::PolychromaticPenumbraHarmonic:
	case ESignalProcessing::Reflections:
	case ESignalProcessing::AmbientOcclusion:
	case ESignalProcessing::DiffuseAndAmbientOcclusion:
	case ESignalProcessing::DiffuseSphericalHarmonic:
	case ESignalProcessing::ScreenSpaceDiffuseIndirect:
	case ESignalProcessing::IndirectProbeHierarchy:
		return true;

	default:
		return false;
	}
}
/** Tells whether a signal supports upscaling: reflections, ambient occlusion, and diffuse + ambient occlusion do. */
static bool SignalSupportsUpscaling(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::Reflections:
	case ESignalProcessing::AmbientOcclusion:
	case ESignalProcessing::DiffuseAndAmbientOcclusion:
		return true;

	default:
		return false;
	}
}
/** Tells whether a signal uses an ingestion pass; only the shadow visibility mask does. */
static bool SignalUsesInjestion(ESignalProcessing SignalProcessing)
{
	const bool bIsShadowVisibilityMask = (SignalProcessing == ESignalProcessing::ShadowVisibilityMask);
	return bIsShadowVisibilityMask;
}
/**
 * Tells whether a signal uses a reduction pass before the reconstruction.
 * Currently no signal does, so the argument is not inspected.
 */
static bool SignalUsesReduction(ESignalProcessing SignalProcessing)
{
	// Disabled condition kept for reference:
	//   SignalProcessing == ESignalProcessing::DiffuseSphericalHarmonic
	const bool bUsesReduction = false;
	return bUsesReduction;
}
2019-03-15 13:58:07 -04:00
/**
 * Tells whether a signal uses an additional pre convolution pass: shadow visibility mask,
 * reflections, ambient occlusion, and diffuse + ambient occlusion do.
 */
static bool SignalUsesPreConvolution(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::ShadowVisibilityMask:
	case ESignalProcessing::Reflections:
	case ESignalProcessing::AmbientOcclusion:
	case ESignalProcessing::DiffuseAndAmbientOcclusion:
		return true;

	default:
		return false;
	}
}
2019-02-14 14:23:27 -05:00
/** Tells whether a signal uses a history rejection pre convolution pass; only ambient occlusion does. */
static bool SignalUsesRejectionPreConvolution(ESignalProcessing SignalProcessing)
{
	// The checks for ShadowVisibilityMask and Reflections are disabled:
	//   SignalProcessing == ESignalProcessing::ShadowVisibilityMask
	//   SignalProcessing == ESignalProcessing::Reflections
	const bool bIsAmbientOcclusion = (SignalProcessing == ESignalProcessing::AmbientOcclusion);
	return bIsAmbientOcclusion;
}
2019-09-14 09:45:25 -04:00
/**
 * Tells whether a signal uses a convolution pass after the temporal accumulation pass:
 * shadow visibility mask, ambient occlusion, and diffuse + ambient occlusion do.
 */
static bool SignalUsesPostConvolution(ESignalProcessing SignalProcessing)
{
	if (SignalProcessing == ESignalProcessing::ShadowVisibilityMask)
	{
		return true;
	}

	if (SignalProcessing == ESignalProcessing::AmbientOcclusion)
	{
		return true;
	}

	return SignalProcessing == ESignalProcessing::DiffuseAndAmbientOcclusion;
}
2019-03-07 11:25:32 -05:00
/** Tells whether a signal uses a final convolution pass; only the shadow visibility mask does. */
static bool SignalUsesFinalConvolution(ESignalProcessing SignalProcessing)
{
	const bool bIsShadowVisibilityMask = (SignalProcessing == ESignalProcessing::ShadowVisibilityMask);
	return bIsShadowVisibilityMask;
}
/**
 * Picks the meta data compression layout used when denoising a signal.
 * SSGI uses depth + view space normal, the indirect probe hierarchy uses the fed depth and
 * shading model ID, and every other signal has compression disabled.
 */
static ECompressedMetadataLayout GetSignalCompressedMetadata(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::ScreenSpaceDiffuseIndirect:
		return ECompressedMetadataLayout::DepthAndViewNormal;

	case ESignalProcessing::IndirectProbeHierarchy:
		return ECompressedMetadataLayout::FedDepthAndShadingModelID;

	default:
		return ECompressedMetadataLayout::Disabled;
	}
}
/**
 * Gives the number of signals that might be batched at the same time.
 * The shadow visibility mask can batch up to IScreenSpaceDenoiser::kMaxBatchSize signals;
 * every other known signal is processed one at a time. An unknown value trips check(0).
 */
static int32 SignalMaxBatchSize(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::ShadowVisibilityMask:
		return IScreenSpaceDenoiser::kMaxBatchSize;

	case ESignalProcessing::Reflections:
	case ESignalProcessing::PolychromaticPenumbraHarmonic:
	case ESignalProcessing::AmbientOcclusion:
	case ESignalProcessing::DiffuseAndAmbientOcclusion:
	case ESignalProcessing::DiffuseSphericalHarmonic:
	case ESignalProcessing::ScreenSpaceDiffuseIndirect:
	case ESignalProcessing::IndirectProbeHierarchy:
		return 1;

	default:
		// Not a signal this function knows about.
		check(0);
		return 1;
	}
}
2019-06-11 18:27:07 -04:00
/** Tells whether a signal has a code path for 1 sample per pixel; only diffuse + ambient occlusion does. */
static bool SignalSupport1SPP(ESignalProcessing SignalProcessing)
{
	const bool bIsDiffuseAndAO = (SignalProcessing == ESignalProcessing::DiffuseAndAmbientOcclusion);
	return bIsDiffuseAndAO;
}
2019-03-07 11:25:32 -05:00
/**
 * Tells whether a signal can be denoised with multiple samples per pixel.
 * Every named ESignalProcessing value can; anything else (e.g. MAX) cannot.
 */
static bool SignalSupportMultiSPP(ESignalProcessing SignalProcessing)
{
	switch (SignalProcessing)
	{
	case ESignalProcessing::ShadowVisibilityMask:
	case ESignalProcessing::PolychromaticPenumbraHarmonic:
	case ESignalProcessing::Reflections:
	case ESignalProcessing::AmbientOcclusion:
	case ESignalProcessing::DiffuseAndAmbientOcclusion:
	case ESignalProcessing::DiffuseSphericalHarmonic:
	case ESignalProcessing::ScreenSpaceDiffuseIndirect:
	case ESignalProcessing::IndirectProbeHierarchy:
		return true;

	default:
		return false;
	}
}
2018-12-18 21:41:17 -05:00
// ---------------------------------------------------- Shaders
2019-03-07 11:25:32 -05:00
/** Shader permutation dimension: the type of signal being denoised. */
class FSignalProcessingDim : SHADER_PERMUTATION_ENUM_CLASS(" DIM_SIGNAL_PROCESSING ", ESignalProcessing);

/** Shader permutation dimension: the number of signals denoised at the same time, from 1 to IScreenSpaceDenoiser::kMaxBatchSize. */
class FSignalBatchSizeDim : SHADER_PERMUTATION_RANGE_INT(" DIM_SIGNAL_BATCH_SIZE ", 1, IScreenSpaceDenoiser::kMaxBatchSize);

/** Shader permutation dimension: whether multiple samples are denoised at the same time. */
class FMultiSPPDim : SHADER_PERMUTATION_BOOL(" DIM_MULTI_SPP ");
/**
 * Resource names for the injest pass: four entries per ESignalProcessing value, in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kInjestResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.Injest0 "),
	TEXT(" Shadow.Denoiser.Injest1 "),
	nullptr, nullptr,

	// PolychromaticPenumbraHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// Reflections
	nullptr, nullptr, nullptr, nullptr,

	// AmbientOcclusion
	nullptr, nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	nullptr, nullptr, nullptr, nullptr,

	// DiffuseSphericalHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// ScreenSpaceDiffuseIndirect
	nullptr, nullptr, nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
2020-07-06 18:58:26 -04:00
/**
 * Resource names for the reduce pass: four entries per ESignalProcessing value, in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kReduceResourceNames[] = {
	// ShadowVisibilityMask
	nullptr, nullptr, nullptr, nullptr,

	// PolychromaticPenumbraHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// Reflections
	nullptr, nullptr, nullptr, nullptr,

	// AmbientOcclusion
	nullptr, nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	nullptr, nullptr, nullptr, nullptr,

	// DiffuseSphericalHarmonic
	TEXT(" DiffuseHarmonicReduce0 "),
	TEXT(" DiffuseHarmonicReduce1 "),
	TEXT(" DiffuseHarmonicReduce2 "),
	TEXT(" DiffuseHarmonicReduce3 "),

	// ScreenSpaceDiffuseIndirect
	nullptr, nullptr, nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
2019-01-07 17:22:05 -05:00
2019-02-14 14:23:27 -05:00
/**
 * Resource names for the reconstruction pass: four entries per ESignalProcessing value, in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kReconstructionResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.Reconstruction0 "),
	TEXT(" Shadow.Denoiser.Reconstruction1 "),
	TEXT(" Shadow.Denoiser.Reconstruction2 "),
	TEXT(" Shadow.Denoiser.Reconstruction3 "),

	// PolychromaticPenumbraHarmonic
	TEXT(" PolychromaticPenumbraHarmonicReconstruction0 "),
	TEXT(" PolychromaticPenumbraHarmonicReconstruction1 "),
	TEXT(" PolychromaticPenumbraHarmonicReconstruction2 "),
	TEXT(" PolychromaticPenumbraHarmonicReconstruction3 "),

	// Reflections
	TEXT(" Reflections.Denoiser.Reconstruction0 "),
	TEXT(" Reflections.Denoiser.Reconstruction1 "),
	nullptr, nullptr,

	// AmbientOcclusion
	TEXT(" AO.Denoiser.Reconstruction0 "),
	nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	TEXT(" DiffuseIndirectReconstruction0 "),
	TEXT(" DiffuseIndirectReconstruction1 "),
	nullptr, nullptr,

	// DiffuseSphericalHarmonic
	TEXT(" DiffuseHarmonicReconstruction0 "),
	TEXT(" DiffuseHarmonicReconstruction1 "),
	TEXT(" DiffuseHarmonicReconstruction2 "),
	TEXT(" DiffuseHarmonicReconstruction3 "),

	// ScreenSpaceDiffuseIndirect
	TEXT(" SSGI.Denoiser.Reconstruction0 "),
	TEXT(" SSGI.Denoiser.Reconstruction1 "),
	nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
2019-03-15 13:58:07 -04:00
/**
 * Resource names for the pre convolution pass: four entries per ESignalProcessing value, in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kPreConvolutionResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.PreConvolution0 "),
	TEXT(" Shadow.Denoiser.PreConvolution1 "),
	TEXT(" Shadow.Denoiser.PreConvolution2 "),
	TEXT(" Shadow.Denoiser.PreConvolution3 "),

	// PolychromaticPenumbraHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// Reflections
	TEXT(" Reflections.Denoiser.PreConvolution0 "),
	TEXT(" Reflections.Denoiser.PreConvolution1 "),
	nullptr, nullptr,

	// AmbientOcclusion
	TEXT(" AO.Denoiser.PreConvolution0 "),
	nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	TEXT(" DiffuseIndirectPreConvolution0 "),
	TEXT(" DiffuseIndirectPreConvolution1 "),
	nullptr, nullptr,

	// DiffuseSphericalHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// ScreenSpaceDiffuseIndirect
	nullptr, nullptr, nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
2019-02-14 14:23:27 -05:00
/**
 * Resource names for the history rejection pre convolution pass: four entries per
 * ESignalProcessing value, in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kRejectionPreConvolutionResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.RejectionPreConvolution0 "),
	TEXT(" Shadow.Denoiser.RejectionPreConvolution1 "),
	TEXT(" Shadow.Denoiser.RejectionPreConvolution2 "),
	TEXT(" Shadow.Denoiser.RejectionPreConvolution3 "),

	// PolychromaticPenumbraHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// Reflections
	TEXT(" Reflections.Denoiser.RejectionPreConvolution0 "),
	TEXT(" Reflections.Denoiser.RejectionPreConvolution1 "),
	TEXT(" Reflections.Denoiser.RejectionPreConvolution2 "),
	nullptr,

	// AmbientOcclusion
	TEXT(" AO.Denoiser.RejectionPreConvolution0 "),
	nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	nullptr, nullptr, nullptr, nullptr,

	// DiffuseSphericalHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// ScreenSpaceDiffuseIndirect
	nullptr, nullptr, nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
/**
 * Resource names for the temporal accumulation pass: four entries per ESignalProcessing value,
 * in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kTemporalAccumulationResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.TemporalAccumulation0 "),
	TEXT(" Shadow.Denoiser.TemporalAccumulation1 "),
	TEXT(" Shadow.Denoiser.TemporalAccumulation2 "),
	TEXT(" Shadow.Denoiser.TemporalAccumulation3 "),

	// PolychromaticPenumbraHarmonic
	TEXT(" PolychromaticPenumbraHistory0 "),
	TEXT(" PolychromaticPenumbraHistory1 "),
	nullptr, nullptr,

	// Reflections
	TEXT(" Reflections.Denoiser.TemporalAccumulation0 "),
	TEXT(" Reflections.Denoiser.TemporalAccumulation1 "),
	nullptr, nullptr,

	// AmbientOcclusion
	TEXT(" AO.Denoiser.TemporalAccumulation0 "),
	nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	TEXT(" DiffuseIndirect.Denoiser.TemporalAccumulation0 "),
	TEXT(" DiffuseIndirect.Denoiser.TemporalAccumulation1 "),
	nullptr, nullptr,

	// DiffuseSphericalHarmonic
	TEXT(" DiffuseHarmonicTemporalAccumulation0 "),
	TEXT(" DiffuseHarmonicTemporalAccumulation1 "),
	TEXT(" DiffuseHarmonicTemporalAccumulation2 "),
	TEXT(" DiffuseHarmonicTemporalAccumulation3 "),

	// ScreenSpaceDiffuseIndirect
	TEXT(" SSGI.Denoiser.TemporalAccumulation0 "),
	TEXT(" SSGI.Denoiser.TemporalAccumulation1 "),
	nullptr, nullptr,

	// IndirectProbeHierarchy
	TEXT(" ProbeHierarchy.TemporalAccumulation0 "),
	TEXT(" ProbeHierarchy.TemporalAccumulation1 "),
	TEXT(" ProbeHierarchy.TemporalAccumulation2 "),
	nullptr,
};
/**
 * Resource names for the history convolution pass: four entries per ESignalProcessing value,
 * in enum order.
 * A nullptr entry has no name (presumably an unused slot — confirm at the point of use).
 */
const TCHAR* const kHistoryConvolutionResourceNames[] = {
	// ShadowVisibilityMask
	TEXT(" Shadow.Denoiser.HistoryConvolution0 "),
	TEXT(" Shadow.Denoiser.HistoryConvolution1 "),
	TEXT(" Shadow.Denoiser.HistoryConvolution2 "),
	TEXT(" Shadow.Denoiser.HistoryConvolution3 "),

	// PolychromaticPenumbraHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// Reflections
	TEXT(" Reflections.Denoiser.HistoryConvolution0 "),
	TEXT(" Reflections.Denoiser.HistoryConvolution1 "),
	nullptr, nullptr,

	// AmbientOcclusion
	TEXT(" AO.Denoiser.HistoryConvolution0 "),
	nullptr, nullptr, nullptr,

	// DiffuseAndAmbientOcclusion
	TEXT(" DiffuseIndirect.Denoiser.HistoryConvolution0 "),
	TEXT(" DiffuseIndirect.Denoiser.HistoryConvolution1 "),
	nullptr, nullptr,

	// DiffuseSphericalHarmonic
	nullptr, nullptr, nullptr, nullptr,

	// ScreenSpaceDiffuseIndirect
	nullptr, nullptr, nullptr, nullptr,

	// IndirectProbeHierarchy
	nullptr, nullptr, nullptr, nullptr,
};
// Resource names for the final denoiser outputs. Entries are grouped per signal type
// (see the label above each group), in the same group order as
// kHistoryConvolutionResourceNames; the static_assert after the tables requires exactly
// int32(ESignalProcessing::MAX) * kMaxBufferProcessingCount entries in total.
// Only the ShadowVisibilityMask group has names in this table.
// NOTE(review): nullptr presumably marks a buffer slot that never gets a resource for
// this pass — confirm against the code that indexes this table.
const TCHAR * const kDenoiserOutputResourceNames [ ] = {
2019-10-01 13:03:04 -04:00
// ShadowVisibilityMask
2021-01-29 06:49:13 -04:00
TEXT ( " Shadow.Denoiser.DenoiserOutput0 " ) ,
TEXT ( " Shadow.Denoiser.DenoiserOutput1 " ) ,
TEXT ( " Shadow.Denoiser.DenoiserOutput2 " ) ,
TEXT ( " Shadow.Denoiser.DenoiserOutput3 " ) ,
2019-03-07 11:25:32 -05:00
2019-09-14 09:45:25 -04:00
// PolychromaticPenumbraHarmonic
nullptr ,
nullptr ,
nullptr ,
nullptr ,
2019-03-07 11:25:32 -05:00
// Reflections
nullptr ,
nullptr ,
nullptr ,
nullptr ,
// AmbientOcclusion
nullptr ,
nullptr ,
nullptr ,
nullptr ,
2019-06-11 18:27:07 -04:00
// DiffuseIndirect
2019-03-07 11:25:32 -05:00
nullptr ,
nullptr ,
nullptr ,
2019-02-14 14:23:27 -05:00
nullptr ,
2019-09-14 09:45:25 -04:00
// DiffuseSphericalHarmonic
nullptr ,
nullptr ,
nullptr ,
nullptr ,
2019-10-01 13:03:04 -04:00
// ScreenSpaceDiffuseIndirect
nullptr ,
nullptr ,
nullptr ,
nullptr ,
2020-07-06 18:58:26 -04:00
// IndirectProbeHierarchy
nullptr ,
nullptr ,
nullptr ,
nullptr ,
2019-02-14 14:23:27 -05:00
} ;
2019-09-28 08:19:35 -04:00
// Every resource name table must hold kMaxBufferProcessingCount entries for each
// ESignalProcessing value; these checks fail the build when a table's entry count
// does not match int32(ESignalProcessing::MAX) * kMaxBufferProcessingCount.
static_assert ( UE_ARRAY_COUNT ( kReconstructionResourceNames ) = = int32 ( ESignalProcessing : : MAX ) * kMaxBufferProcessingCount , " You forgot me! " ) ;
static_assert ( UE_ARRAY_COUNT ( kRejectionPreConvolutionResourceNames ) = = int32 ( ESignalProcessing : : MAX ) * kMaxBufferProcessingCount , " You forgot me! " ) ;
static_assert ( UE_ARRAY_COUNT ( kTemporalAccumulationResourceNames ) = = int32 ( ESignalProcessing : : MAX ) * kMaxBufferProcessingCount , " You forgot me! " ) ;
static_assert ( UE_ARRAY_COUNT ( kHistoryConvolutionResourceNames ) = = int32 ( ESignalProcessing : : MAX ) * kMaxBufferProcessingCount , " You forgot me! " ) ;
static_assert ( UE_ARRAY_COUNT ( kDenoiserOutputResourceNames ) = = int32 ( ESignalProcessing : : MAX ) * kMaxBufferProcessingCount , " You forgot me! " ) ;
2019-02-14 14:23:27 -05:00
2019-10-11 15:33:31 -04:00
/** Returns whether should compile pipeline for a given shader platform.*/
2021-01-21 16:22:06 -04:00
static bool ShouldCompileSignalPipeline ( ESignalProcessing SignalProcessing , EShaderPlatform Platform )
2018-12-18 21:41:17 -05:00
{
2021-04-26 15:47:32 -04:00
if ( SignalProcessing = = ESignalProcessing : : ScreenSpaceDiffuseIndirect )
2019-08-20 13:59:54 -04:00
{
2021-11-18 14:37:34 -05:00
return FDataDrivenShaderPlatformInfo : : GetCompileSignalProcessingPipeline ( Platform ) | | FDataDrivenShaderPlatformInfo : : GetSupportsSSDIndirect ( Platform ) ;
2018-12-18 21:41:17 -05:00
}
2020-07-06 18:58:26 -04:00
else if ( SignalProcessing = = ESignalProcessing : : Reflections )
2019-10-01 13:03:04 -04:00
{
2021-11-18 14:37:34 -05:00
return RHISupportsRayTracingShaders ( Platform ) ;
2019-10-01 13:03:04 -04:00
}
else if (
SignalProcessing = = ESignalProcessing : : ShadowVisibilityMask | |
2019-06-11 18:27:07 -04:00
SignalProcessing = = ESignalProcessing : : AmbientOcclusion | |
SignalProcessing = = ESignalProcessing : : DiffuseAndAmbientOcclusion )
{
// Only for ray tracing denoising.
2019-10-11 15:33:31 -04:00
return RHISupportsRayTracingShaders ( Platform ) ;
2019-06-11 18:27:07 -04:00
}
2020-04-29 16:23:18 -04:00
else if ( SignalProcessing = = ESignalProcessing : : PolychromaticPenumbraHarmonic )
{
return false ;
}
2020-07-06 18:58:26 -04:00
else if (
SignalProcessing = = ESignalProcessing : : DiffuseSphericalHarmonic | |
SignalProcessing = = ESignalProcessing : : IndirectProbeHierarchy )
{
return DoesPlatformSupportLumenGI ( Platform ) ;
}
2019-06-11 18:27:07 -04:00
check ( 0 ) ;
return false ;
}
2018-12-18 21:41:17 -05:00
/** Shader parameter structure used for all shaders of this file: each shader's FParameters
 * pulls it in with SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters ). */
BEGIN_SHADER_PARAMETER_STRUCT ( FSSDCommonParameters , )
2020-07-06 18:58:26 -04:00
// Parameters also exposed outside of this file through Denoiser::FCommonShaderParameters.
SHADER_PARAMETER_STRUCT_INCLUDE ( Denoiser : : FCommonShaderParameters , PublicCommonParameters )
2019-09-14 09:45:25 -04:00
// NOTE(review): presumably the denoiser viewport bounds in pixels — confirm where they are set.
SHADER_PARAMETER ( FIntPoint , ViewportMin )
SHADER_PARAMETER ( FIntPoint , ViewportMax )
2021-09-22 10:01:48 -04:00
SHADER_PARAMETER ( FVector4f , ThreadIdToBufferUV )
2021-11-18 14:37:34 -05:00
SHADER_PARAMETER ( FVector2f , BufferUVToOutputPixelPosition )
2021-05-05 15:07:25 -04:00
SHADER_PARAMETER ( FMatrix44f , ScreenToView )
2021-11-18 14:37:34 -05:00
SHADER_PARAMETER ( FVector2f , BufferUVBilinearCorrection )
2019-09-14 09:45:25 -04:00
2019-06-11 18:27:07 -04:00
// Scene resources read by the shaders.
SHADER_PARAMETER_STRUCT_INCLUDE ( FSceneTextureParameters , SceneTextures )
2022-03-08 13:29:54 -05:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FStrataGlobalUniformParameters , Strata )
2019-10-01 13:03:04 -04:00
// One slot per compressed metadata texture (kCompressedMetadataTextures slots).
SHADER_PARAMETER_RDG_TEXTURE_ARRAY ( Texture2D < uint > , CompressedMetadata , [ kCompressedMetadataTextures ] )
2020-09-24 00:43:27 -04:00
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D , EyeAdaptationTexture )
2018-12-18 21:41:17 -05:00
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D < uint > , TileClassificationTexture )
SHADER_PARAMETER_STRUCT_REF ( FViewUniformShaderParameters , ViewUniformBuffer )
2021-04-15 12:55:18 -04:00
SHADER_PARAMETER ( uint32 , FrameIndex )
2018-12-18 21:41:17 -05:00
END_SHADER_PARAMETER_STRUCT ( )
2019-10-01 13:03:04 -04:00
/** Shader resource views of a multiplexed signal: one SRV slot per signal buffer
 * (kMaxBufferProcessingCount slots). Filled by CreateMultiplexedUintSRVs ( ). */
BEGIN_SHADER_PARAMETER_STRUCT ( FSSDSignalSRVs , )
SHADER_PARAMETER_RDG_TEXTURE_SRV_ARRAY ( Texture2D , Textures , [ kMaxBufferProcessingCount ] )
END_SHADER_PARAMETER_STRUCT ( )
2019-03-07 11:25:32 -05:00
/** Unordered access views of a multiplexed signal: one UAV slot per signal buffer
 * (kMaxBufferProcessingCount slots). Filled by CreateMultiplexedUAVs ( ). */
BEGIN_SHADER_PARAMETER_STRUCT ( FSSDSignalUAVs , )
2020-07-06 18:58:26 -04:00
SHADER_PARAMETER_RDG_TEXTURE_UAV_ARRAY ( RWTexture2D , UAVs , [ kMaxBufferProcessingCount ] )
2019-01-22 19:17:17 -05:00
END_SHADER_PARAMETER_STRUCT ( )
2018-12-18 21:41:17 -05:00
/** Shader parameter structure holding all the information needed for spatial filtering.
 * Every array has IScreenSpaceDenoiser::kMaxBatchSize entries.
 * NOTE(review): presumably one entry per signal of the batch — confirm where they are filled. */
BEGIN_SHADER_PARAMETER_STRUCT ( FSSDConvolutionMetaData , )
2021-09-22 10:01:48 -04:00
SHADER_PARAMETER_ARRAY ( FVector4f , LightPositionAndRadius , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
SHADER_PARAMETER_ARRAY ( FVector4f , LightDirectionAndLength , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
2021-10-04 09:14:58 -04:00
SHADER_PARAMETER_SCALAR_ARRAY ( float , HitDistanceToWorldBluringRadius , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
SHADER_PARAMETER_SCALAR_ARRAY ( uint32 , LightType , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
2018-12-18 21:41:17 -05:00
END_SHADER_PARAMETER_STRUCT ( )
2019-03-07 11:25:32 -05:00
/**
 * Creates the RDG textures of a multiplexed signal.
 *
 * @param GraphBuilder  Render graph builder creating the textures.
 * @param TextureCount  Number of textures to create; must not exceed kMaxBufferProcessingCount.
 * @param DescArray     Descriptor of each texture; only the first TextureCount entries are read.
 * @param TextureNames  Name of each texture; only the first TextureCount entries are read.
 * @return Signal textures whose first TextureCount slots hold the created textures; the
 *         remaining slots are left as default constructed.
 */
FSSDSignalTextures CreateMultiplexedTextures(
	FRDGBuilder& GraphBuilder,
	int32 TextureCount,
	const TStaticArray<FRDGTextureDesc, kMaxBufferProcessingCount>& DescArray,
	const TCHAR* const* TextureNames)
{
	check(TextureCount <= kMaxBufferProcessingCount);

	FSSDSignalTextures Result;
	for (int32 TextureIndex = 0; TextureIndex < TextureCount; ++TextureIndex)
	{
		Result.Textures[TextureIndex] = GraphBuilder.CreateTexture(DescArray[TextureIndex], TextureNames[TextureIndex]);
	}
	return Result;
}
2018-12-18 21:41:17 -05:00
2019-10-01 13:03:04 -04:00
/**
 * Creates, for every texture bound in SignalTextures, an SRV that views the texture with
 * an unsigned integer pixel format of the same byte size as the texture's own format.
 * Slots without a texture are left as default constructed.
 */
FSSDSignalSRVs CreateMultiplexedUintSRVs(FRDGBuilder& GraphBuilder, const FSSDSignalTextures& SignalTextures)
{
	FSSDSignalSRVs Result;
	for (int32 SlotIndex = 0; SlotIndex < kMaxBufferProcessingCount; ++SlotIndex)
	{
		auto&& Texture = SignalTextures.Textures[SlotIndex];
		if (!Texture)
		{
			continue;
		}

		// Pick the uint format matching the byte size of one block of the texture's format.
		const int32 BytesPerBlock = GPixelFormats[Texture->Desc.Format].BlockBytes;

		EPixelFormat UIntFormat = PF_Unknown;
		switch (BytesPerBlock)
		{
		case 1:
			UIntFormat = PF_R8_UINT;
			break;
		case 2:
			UIntFormat = PF_R16_UINT;
			break;
		case 4:
			UIntFormat = PF_R32_UINT;
			break;
		case 8:
			UIntFormat = PF_R32G32_UINT;
			break;
		case 16:
			UIntFormat = PF_R32G32B32A32_UINT;
			break;
		default:
			// No uint format of that size.
			check(0);
			break;
		}

		Result.Textures[SlotIndex] = GraphBuilder.CreateSRV(FRDGTextureSRVDesc::CreateWithPixelFormat(Texture, UIntFormat));
	}
	return Result;
}
2019-09-14 09:45:25 -04:00
/**
 * Creates a UAV on mip MipLevel of every texture bound in SignalTextures.
 * Slots without a texture are left as default constructed.
 */
FSSDSignalUAVs CreateMultiplexedUAVs(FRDGBuilder& GraphBuilder, const FSSDSignalTextures& SignalTextures, int32 MipLevel = 0)
{
	FSSDSignalUAVs Result;
	for (int32 SlotIndex = 0; SlotIndex < kMaxBufferProcessingCount; ++SlotIndex)
	{
		auto&& Texture = SignalTextures.Textures[SlotIndex];
		if (!Texture)
		{
			continue;
		}

		Result.UAVs[SlotIndex] = GraphBuilder.CreateUAV(FRDGTextureUAVDesc(Texture, MipLevel));
	}
	return Result;
}
2019-10-01 13:03:04 -04:00
/** Global compute shader writing the CompressedMetadataOutput textures, with one
 * permutation per ECompressedMetadataLayout that has to be computed by the denoiser. */
class FSSDCompressMetadataCS : public FGlobalShader
{
DECLARE_GLOBAL_SHADER ( FSSDCompressMetadataCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FSSDCompressMetadataCS , FGlobalShader ) ;
class FMetadataLayoutDim : SHADER_PERMUTATION_ENUM_CLASS ( " DIM_METADATA_LAYOUT " , ECompressedMetadataLayout ) ;
using FPermutationDomain = TShaderPermutationDomain < FMetadataLayoutDim > ;
/** The Disabled and FedDepthAndShadingModelID layouts are never compiled; the other
 * layouts follow ShouldCompileSignalPipeline ( ) for ScreenSpaceDiffuseIndirect. */
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
FPermutationDomain PermutationVector ( Parameters . PermutationId ) ;
// Nothing to compress when the metadata layout is disabled.
if ( PermutationVector . Get < FMetadataLayoutDim > ( ) = = ECompressedMetadataLayout : : Disabled )
{
return false ;
}
2020-07-06 18:58:26 -04:00
// Precomputed by denoiser caller.
if ( PermutationVector . Get < FMetadataLayoutDim > ( ) = = ECompressedMetadataLayout : : FedDepthAndShadingModelID )
{
return false ;
}
2020-01-16 14:00:26 -05:00
return ShouldCompileSignalPipeline ( ESignalProcessing : : ScreenSpaceDiffuseIndirect , Parameters . Platform ) ;
2019-10-01 13:03:04 -04:00
}
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters )
// One output slot per compressed metadata texture.
SHADER_PARAMETER_RDG_TEXTURE_UAV_ARRAY ( RWTexture2D < uint > , CompressedMetadataOutput , [ kCompressedMetadataTextures ] )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2019-03-07 11:25:32 -05:00
2019-06-11 18:27:07 -04:00
class FSSDInjestCS : public FGlobalShader
2019-03-07 11:25:32 -05:00
{
DECLARE_GLOBAL_SHADER ( FSSDInjestCS ) ;
2019-06-11 18:27:07 -04:00
SHADER_USE_PARAMETER_STRUCT ( FSSDInjestCS , FGlobalShader ) ;
2019-03-07 11:25:32 -05:00
using FPermutationDomain = TShaderPermutationDomain < FSignalProcessingDim , FSignalBatchSizeDim , FMultiSPPDim > ;
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
FPermutationDomain PermutationVector ( Parameters . PermutationId ) ;
ESignalProcessing SignalProcessing = PermutationVector . Get < FSignalProcessingDim > ( ) ;
// Only compile this shader for signal processing that uses it.
if ( ! SignalUsesInjestion ( SignalProcessing ) )
{
return false ;
}
// Not all signal processing allow to batch multiple signals at the same time.
if ( PermutationVector . Get < FSignalBatchSizeDim > ( ) > SignalMaxBatchSize ( SignalProcessing ) )
{
return false ;
}
// Only compiler multi SPP permutation for signal that supports it.
if ( PermutationVector . Get < FMultiSPPDim > ( ) & & ! SignalSupportMultiSPP ( SignalProcessing ) )
{
return false ;
}
2019-10-01 13:03:04 -04:00
// Compile out the shader if this permutation gets remapped.
if ( RemapPermutationVector ( PermutationVector ) ! = PermutationVector )
{
return false ;
}
2019-06-11 18:27:07 -04:00
return ShouldCompileSignalPipeline ( SignalProcessing , Parameters . Platform ) ;
2019-03-07 11:25:32 -05:00
}
2019-03-06 23:59:18 -05:00
2019-10-01 13:03:04 -04:00
static FPermutationDomain RemapPermutationVector ( FPermutationDomain PermutationVector )
{
ESignalProcessing SignalProcessing = PermutationVector . Get < FSignalProcessingDim > ( ) ;
// force use the multi sample per pixel code path.
if ( ! SignalSupport1SPP ( SignalProcessing ) )
{
PermutationVector . Set < FMultiSPPDim > ( true ) ;
}
return PermutationVector ;
}
2019-03-06 23:59:18 -05:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters )
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDConvolutionMetaData , ConvolutionMetaData )
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , SignalInput )
SHADER_PARAMETER_STRUCT ( FSSDSignalUAVs , SignalOutput )
2020-09-01 14:07:48 -04:00
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D , DebugOutput )
2018-12-18 21:41:17 -05:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2019-06-11 18:27:07 -04:00
/** Global compute shader implementing the spatial kernels of the constant pixel density
 * pass layout; the EStage permutation dimension selects which kernel is compiled. */
class FSSDSpatialAccumulationCS : public FGlobalShader
2018-12-18 21:41:17 -05:00
{
DECLARE_GLOBAL_SHADER ( FSSDSpatialAccumulationCS ) ;
2019-06-11 18:27:07 -04:00
SHADER_USE_PARAMETER_STRUCT ( FSSDSpatialAccumulationCS , FGlobalShader ) ;
2019-03-07 11:25:32 -05:00
// NOTE(review): presumably the thread group size in pixels per dimension — confirm against the dispatch code and the shader.
static const uint32 kGroupSize = 8 ;
2019-01-10 17:58:11 -05:00
2019-01-10 20:14:17 -05:00
/** Spatial kernel compiled in a permutation. */
enum class EStage
{
// Spatial kernel used to process raw input for the temporal accumulation.
ReConstruction ,
2019-03-15 13:58:07 -04:00
// Spatial kernel to pre filter.
PreConvolution ,
2019-02-14 14:23:27 -05:00
// Spatial kernel used to pre convolve history rejection.
RejectionPreConvolution ,
2019-01-10 20:14:17 -05:00
// Spatial kernel used to post filter the temporal accumulation.
PostFiltering ,
2019-03-07 11:25:32 -05:00
// Final spatial kernel, that may output specific buffer encoding to integrate with the rest of the renderer
FinalOutput ,
2019-01-10 20:14:17 -05:00
MAX
} ;
class FStageDim : SHADER_PERMUTATION_ENUM_CLASS ( " DIM_STAGE " , EStage ) ;
2019-01-17 17:03:37 -05:00
class FUpscaleDim : SHADER_PERMUTATION_BOOL ( " DIM_UPSCALE " ) ;
2019-01-10 20:14:17 -05:00
2019-03-07 11:25:32 -05:00
using FPermutationDomain = TShaderPermutationDomain < FSignalProcessingDim , FStageDim , FUpscaleDim , FSignalBatchSizeDim , FMultiSPPDim > ;
2019-01-10 20:14:17 -05:00
/** Returns whether a permutation of this shader should be compiled for the platform:
 * false as soon as one of the rules below rejects the permutation, otherwise the
 * result of ShouldCompileSignalPipeline ( ) for the permutation's signal. */
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
2020-10-08 09:43:48 -04:00
2019-01-10 20:14:17 -05:00
FPermutationDomain PermutationVector ( Parameters . PermutationId ) ;
2019-03-07 11:25:32 -05:00
ESignalProcessing SignalProcessing = PermutationVector . Get < FSignalProcessingDim > ( ) ;
2019-01-10 20:14:17 -05:00
2019-03-07 11:25:32 -05:00
// Only constant pixel density pass layout uses this shader.
if ( ! UsesConstantPixelDensityPassLayout ( PermutationVector . Get < FSignalProcessingDim > ( ) ) )
{
return false ;
}
// Not all signal processing allow to batch multiple signals at the same time.
if ( PermutationVector . Get < FSignalBatchSizeDim > ( ) > SignalMaxBatchSize ( SignalProcessing ) )
2019-01-10 20:14:17 -05:00
{
return false ;
}
2019-01-17 17:03:37 -05:00
// Only reconstruction has upscale capability for now.
if ( PermutationVector . Get < FUpscaleDim > ( ) & &
PermutationVector . Get < FStageDim > ( ) ! = EStage : : ReConstruction )
{
return false ;
}
2019-09-14 09:45:25 -04:00
// Upscale is only compiled for signals that need it.
if ( PermutationVector . Get < FUpscaleDim > ( ) & &
! SignalSupportsUpscaling ( SignalProcessing ) )
{
return false ;
}
2019-03-15 13:58:07 -04:00
// Only compile pre convolution for signal that uses it.
if ( ! SignalUsesPreConvolution ( SignalProcessing ) & &
PermutationVector . Get < FStageDim > ( ) = = EStage : : PreConvolution )
{
return false ;
}
2019-02-14 14:23:27 -05:00
// Only compile rejection pre convolution for signal that uses it.
2019-03-07 11:25:32 -05:00
if ( ! SignalUsesRejectionPreConvolution ( SignalProcessing ) & &
2019-02-14 14:23:27 -05:00
PermutationVector . Get < FStageDim > ( ) = = EStage : : RejectionPreConvolution )
{
return false ;
}
2019-09-14 09:45:25 -04:00
// Only compile post convolution for signal that uses it.
if ( ! SignalUsesPostConvolution ( SignalProcessing ) & &
PermutationVector . Get < FStageDim > ( ) = = EStage : : PostFiltering )
{
return false ;
}
2019-03-07 11:25:32 -05:00
// Only compile final convolution for signal that uses it.
if ( ! SignalUsesFinalConvolution ( SignalProcessing ) & &
PermutationVector . Get < FStageDim > ( ) = = EStage : : FinalOutput )
{
return false ;
}
// Only compile multi SPP permutation for signal that supports it.
if ( PermutationVector . Get < FStageDim > ( ) = = EStage : : ReConstruction & &
PermutationVector . Get < FMultiSPPDim > ( ) & & ! SignalSupportMultiSPP ( SignalProcessing ) )
{
return false ;
}
2019-06-11 18:27:07 -04:00
// Compile out the shader if this permutation gets remapped.
if ( RemapPermutationVector ( PermutationVector ) ! = PermutationVector )
2019-03-07 11:25:32 -05:00
{
return false ;
}
2019-06-11 18:27:07 -04:00
return ShouldCompileSignalPipeline ( SignalProcessing , Parameters . Platform ) ;
}
/** Returns the permutation that is actually used in place of PermutationVector:
 * FMultiSPPDim is forced to true for every stage other than ReConstruction, and for
 * ReConstruction when the signal has no 1spp support. */
static FPermutationDomain RemapPermutationVector ( FPermutationDomain PermutationVector )
{
ESignalProcessing SignalProcessing = PermutationVector . Get < FSignalProcessingDim > ( ) ;
if ( PermutationVector . Get < FStageDim > ( ) = = EStage : : ReConstruction )
{
// force use the multi sample per pixel code path.
if ( ! SignalSupport1SPP ( SignalProcessing ) )
{
PermutationVector . Set < FMultiSPPDim > ( true ) ;
}
}
else
{
// Stages other than reconstruction always use the FMultiSPPDim = true permutation.
PermutationVector . Set < FMultiSPPDim > ( true ) ;
}
return PermutationVector ;
2019-01-10 20:14:17 -05:00
}
2018-12-18 21:41:17 -05:00
2021-02-18 18:13:28 -04:00
/** Adds CFLAG_ForceOptimization to the compilation environment of every permutation. */
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
// TODO: UECON-464 - force optimizations to workaround shader compiler issue on DXC until fixed by MS
OutEnvironment . CompilerFlags . Add ( CFLAG_ForceOptimization ) ;
}
2018-12-18 21:41:17 -05:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-09-22 10:01:48 -04:00
SHADER_PARAMETER_ARRAY ( FVector4f , InputBufferUVMinMax , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
2019-09-14 09:45:25 -04:00
2019-01-10 20:14:17 -05:00
SHADER_PARAMETER ( uint32 , MaxSampleCount )
2020-04-29 16:25:02 -04:00
SHADER_PARAMETER ( uint32 , PreviousCumulativeMaxSampleCount )
2019-01-17 17:03:37 -05:00
SHADER_PARAMETER ( int32 , UpscaleFactor )
2019-02-14 14:23:27 -05:00
SHADER_PARAMETER ( float , KernelSpreadFactor )
2019-09-14 09:45:25 -04:00
SHADER_PARAMETER ( float , HarmonicPeriode )
2019-06-11 18:27:07 -04:00
2018-12-18 21:41:17 -05:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters )
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDConvolutionMetaData , ConvolutionMetaData )
2018-12-18 21:41:17 -05:00
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , SignalInput )
2019-10-01 13:03:04 -04:00
SHADER_PARAMETER_STRUCT ( FSSDSignalSRVs , SignalInputUint )
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT ( FSSDSignalUAVs , SignalOutput )
2019-01-14 15:34:19 -05:00
2019-10-02 18:47:26 -04:00
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D , DebugOutput ) // TODO(Denoiser): remove
2018-12-18 21:41:17 -05:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2019-06-11 18:27:07 -04:00
class FSSDTemporalAccumulationCS : public FGlobalShader
2018-12-18 21:41:17 -05:00
{
DECLARE_GLOBAL_SHADER ( FSSDTemporalAccumulationCS ) ;
2019-06-11 18:27:07 -04:00
SHADER_USE_PARAMETER_STRUCT ( FSSDTemporalAccumulationCS , FGlobalShader ) ;
2018-12-18 21:41:17 -05:00
2019-03-07 11:25:32 -05:00
using FPermutationDomain = TShaderPermutationDomain < FSignalProcessingDim , FSignalBatchSizeDim > ;
2019-01-07 17:22:05 -05:00
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
FPermutationDomain PermutationVector ( Parameters . PermutationId ) ;
2019-03-07 11:25:32 -05:00
ESignalProcessing SignalProcessing = PermutationVector . Get < FSignalProcessingDim > ( ) ;
2019-01-07 17:22:05 -05:00
2019-03-07 11:25:32 -05:00
// Only constant pixel density pass layout uses this shader.
if ( ! UsesConstantPixelDensityPassLayout ( SignalProcessing ) )
{
return false ;
}
// Not all signal processing allow to batch multiple signals at the same time.
if ( PermutationVector . Get < FSignalBatchSizeDim > ( ) > SignalMaxBatchSize ( SignalProcessing ) )
2019-01-07 17:22:05 -05:00
{
return false ;
}
2019-06-11 18:27:07 -04:00
return ShouldCompileSignalPipeline ( SignalProcessing , Parameters . Platform ) ;
2019-01-07 17:22:05 -05:00
}
2018-12-18 21:41:17 -05:00
2021-02-18 18:13:28 -04:00
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
// TODO: UECON-464 - force optimizations to workaround shader compiler issue on DXC until fixed by MS
OutEnvironment . CompilerFlags . Add ( CFLAG_ForceOptimization ) ;
}
2019-02-14 14:23:27 -05:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-10-04 09:14:58 -04:00
SHADER_PARAMETER_SCALAR_ARRAY ( int32 , bCameraCut , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
2019-06-11 18:27:07 -04:00
SHADER_PARAMETER ( float , HistoryPreExposureCorrection )
2021-09-22 10:01:48 -04:00
SHADER_PARAMETER ( FVector4f , ScreenPosToHistoryBufferUV )
SHADER_PARAMETER ( FVector4f , HistoryBufferSizeAndInvSize )
SHADER_PARAMETER ( FVector4f , HistoryBufferUVMinMax )
SHADER_PARAMETER_ARRAY ( FVector4f , HistoryBufferScissorUVMinMax , [ IScreenSpaceDenoiser : : kMaxBatchSize ] )
SHADER_PARAMETER ( FVector4f , PrevSceneBufferUVToScreenPosition )
2019-09-14 09:45:25 -04:00
2019-02-14 14:23:27 -05:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters )
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDConvolutionMetaData , ConvolutionMetaData )
2019-02-14 14:23:27 -05:00
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , SignalInput )
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , HistoryRejectionSignal )
SHADER_PARAMETER_STRUCT ( FSSDSignalUAVs , SignalHistoryOutput )
2019-02-14 14:23:27 -05:00
2019-03-07 11:25:32 -05:00
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , PrevHistory )
2019-02-14 14:23:27 -05:00
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D , PrevDepthBuffer )
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D , PrevGBufferA )
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D , PrevGBufferB )
2019-10-01 13:03:04 -04:00
SHADER_PARAMETER_RDG_TEXTURE_ARRAY ( Texture2D < uint > , PrevCompressedMetadata , [ kCompressedMetadataTextures ] )
2019-10-02 18:47:26 -04:00
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D , DebugOutput ) // TODO(Denoiser): remove
2019-02-14 14:23:27 -05:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2019-09-14 09:45:25 -04:00
class FSSDComposeHarmonicsCS : public FGlobalShader
{
DECLARE_GLOBAL_SHADER ( FSSDComposeHarmonicsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FSSDComposeHarmonicsCS , FGlobalShader ) ;
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
return ShouldCompileSignalPipeline ( ESignalProcessing : : PolychromaticPenumbraHarmonic , Parameters . Platform ) ;
}
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_STRUCT_ARRAY ( FSSDSignalTextures , SignalHarmonics , [ IScreenSpaceDenoiser : : kMultiPolychromaticPenumbraHarmonics ] )
SHADER_PARAMETER_STRUCT ( FSSDSignalTextures , SignalIntegrand )
SHADER_PARAMETER_STRUCT_INCLUDE ( FSSDCommonParameters , CommonParameters )
SHADER_PARAMETER_STRUCT ( FSSDSignalUAVs , SignalOutput )
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D , DebugOutput )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2019-10-01 13:03:04 -04:00
// Binds each compute shader class declared above to its .usf source file and its MainCS entry point.
IMPLEMENT_GLOBAL_SHADER ( FSSDCompressMetadataCS , " /Engine/Private/ScreenSpaceDenoise/SSDCompressMetadata.usf " , " MainCS " , SF_Compute ) ;
2019-03-07 11:25:32 -05:00
IMPLEMENT_GLOBAL_SHADER ( FSSDInjestCS , " /Engine/Private/ScreenSpaceDenoise/SSDInjest.usf " , " MainCS " , SF_Compute ) ;
2019-02-14 14:23:27 -05:00
IMPLEMENT_GLOBAL_SHADER ( FSSDSpatialAccumulationCS , " /Engine/Private/ScreenSpaceDenoise/SSDSpatialAccumulation.usf " , " MainCS " , SF_Compute ) ;
IMPLEMENT_GLOBAL_SHADER ( FSSDTemporalAccumulationCS , " /Engine/Private/ScreenSpaceDenoise/SSDTemporalAccumulation.usf " , " MainCS " , SF_Compute ) ;
2019-09-14 09:45:25 -04:00
IMPLEMENT_GLOBAL_SHADER ( FSSDComposeHarmonicsCS , " /Engine/Private/ScreenSpaceDenoise/SSDComposeHarmonics.usf " , " MainCS " , SF_Compute ) ;
2018-12-18 21:41:17 -05:00
} // namespace
2020-01-24 18:07:01 -05:00
/**
 * PrevViewInfo and PrevFrameViewInfo pooled render targets to use for temporal storage of scene textures.
 *
 * The Prev* members hold references to the render targets read from View.PrevViewInfo.
 * The Next* members are non-owning pointers to the matching slots of the view state's
 * PrevFrameViewInfo; they default to nullptr so that a structure that has not been set
 * up yet (or a slot that is not used) never carries an indeterminate pointer.
 */
struct FViewInfoPooledRenderTargets
{
	TRefCountPtr<IPooledRenderTarget> PrevDepthBuffer;
	TRefCountPtr<IPooledRenderTarget> PrevGBufferA;
	TRefCountPtr<IPooledRenderTarget> PrevGBufferB;
	TRefCountPtr<IPooledRenderTarget> PrevCompressedDepthViewNormal;

	TRefCountPtr<IPooledRenderTarget>* NextDepthBuffer = nullptr;
	TRefCountPtr<IPooledRenderTarget>* NextGBufferA = nullptr;
	TRefCountPtr<IPooledRenderTarget>* NextGBufferB = nullptr;
	TRefCountPtr<IPooledRenderTarget>* NextCompressedDepthViewNormal = nullptr;
};
void SetupSceneViewInfoPooledRenderTargets (
const FViewInfo & View ,
FViewInfoPooledRenderTargets * OutViewInfoPooledRenderTargets )
{
auto & & PrevViewInfo = View . PrevViewInfo ;
auto & & PrevFrameViewInfo = View . ViewState - > PrevFrameViewInfo ;
OutViewInfoPooledRenderTargets - > PrevDepthBuffer = PrevViewInfo . DepthBuffer ;
OutViewInfoPooledRenderTargets - > PrevGBufferA = PrevViewInfo . GBufferA ;
OutViewInfoPooledRenderTargets - > PrevGBufferB = PrevViewInfo . GBufferB ;
OutViewInfoPooledRenderTargets - > PrevCompressedDepthViewNormal = PrevViewInfo . CompressedDepthViewNormal ;
OutViewInfoPooledRenderTargets - > NextDepthBuffer = & PrevFrameViewInfo . DepthBuffer ;
OutViewInfoPooledRenderTargets - > NextGBufferA = & PrevFrameViewInfo . GBufferA ;
OutViewInfoPooledRenderTargets - > NextGBufferB = & PrevFrameViewInfo . GBufferB ;
OutViewInfoPooledRenderTargets - > NextCompressedDepthViewNormal = & PrevFrameViewInfo . CompressedDepthViewNormal ;
}
void SetupImaginaryReflectionViewInfoPooledRenderTargets (
const FViewInfo & View ,
FViewInfoPooledRenderTargets * OutViewInfoPooledRenderTargets )
{
auto & & PrevViewInfo = View . PrevViewInfo ;
auto & & PrevFrameViewInfo = View . ViewState - > PrevFrameViewInfo ;
OutViewInfoPooledRenderTargets - > PrevDepthBuffer = PrevViewInfo . ImaginaryReflectionDepthBuffer ;
OutViewInfoPooledRenderTargets - > PrevGBufferA = PrevViewInfo . ImaginaryReflectionGBufferA ;
OutViewInfoPooledRenderTargets - > PrevGBufferB = nullptr ; // GBufferB not used
OutViewInfoPooledRenderTargets - > PrevCompressedDepthViewNormal = PrevViewInfo . ImaginaryReflectionCompressedDepthViewNormal ;
OutViewInfoPooledRenderTargets - > NextDepthBuffer = & PrevFrameViewInfo . ImaginaryReflectionDepthBuffer ;
OutViewInfoPooledRenderTargets - > NextGBufferA = & PrevFrameViewInfo . ImaginaryReflectionGBufferA ;
OutViewInfoPooledRenderTargets - > NextGBufferB = nullptr ; // GBufferB not used
OutViewInfoPooledRenderTargets - > NextCompressedDepthViewNormal = & PrevFrameViewInfo . ImaginaryReflectionCompressedDepthViewNormal ;
}
2020-07-06 18:58:26 -04:00
void Denoiser : : SetupCommonShaderParameters (
const FViewInfo & View ,
const FSceneTextureParameters & SceneTextures ,
const FIntRect DenoiserFullResViewport ,
float DenoisingResolutionFraction ,
Denoiser : : FCommonShaderParameters * OutPublicCommonParameters )
{
check ( OutPublicCommonParameters ) ;
2020-09-24 00:43:27 -04:00
FIntPoint FullResBufferExtent = SceneTextures . SceneDepthTexture - > Desc . Extent ;
2020-07-06 18:58:26 -04:00
FIntPoint DenoiserBufferExtent = FullResBufferExtent ;
FIntRect DenoiserViewport = DenoiserFullResViewport ;
if ( DenoisingResolutionFraction = = 0.5f )
{
DenoiserBufferExtent / = 2 ;
DenoiserViewport = FIntRect : : DivideAndRoundUp ( DenoiserViewport , 2 ) ;
}
2021-09-22 10:01:48 -04:00
OutPublicCommonParameters - > DenoiserBufferSizeAndInvSize = FVector4f (
2020-07-06 18:58:26 -04:00
float ( DenoiserBufferExtent . X ) ,
float ( DenoiserBufferExtent . Y ) ,
1.0f / float ( DenoiserBufferExtent . X ) ,
1.0f / float ( DenoiserBufferExtent . Y ) ) ;
OutPublicCommonParameters - > SceneBufferUVToScreenPosition . X = float ( FullResBufferExtent . X ) / float ( View . ViewRect . Width ( ) ) * 2.0f ;
OutPublicCommonParameters - > SceneBufferUVToScreenPosition . Y = - float ( FullResBufferExtent . Y ) / float ( View . ViewRect . Height ( ) ) * 2.0f ;
OutPublicCommonParameters - > SceneBufferUVToScreenPosition . Z = float ( View . ViewRect . Min . X ) / float ( View . ViewRect . Width ( ) ) * 2.0f - 1.0f ;
OutPublicCommonParameters - > SceneBufferUVToScreenPosition . W = - float ( View . ViewRect . Min . Y ) / float ( View . ViewRect . Height ( ) ) * 2.0f + 1.0f ;
2021-09-22 10:01:48 -04:00
OutPublicCommonParameters - > DenoiserBufferBilinearUVMinMax = FVector4f (
2020-07-06 18:58:26 -04:00
float ( DenoiserViewport . Min . X + 0.5f ) / float ( DenoiserBufferExtent . X ) ,
float ( DenoiserViewport . Min . Y + 0.5f ) / float ( DenoiserBufferExtent . Y ) ,
float ( DenoiserViewport . Max . X - 0.5f ) / float ( DenoiserBufferExtent . X ) ,
float ( DenoiserViewport . Max . Y - 0.5f ) / float ( DenoiserBufferExtent . Y ) ) ;
float TanHalfFieldOfView = View . ViewMatrices . GetInvProjectionMatrix ( ) . M [ 0 ] [ 0 ] ;
// Should be multiplied 0.5* for the diameter to radius, and by 2.0 because GetTanHalfFieldOfView() cover only half of the pixels.
OutPublicCommonParameters - > WorldDepthToPixelWorldRadius = TanHalfFieldOfView / float ( View . ViewRect . Width ( ) ) ;
}
2019-01-22 19:17:17 -05:00
/** Generic settings to denoise signal at constant pixel density across the viewport. */
struct FSSDConstantPixelDensitySettings
{
2019-10-01 13:03:04 -04:00
FIntRect FullResViewport ;
2019-01-22 19:17:17 -05:00
ESignalProcessing SignalProcessing ;
2019-03-07 11:25:32 -05:00
int32 SignalBatchSize = 1 ;
2019-09-14 09:45:25 -04:00
float HarmonicPeriode = 1.0f ;
2019-03-07 11:25:32 -05:00
int32 MaxInputSPP = 1 ;
2019-01-22 19:17:17 -05:00
float InputResolutionFraction = 1.0f ;
2019-10-01 13:03:04 -04:00
float DenoisingResolutionFraction = 1.0f ;
2019-09-14 09:45:25 -04:00
bool bEnableReconstruction = true ;
2019-01-22 19:17:17 -05:00
int32 ReconstructionSamples = 1 ;
2019-03-15 13:58:07 -04:00
int32 PreConvolutionCount = 0 ;
2020-09-24 00:43:27 -04:00
float KernelSpreadFactor = 8 ;
2019-01-22 19:17:17 -05:00
bool bUseTemporalAccumulation = false ;
int32 HistoryConvolutionSampleCount = 1 ;
2019-02-14 14:23:27 -05:00
float HistoryConvolutionKernelSpreadFactor = 1.0f ;
2019-09-14 09:45:25 -04:00
TStaticArray < FIntRect , IScreenSpaceDenoiser : : kMaxBatchSize > SignalScissor ;
2019-03-07 11:25:32 -05:00
TStaticArray < const FLightSceneInfo * , IScreenSpaceDenoiser : : kMaxBatchSize > LightSceneInfo ;
2020-07-06 18:58:26 -04:00
FRDGTextureRef CompressedDepthTexture = nullptr ;
FRDGTextureRef CompressedShadingModelTexture = nullptr ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
# if WITH_MGPU
FName EffectName ;
FSSDConstantPixelDensitySettings ( const FName & InEffectName )
: EffectName ( InEffectName )
{ }
# endif // WITH_MGPU
2019-01-22 19:17:17 -05:00
} ;
/** Denoises a signal at constant pixel density across the viewport. */
static void DenoiseSignalAtConstantPixelDensity (
2019-01-07 17:22:05 -05:00
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
2020-01-24 18:07:01 -05:00
const FViewInfoPooledRenderTargets & ViewInfoPooledRenderTargets ,
2019-01-22 19:17:17 -05:00
const FSSDSignalTextures & InputSignal ,
FSSDConstantPixelDensitySettings Settings ,
2019-09-14 09:45:25 -04:00
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevFilteringHistory ,
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewFilteringHistory ,
2019-01-22 19:17:17 -05:00
FSSDSignalTextures * OutputSignal )
2019-01-07 17:22:05 -05:00
{
2019-03-07 11:25:32 -05:00
check ( UsesConstantPixelDensityPassLayout ( Settings . SignalProcessing ) ) ;
2019-09-14 09:45:25 -04:00
// Make sure the viewport of the denoiser is within the viewport of the view.
{
FIntRect Union = View . ViewRect ;
2019-10-01 13:03:04 -04:00
Union . Union ( Settings . FullResViewport ) ;
2019-09-14 09:45:25 -04:00
check ( Union = = View . ViewRect ) ;
}
2019-10-01 10:25:32 -04:00
ensure ( Settings . InputResolutionFraction = = 1.0f | | Settings . InputResolutionFraction = = 0.5f | | Settings . InputResolutionFraction = = 0.25f ) ;
2019-02-14 14:23:27 -05:00
2019-03-07 11:25:32 -05:00
auto GetResourceNames = [ & ] ( const TCHAR * const ResourceNames [ ] )
2019-02-14 14:23:27 -05:00
{
2019-03-07 11:25:32 -05:00
return ResourceNames + ( int32 ( Settings . SignalProcessing ) * kMaxBufferProcessingCount ) ;
2019-02-14 14:23:27 -05:00
} ;
2019-01-17 17:03:37 -05:00
2019-10-01 13:03:04 -04:00
const bool bUseMultiInputSPPShaderPath = Settings . MaxInputSPP > 1 ;
2020-09-24 00:43:27 -04:00
FIntPoint FullResBufferExtent = SceneTextures . SceneDepthTexture - > Desc . Extent ;
2019-10-01 13:03:04 -04:00
FIntPoint BufferExtent = FullResBufferExtent ;
FIntRect Viewport = Settings . FullResViewport ;
if ( Settings . DenoisingResolutionFraction = = 0.5f )
{
BufferExtent / = 2 ;
Viewport = FIntRect : : DivideAndRoundUp ( Viewport , 2 ) ;
}
2019-03-07 11:25:32 -05:00
// Number of signals to batch.
int32 MaxSignalBatchSize = SignalMaxBatchSize ( Settings . SignalProcessing ) ;
check ( Settings . SignalBatchSize > = 1 & & Settings . SignalBatchSize < = MaxSignalBatchSize ) ;
// Number of textures per batched signal.
int32 InjestTextureCount = 0 ;
int32 ReconstructionTextureCount = 0 ;
int32 HistoryTextureCountPerSignal = 0 ;
2019-01-07 17:22:05 -05:00
// Descriptors to allocate the internal denoising buffers.
2019-03-07 11:25:32 -05:00
bool bHasReconstructionLayoutDifferentFromHistory = false ;
TStaticArray < FRDGTextureDesc , kMaxBufferProcessingCount > InjestDescs ;
TStaticArray < FRDGTextureDesc , kMaxBufferProcessingCount > ReconstructionDescs ;
TStaticArray < FRDGTextureDesc , kMaxBufferProcessingCount > HistoryDescs ;
2019-01-22 19:17:17 -05:00
FRDGTextureDesc DebugDesc ;
{
2019-09-14 09:45:25 -04:00
// Manually format texels in the shader to reduce VGPR pressure with overlapped texture fetches.
const bool bManualTexelFormatting = true ;
2019-03-12 10:09:36 -04:00
static const EPixelFormat PixelFormatPerChannel [ ] = {
PF_Unknown ,
PF_R16F ,
PF_G16R16F ,
PF_FloatRGBA , // there is no 16bits float RGB
PF_FloatRGBA ,
} ;
2020-09-24 00:43:27 -04:00
FRDGTextureDesc RefDesc = FRDGTextureDesc : : Create2D (
2019-09-14 09:45:25 -04:00
BufferExtent ,
2019-03-07 11:25:32 -05:00
PF_Unknown ,
FClearValueBinding : : Black ,
2020-09-24 00:43:27 -04:00
TexCreate_ShaderResource | TexCreate_RenderTargetable | TexCreate_UAV ) ;
2019-01-22 19:17:17 -05:00
2019-03-07 11:25:32 -05:00
DebugDesc = RefDesc ;
2019-01-22 19:17:17 -05:00
DebugDesc . Format = PF_FloatRGBA ;
2019-03-07 11:25:32 -05:00
for ( int32 i = 0 ; i < kMaxBufferProcessingCount ; i + + )
2019-01-22 19:17:17 -05:00
{
2019-03-07 11:25:32 -05:00
InjestDescs [ i ] = RefDesc ;
ReconstructionDescs [ i ] = RefDesc ;
HistoryDescs [ i ] = RefDesc ;
}
2019-10-01 13:03:04 -04:00
if ( Settings . SignalProcessing = = ESignalProcessing : : ShadowVisibilityMask )
2019-03-07 11:25:32 -05:00
{
check ( Settings . SignalBatchSize > = 1 & & Settings . SignalBatchSize < = IScreenSpaceDenoiser : : kMaxBatchSize ) ;
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
2020-04-02 22:21:32 -04:00
InjestDescs [ BatchedSignalId / 2 ] . Format = ( BatchedSignalId % 2 ) ? PF_R32G32_UINT : PF_R32_UINT ;
InjestTextureCount = BatchedSignalId / 2 + 1 ;
2020-04-07 01:28:45 -04:00
ReconstructionDescs [ BatchedSignalId ] . Format = PF_FloatRGBA ;
HistoryDescs [ BatchedSignalId ] . Format = PF_FloatRGBA ;
2019-03-07 11:25:32 -05:00
}
HistoryTextureCountPerSignal = 1 ;
2019-03-12 10:09:36 -04:00
ReconstructionTextureCount = Settings . SignalBatchSize ;
2020-03-02 04:08:24 -05:00
bHasReconstructionLayoutDifferentFromHistory = false ;
2019-03-07 11:25:32 -05:00
}
2019-09-14 09:45:25 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : PolychromaticPenumbraHarmonic )
{
ReconstructionTextureCount = 4 ;
ReconstructionDescs [ 0 ] . Format = PF_FloatRGBA ;
ReconstructionDescs [ 1 ] . Format = PF_FloatRGBA ;
ReconstructionDescs [ 2 ] . Format = PF_FloatRGBA ;
ReconstructionDescs [ 3 ] . Format = PF_FloatRGBA ;
HistoryTextureCountPerSignal = 2 ;
HistoryDescs [ 0 ] . Format = PF_FloatRGBA ;
HistoryDescs [ 1 ] . Format = PF_FloatRGBA ;
}
2019-03-07 11:25:32 -05:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : Reflections )
{
ReconstructionDescs [ 0 ] . Format = HistoryDescs [ 0 ] . Format = PF_FloatRGBA ;
2020-04-29 16:25:02 -04:00
ReconstructionDescs [ 1 ] . Format = HistoryDescs [ 1 ] . Format = PF_G16R16F ;
2019-03-07 11:25:32 -05:00
ReconstructionTextureCount = HistoryTextureCountPerSignal = 2 ;
bHasReconstructionLayoutDifferentFromHistory = false ;
2019-01-22 19:17:17 -05:00
}
else if ( Settings . SignalProcessing = = ESignalProcessing : : AmbientOcclusion )
{
2020-09-24 00:43:27 -04:00
ReconstructionDescs [ 0 ] . Format = HistoryDescs [ 0 ] . Format = PF_FloatRGBA ;
2019-03-07 11:25:32 -05:00
ReconstructionTextureCount = HistoryTextureCountPerSignal = 1 ;
bHasReconstructionLayoutDifferentFromHistory = false ;
2019-01-22 19:17:17 -05:00
}
2019-06-11 18:27:07 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : DiffuseAndAmbientOcclusion )
2019-02-14 14:23:27 -05:00
{
2019-03-07 11:25:32 -05:00
ReconstructionDescs [ 0 ] . Format = PF_FloatRGBA ;
ReconstructionDescs [ 1 ] . Format = PF_R16F ;
ReconstructionTextureCount = 2 ;
HistoryDescs [ 0 ] . Format = PF_FloatRGBA ;
HistoryDescs [ 1 ] . Format = PF_R16F ; //PF_FloatRGB;
HistoryTextureCountPerSignal = 2 ;
bHasReconstructionLayoutDifferentFromHistory = false ;
2019-02-14 14:23:27 -05:00
}
2019-09-14 09:45:25 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : DiffuseSphericalHarmonic )
{
2020-07-06 18:58:26 -04:00
ReconstructionDescs [ 0 ] . Format = PF_FloatRGBA ;
ReconstructionDescs [ 1 ] . Format = PF_FloatRGBA ;
ReconstructionTextureCount = 2 ;
2019-09-14 09:45:25 -04:00
2020-07-06 18:58:26 -04:00
HistoryDescs = ReconstructionDescs ;
HistoryTextureCountPerSignal = 2 ;
2019-09-14 09:45:25 -04:00
bHasReconstructionLayoutDifferentFromHistory = false ;
}
2019-10-01 13:03:04 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : ScreenSpaceDiffuseIndirect )
{
ReconstructionDescs [ 0 ] . Format = PF_FloatR11G11B10 ;
2020-01-16 14:00:26 -05:00
HistoryDescs [ 0 ] . Format = PF_FloatR11G11B10 ;
2019-10-01 13:03:04 -04:00
ReconstructionDescs [ 1 ] . Format = PF_R8G8 ;
ReconstructionTextureCount = 2 ;
2020-01-16 14:00:26 -05:00
2019-10-01 13:03:04 -04:00
HistoryDescs [ 1 ] . Format = PF_R8G8 ;
HistoryTextureCountPerSignal = 2 ;
bHasReconstructionLayoutDifferentFromHistory = false ;
}
2020-07-06 18:58:26 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : IndirectProbeHierarchy )
{
ReconstructionDescs [ 0 ] . Format = PF_FloatR11G11B10 ;
ReconstructionDescs [ 1 ] . Format = PF_FloatR11G11B10 ;
ReconstructionDescs [ 2 ] . Format = PF_R8 ;
ReconstructionTextureCount = 3 ;
HistoryDescs [ 0 ] . Format = PF_FloatR11G11B10 ;
HistoryDescs [ 1 ] . Format = PF_FloatR11G11B10 ;
HistoryDescs [ 2 ] . Format = PF_R8 ;
HistoryTextureCountPerSignal = 3 ;
bHasReconstructionLayoutDifferentFromHistory = true ;
}
2019-01-22 19:17:17 -05:00
else
{
check ( 0 ) ;
}
2019-03-07 11:25:32 -05:00
check ( HistoryTextureCountPerSignal > 0 ) ;
check ( ReconstructionTextureCount > 0 ) ;
2019-01-22 19:17:17 -05:00
}
2019-01-07 17:22:05 -05:00
2019-06-11 18:27:07 -04:00
// Create a UAV used to output debugging information from the shader.
auto CreateDebugUAV = [ & ] ( const TCHAR * DebugTextureName )
{
return GraphBuilder . CreateUAV ( GraphBuilder . CreateTexture ( DebugDesc , DebugTextureName ) ) ;
} ;
2019-03-07 11:25:32 -05:00
int32 HistoryTextureCount = HistoryTextureCountPerSignal * Settings . SignalBatchSize ;
check ( HistoryTextureCount < = kMaxBufferProcessingCount ) ;
2019-01-07 17:22:05 -05:00
// Setup common shader parameters.
FSSDCommonParameters CommonParameters ;
{
2020-07-06 18:58:26 -04:00
Denoiser : : SetupCommonShaderParameters (
View , SceneTextures ,
Settings . FullResViewport ,
Settings . DenoisingResolutionFraction ,
/* out */ & CommonParameters . PublicCommonParameters ) ;
2019-10-01 13:03:04 -04:00
CommonParameters . ViewportMin = Viewport . Min ;
CommonParameters . ViewportMax = Viewport . Max ;
2019-09-14 09:45:25 -04:00
2019-06-11 18:27:07 -04:00
CommonParameters . SceneTextures = SceneTextures ;
2022-04-01 08:35:55 -04:00
CommonParameters . Strata = Strata : : BindStrataGlobalUniformParameters ( View ) ;
2019-01-07 17:22:05 -05:00
CommonParameters . ViewUniformBuffer = View . ViewUniformBuffer ;
2020-09-24 00:43:27 -04:00
CommonParameters . EyeAdaptationTexture = GetEyeAdaptationTexture ( GraphBuilder , View ) ;
2019-06-11 18:27:07 -04:00
// Remove dependency of the velocity buffer on camera cut, given it's going to be ignored by the shaders.
2020-09-24 00:43:27 -04:00
if ( View . bCameraCut | | ! CommonParameters . SceneTextures . GBufferVelocityTexture )
2019-06-11 18:27:07 -04:00
{
2020-09-24 00:43:27 -04:00
CommonParameters . SceneTextures . GBufferVelocityTexture = GraphBuilder . RegisterExternalTexture ( GSystemTextures . BlackDummy ) ;
2019-06-11 18:27:07 -04:00
}
2019-10-01 13:03:04 -04:00
float PixelPositionToFullResPixel = 1.0f / Settings . DenoisingResolutionFraction ;
2019-10-02 18:47:26 -04:00
FVector2D FullResPixelOffset = FVector2D ( 0.5f , 0.5f ) ; // TODO(Denoiser).
2019-10-01 13:03:04 -04:00
CommonParameters . ThreadIdToBufferUV . X = PixelPositionToFullResPixel / float ( FullResBufferExtent . X ) ;
CommonParameters . ThreadIdToBufferUV . Y = PixelPositionToFullResPixel / float ( FullResBufferExtent . Y ) ;
CommonParameters . ThreadIdToBufferUV . Z = ( Viewport . Min . X * PixelPositionToFullResPixel + FullResPixelOffset . X ) / float ( FullResBufferExtent . X ) ;
CommonParameters . ThreadIdToBufferUV . W = ( Viewport . Min . Y * PixelPositionToFullResPixel + FullResPixelOffset . Y ) / float ( FullResBufferExtent . Y ) ;
CommonParameters . BufferUVToOutputPixelPosition . X = BufferExtent . X ;
CommonParameters . BufferUVToOutputPixelPosition . Y = BufferExtent . Y ;
2022-01-27 07:20:20 -05:00
CommonParameters . ScreenToView = FMatrix44f ( FMatrix ( // LWC_TODO: Precision loss
2019-10-01 13:03:04 -04:00
FPlane ( 1 , 0 , 0 , 0 ) ,
FPlane ( 0 , 1 , 0 , 0 ) ,
FPlane ( 0 , 0 , View . ProjectionMatrixUnadjustedForRHI . M [ 2 ] [ 2 ] , 1 ) ,
FPlane ( 0 , 0 , View . ProjectionMatrixUnadjustedForRHI . M [ 3 ] [ 2 ] , 0 ) )
2022-01-27 07:20:20 -05:00
* View . ViewMatrices . GetInvProjectionMatrix ( ) ) ;
2019-11-14 07:02:07 -05:00
CommonParameters . BufferUVBilinearCorrection . X = ( 0.5f * PixelPositionToFullResPixel - FullResPixelOffset . X ) / float ( FullResBufferExtent . X ) ;
CommonParameters . BufferUVBilinearCorrection . Y = ( 0.5f * PixelPositionToFullResPixel - FullResPixelOffset . Y ) / float ( FullResBufferExtent . Y ) ;
2019-01-07 17:22:05 -05:00
}
2021-04-15 12:55:18 -04:00
CommonParameters . FrameIndex = View . ViewState ? View . ViewState - > FrameIndex : 0 ;
2019-06-11 18:27:07 -04:00
2019-03-07 11:25:32 -05:00
// Setup all the metadata to do spatial convolution.
FSSDConvolutionMetaData ConvolutionMetaData ;
2021-04-26 15:47:32 -04:00
if ( Settings . SignalProcessing = = ESignalProcessing : : ShadowVisibilityMask
2020-07-06 18:58:26 -04:00
)
2019-01-22 19:17:17 -05:00
{
2019-03-07 11:25:32 -05:00
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
FLightSceneProxy * LightSceneProxy = Settings . LightSceneInfo [ BatchedSignalId ] - > Proxy ;
2022-01-26 13:56:31 -05:00
FLightRenderParameters Parameters ;
2019-03-07 11:25:32 -05:00
LightSceneProxy - > GetLightShaderParameters ( Parameters ) ;
2022-01-26 13:56:31 -05:00
const FVector3f TranslatedWorldPosition = FVector3f ( View . ViewMatrices . GetPreViewTranslation ( ) + Parameters . WorldPosition ) ;
2021-09-22 10:01:48 -04:00
ConvolutionMetaData . LightPositionAndRadius [ BatchedSignalId ] = FVector4f (
2022-01-26 13:56:31 -05:00
TranslatedWorldPosition , Parameters . SourceRadius ) ;
2021-09-22 10:01:48 -04:00
ConvolutionMetaData . LightDirectionAndLength [ BatchedSignalId ] = FVector4f (
2019-03-07 11:25:32 -05:00
Parameters . Direction , Parameters . SourceLength ) ;
2021-10-04 09:14:58 -04:00
GET_SCALAR_ARRAY_ELEMENT ( ConvolutionMetaData . HitDistanceToWorldBluringRadius , BatchedSignalId ) =
2020-09-01 14:07:48 -04:00
FMath : : Tan ( 0.5 * FMath : : DegreesToRadians ( LightSceneProxy - > GetLightSourceAngle ( ) ) * LightSceneProxy - > GetShadowSourceAngleFactor ( ) ) ;
2021-10-04 09:14:58 -04:00
GET_SCALAR_ARRAY_ELEMENT ( ConvolutionMetaData . LightType , BatchedSignalId ) = LightSceneProxy - > GetLightType ( ) ;
2019-03-07 11:25:32 -05:00
}
}
2019-10-01 13:03:04 -04:00
// Compress the meta data for lower memory bandwidth, half res for coherent memory access, and lower VGPR footprint.
ECompressedMetadataLayout CompressedMetadataLayout = GetSignalCompressedMetadata ( Settings . SignalProcessing ) ;
2020-07-06 18:58:26 -04:00
if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : FedDepthAndShadingModelID )
{
check ( Settings . CompressedDepthTexture ) ;
check ( Settings . CompressedShadingModelTexture ) ;
CommonParameters . CompressedMetadata [ 0 ] = Settings . CompressedDepthTexture ;
CommonParameters . CompressedMetadata [ 1 ] = Settings . CompressedShadingModelTexture ;
}
else if ( CompressedMetadataLayout ! = ECompressedMetadataLayout : : Disabled )
2019-10-01 13:03:04 -04:00
{
if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : DepthAndNormal | |
CompressedMetadataLayout = = ECompressedMetadataLayout : : DepthAndViewNormal )
{
2020-09-24 00:43:27 -04:00
FRDGTextureDesc Desc = FRDGTextureDesc : : Create2D (
2019-10-01 13:03:04 -04:00
BufferExtent ,
PF_R32_UINT ,
FClearValueBinding : : Black ,
2020-09-24 00:43:27 -04:00
TexCreate_ShaderResource | TexCreate_RenderTargetable | TexCreate_UAV ) ;
2019-10-01 13:03:04 -04:00
CommonParameters . CompressedMetadata [ 0 ] = GraphBuilder . CreateTexture ( Desc , TEXT ( " DenoiserMetadata0 " ) ) ;
2020-07-06 18:58:26 -04:00
CommonParameters . CompressedMetadata [ 1 ] = nullptr ;
2019-10-01 13:03:04 -04:00
}
else
{
check ( 0 ) ;
}
FSSDCompressMetadataCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSSDCompressMetadataCS : : FMetadataLayoutDim > ( CompressedMetadataLayout ) ;
FSSDCompressMetadataCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDCompressMetadataCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
for ( int32 i = 0 ; i < kCompressedMetadataTextures ; i + + )
2020-07-06 18:58:26 -04:00
PassParameters - > CompressedMetadataOutput [ i ] = CommonParameters . CompressedMetadata [ i ] ? GraphBuilder . CreateUAV ( CommonParameters . CompressedMetadata [ i ] ) : nullptr ;
2019-10-01 13:03:04 -04:00
TShaderMapRef < FSSDCompressMetadataCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SSD CompressMetadata %dx%d " , Viewport . Width ( ) , Viewport . Height ( ) ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-10-01 13:03:04 -04:00
PassParameters ,
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FComputeShaderUtils : : kGolden2DGroupSize ) ) ;
}
2019-03-07 11:25:32 -05:00
FSSDSignalTextures SignalHistory = InputSignal ;
// Injestion pass to precompute some values for the reconstruction pass.
if ( SignalUsesInjestion ( Settings . SignalProcessing ) )
{
FSSDSignalTextures NewSignalOutput = CreateMultiplexedTextures (
GraphBuilder ,
InjestTextureCount , InjestDescs ,
GetResourceNames ( kInjestResourceNames ) ) ;
FSSDInjestCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDInjestCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-07 11:25:32 -05:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , NewSignalOutput ) ;
2020-09-01 14:07:48 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserInjest " ) ) ;
2019-03-07 11:25:32 -05:00
FSSDInjestCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
PermutationVector . Set < FMultiSPPDim > ( bUseMultiInputSPPShaderPath ) ;
2019-10-01 13:03:04 -04:00
PermutationVector = FSSDInjestCS : : RemapPermutationVector ( PermutationVector ) ;
2019-03-07 11:25:32 -05:00
TShaderMapRef < FSSDInjestCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SSD Injest(MultiSPP=%i) " ,
int32 ( PermutationVector . Get < FMultiSPPDim > ( ) ) ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-03-07 11:25:32 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FComputeShaderUtils : : kGolden2DGroupSize ) ) ;
2019-03-07 11:25:32 -05:00
SignalHistory = NewSignalOutput ;
2019-01-22 19:17:17 -05:00
}
2019-01-07 17:22:05 -05:00
2019-09-14 09:45:25 -04:00
// Spatial reconstruction with ratio estimator to be more precise in the history rejection.
if ( Settings . bEnableReconstruction )
2019-01-10 20:14:17 -05:00
{
2019-03-07 11:25:32 -05:00
FSSDSignalTextures NewSignalOutput = CreateMultiplexedTextures (
GraphBuilder ,
ReconstructionTextureCount , ReconstructionDescs ,
GetResourceNames ( kReconstructionResourceNames ) ) ;
2019-01-10 20:14:17 -05:00
FSSDSpatialAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDSpatialAccumulationCS : : FParameters > ( ) ;
2019-09-14 09:45:25 -04:00
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
FIntRect SignalScissor = Settings . SignalScissor [ BatchedSignalId ] ;
2021-09-22 10:01:48 -04:00
PassParameters - > InputBufferUVMinMax [ BatchedSignalId ] = FVector4f (
2019-09-14 09:45:25 -04:00
float ( SignalScissor . Min . X + 0.5f ) / float ( BufferExtent . X ) ,
float ( SignalScissor . Min . Y + 0.5f ) / float ( BufferExtent . Y ) ,
float ( SignalScissor . Max . X - 0.5f ) / float ( BufferExtent . X ) ,
float ( SignalScissor . Max . Y - 0.5f ) / float ( BufferExtent . Y ) ) ;
}
2020-04-29 16:25:02 -04:00
PassParameters - > MaxSampleCount = Settings . ReconstructionSamples ;
PassParameters - > PreviousCumulativeMaxSampleCount = 1 ;
2019-10-01 13:03:04 -04:00
PassParameters - > UpscaleFactor = int32 ( Settings . DenoisingResolutionFraction / Settings . InputResolutionFraction ) ;
2019-09-14 09:45:25 -04:00
PassParameters - > HarmonicPeriode = Settings . HarmonicPeriode ;
2019-01-10 20:14:17 -05:00
PassParameters - > CommonParameters = CommonParameters ;
2019-03-07 11:25:32 -05:00
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-10-01 13:03:04 -04:00
//PassParameters->SignalInputUint = CreateMultiplexedUintSRVs(GraphBuilder, SignalHistory);
2019-03-07 11:25:32 -05:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , NewSignalOutput ) ;
2019-01-14 15:34:19 -05:00
2019-06-11 18:27:07 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserReconstruction " ) ) ;
2019-01-10 20:14:17 -05:00
FSSDSpatialAccumulationCS : : FPermutationDomain PermutationVector ;
2019-01-22 19:17:17 -05:00
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
2019-01-10 20:14:17 -05:00
PermutationVector . Set < FSSDSpatialAccumulationCS : : FStageDim > ( FSSDSpatialAccumulationCS : : EStage : : ReConstruction ) ;
2019-01-17 17:03:37 -05:00
PermutationVector . Set < FSSDSpatialAccumulationCS : : FUpscaleDim > ( PassParameters - > UpscaleFactor ! = 1 ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FMultiSPPDim > ( bUseMultiInputSPPShaderPath ) ;
2019-06-11 18:27:07 -04:00
PermutationVector = FSSDSpatialAccumulationCS : : RemapPermutationVector ( PermutationVector ) ;
2019-01-10 20:14:17 -05:00
TShaderMapRef < FSSDSpatialAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2020-04-29 16:23:18 -04:00
RDG_EVENT_NAME ( " SSD Reconstruction(MaxSamples=%i Scissor=%ix%i%s%s) " ,
2019-03-07 11:25:32 -05:00
PassParameters - > MaxSampleCount ,
2019-10-01 13:03:04 -04:00
Viewport . Width ( ) , Viewport . Height ( ) ,
2019-09-14 09:45:25 -04:00
PermutationVector . Get < FSSDSpatialAccumulationCS : : FUpscaleDim > ( ) ? TEXT ( " Upscale " ) : TEXT ( " " ) ,
PermutationVector . Get < FMultiSPPDim > ( ) ? TEXT ( " " ) : TEXT ( " 1SPP " ) ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-01-10 20:14:17 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
2019-01-10 20:14:17 -05:00
2019-03-07 11:25:32 -05:00
SignalHistory = NewSignalOutput ;
2019-01-10 20:14:17 -05:00
}
2019-01-07 17:22:05 -05:00
2019-03-15 13:58:07 -04:00
// Spatial pre convolutions
for ( int32 PreConvolutionId = 0 ; PreConvolutionId < Settings . PreConvolutionCount ; PreConvolutionId + + )
{
check ( SignalUsesPreConvolution ( Settings . SignalProcessing ) ) ;
FSSDSignalTextures NewSignalOutput = CreateMultiplexedTextures (
GraphBuilder ,
ReconstructionTextureCount , ReconstructionDescs ,
GetResourceNames ( kPreConvolutionResourceNames ) ) ;
FSSDSpatialAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDSpatialAccumulationCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2020-04-29 16:25:02 -04:00
PassParameters - > MaxSampleCount = Settings . ReconstructionSamples ;
2021-01-04 07:59:22 -04:00
PassParameters - > PreviousCumulativeMaxSampleCount = FMath : : Pow ( static_cast < float > ( PassParameters - > MaxSampleCount ) , 1 + PreConvolutionId ) ;
2020-09-24 00:43:27 -04:00
PassParameters - > KernelSpreadFactor = Settings . KernelSpreadFactor * ( 1 < < PreConvolutionId ) ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-15 13:58:07 -04:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , NewSignalOutput ) ;
2019-06-11 18:27:07 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserPreConvolution " ) ) ;
2019-03-15 13:58:07 -04:00
FSSDSpatialAccumulationCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
PermutationVector . Set < FSSDSpatialAccumulationCS : : FStageDim > ( FSSDSpatialAccumulationCS : : EStage : : PreConvolution ) ;
PermutationVector . Set < FMultiSPPDim > ( true ) ;
TShaderMapRef < FSSDSpatialAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2020-04-29 16:25:02 -04:00
RDG_EVENT_NAME (
" SSD PreConvolution(MaxSamples=%d Spread=%f) " ,
PassParameters - > MaxSampleCount ,
PassParameters - > KernelSpreadFactor ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-03-15 13:58:07 -04:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
2019-03-15 13:58:07 -04:00
SignalHistory = NewSignalOutput ;
}
2019-10-01 13:03:04 -04:00
bool bExtractSceneDepth = false ;
bool bExtractSceneGBufferA = false ;
bool bExtractSceneGBufferB = false ;
TStaticArray < bool , kCompressedMetadataTextures > bExtractCompressedMetadata ;
for ( int32 i = 0 ; i < kCompressedMetadataTextures ; i + + )
bExtractCompressedMetadata [ i ] = false ;
2019-01-10 17:58:11 -05:00
// Temporal pass.
2019-03-07 11:25:32 -05:00
//
// Note: always done even if there is no ViewState, because that is already not an ideal case for the denoiser quality, so we do not really
// care about the performance, and the reconstruction may have a different layout than the temporal accumulation output.
if ( bHasReconstructionLayoutDifferentFromHistory | | Settings . bUseTemporalAccumulation )
2019-01-07 17:22:05 -05:00
{
2019-03-07 11:25:32 -05:00
FSSDSignalTextures RejectionPreConvolutionSignal ;
2019-02-14 14:23:27 -05:00
// Temporal rejection might make use of a separable preconvolution.
if ( SignalUsesRejectionPreConvolution ( Settings . SignalProcessing ) )
{
{
2019-03-07 11:25:32 -05:00
int32 RejectionTextureCount = 1 ;
TStaticArray < FRDGTextureDesc , kMaxBufferProcessingCount > RejectionSignalProcessingDescs ;
for ( int32 i = 0 ; i < kMaxBufferProcessingCount ; i + + )
{
RejectionSignalProcessingDescs [ i ] = HistoryDescs [ i ] ;
}
2019-10-01 13:03:04 -04:00
if ( Settings . SignalProcessing = = ESignalProcessing : : ShadowVisibilityMask )
2019-03-12 10:09:38 -04:00
{
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
2020-04-07 01:28:45 -04:00
RejectionSignalProcessingDescs [ BatchedSignalId ] . Format = PF_FloatRGBA ;
2019-03-12 10:09:38 -04:00
}
RejectionTextureCount = Settings . SignalBatchSize ;
}
2019-06-11 18:27:07 -04:00
else if ( Settings . SignalProcessing = = ESignalProcessing : : AmbientOcclusion )
{
RejectionSignalProcessingDescs [ 0 ] . Format = PF_FloatRGBA ;
}
2019-02-14 14:23:27 -05:00
else
{
check ( 0 ) ;
}
2019-03-07 11:25:32 -05:00
RejectionPreConvolutionSignal = CreateMultiplexedTextures (
GraphBuilder ,
RejectionTextureCount , RejectionSignalProcessingDescs ,
GetResourceNames ( kRejectionPreConvolutionResourceNames ) ) ;
2019-02-14 14:23:27 -05:00
}
FSSDSpatialAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDSpatialAccumulationCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
2019-03-07 11:25:32 -05:00
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-07 11:25:32 -05:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , RejectionPreConvolutionSignal ) ;
2019-02-14 14:23:27 -05:00
FSSDSpatialAccumulationCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
2019-02-14 14:23:27 -05:00
PermutationVector . Set < FSSDSpatialAccumulationCS : : FStageDim > ( FSSDSpatialAccumulationCS : : EStage : : RejectionPreConvolution ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FMultiSPPDim > ( true ) ;
2019-06-11 18:27:07 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserRejectionPreConvolution " ) ) ;
2019-02-14 14:23:27 -05:00
TShaderMapRef < FSSDSpatialAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2020-09-24 00:43:27 -04:00
RDG_EVENT_NAME ( " SSD RejectionPreConvolution(MaxSamples=5) " ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-02-14 14:23:27 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
2019-02-14 14:23:27 -05:00
} // if (SignalUsesRejectionPreConvolution(Settings.SignalProcessing))
2019-03-07 11:25:32 -05:00
FSSDSignalTextures SignalOutput = CreateMultiplexedTextures (
GraphBuilder ,
HistoryTextureCount , HistoryDescs ,
GetResourceNames ( kTemporalAccumulationResourceNames ) ) ;
2019-01-10 20:14:17 -05:00
2019-01-07 17:22:05 -05:00
FSSDTemporalAccumulationCS : : FPermutationDomain PermutationVector ;
2019-01-22 19:17:17 -05:00
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
2019-01-07 17:22:05 -05:00
TShaderMapRef < FSSDTemporalAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FSSDTemporalAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDTemporalAccumulationCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
2019-03-07 11:25:32 -05:00
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-06-11 18:27:07 -04:00
PassParameters - > HistoryPreExposureCorrection = View . PreExposure / View . PrevViewInfo . SceneColorPreExposure ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-07 11:25:32 -05:00
PassParameters - > HistoryRejectionSignal = RejectionPreConvolutionSignal ;
PassParameters - > SignalHistoryOutput = CreateMultiplexedUAVs ( GraphBuilder , SignalOutput ) ;
2019-02-14 14:23:27 -05:00
2019-03-07 11:25:32 -05:00
// Setup common previous frame data.
2020-01-24 18:07:01 -05:00
PassParameters - > PrevDepthBuffer = RegisterExternalTextureWithFallback ( GraphBuilder , ViewInfoPooledRenderTargets . PrevDepthBuffer , GSystemTextures . BlackDummy ) ;
PassParameters - > PrevGBufferA = RegisterExternalTextureWithFallback ( GraphBuilder , ViewInfoPooledRenderTargets . PrevGBufferA , GSystemTextures . BlackDummy ) ;
PassParameters - > PrevGBufferB = RegisterExternalTextureWithFallback ( GraphBuilder , ViewInfoPooledRenderTargets . PrevGBufferB , GSystemTextures . BlackDummy ) ;
2019-01-07 17:22:05 -05:00
2020-07-06 18:58:26 -04:00
bool bGlobalCameraCut = ! View . PrevViewInfo . DepthBuffer . IsValid ( ) ;
2019-10-01 13:03:04 -04:00
if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : DepthAndViewNormal )
{
PassParameters - > PrevCompressedMetadata [ 0 ] = RegisterExternalTextureWithFallback (
2020-03-17 20:02:58 -04:00
GraphBuilder , ViewInfoPooledRenderTargets . PrevCompressedDepthViewNormal , GSystemTextures . ZeroUIntDummy ) ;
2020-07-06 18:58:26 -04:00
bGlobalCameraCut = ! View . PrevViewInfo . CompressedDepthViewNormal . IsValid ( ) ;
}
else if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : FedDepthAndShadingModelID )
{
PassParameters - > PrevCompressedMetadata [ 0 ] = RegisterExternalTextureWithFallback (
GraphBuilder , View . PrevViewInfo . CompressedOpaqueDepth , GSystemTextures . BlackDummy ) ;
PassParameters - > PrevCompressedMetadata [ 1 ] = RegisterExternalTextureWithFallback (
GraphBuilder , View . PrevViewInfo . CompressedOpaqueShadingModel , GSystemTextures . ZeroUIntDummy ) ;
bGlobalCameraCut = ! View . PrevViewInfo . CompressedOpaqueDepth . IsValid ( ) | | ! View . PrevViewInfo . CompressedOpaqueShadingModel . IsValid ( ) ;
2019-10-01 13:03:04 -04:00
}
2020-04-23 20:19:23 -04:00
FIntPoint PrevFrameBufferExtent ;
2020-07-06 18:58:26 -04:00
if ( bGlobalCameraCut )
{
2021-09-22 10:01:48 -04:00
PassParameters - > ScreenPosToHistoryBufferUV = FVector4f ( 1.0f , 1.0f , 1.0f , 1.0f ) ;
PassParameters - > HistoryBufferUVMinMax = FVector4f ( 0.0f , 0.0f , 0.0f , 0.0f ) ;
PassParameters - > HistoryBufferSizeAndInvSize = FVector4f ( 1.0f , 1.0f , 1.0f , 1.0f ) ;
2020-07-06 18:58:26 -04:00
PrevFrameBufferExtent = FIntPoint ( 1 , 1 ) ;
}
else
{
FIntPoint ViewportOffset = View . PrevViewInfo . ViewRect . Min ;
FIntPoint ViewportExtent = View . PrevViewInfo . ViewRect . Size ( ) ;
if ( PassParameters - > PrevCompressedMetadata [ 0 ] )
{
PrevFrameBufferExtent = PassParameters - > PrevCompressedMetadata [ 0 ] - > Desc . Extent ;
}
else
{
PrevFrameBufferExtent = PassParameters - > PrevDepthBuffer - > Desc . Extent ;
}
float InvBufferSizeX = 1.f / float ( PrevFrameBufferExtent . X ) ;
float InvBufferSizeY = 1.f / float ( PrevFrameBufferExtent . Y ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > ScreenPosToHistoryBufferUV = FVector4f (
2020-07-06 18:58:26 -04:00
ViewportExtent . X * 0.5f * InvBufferSizeX ,
- ViewportExtent . Y * 0.5f * InvBufferSizeY ,
( ViewportExtent . X * 0.5f + ViewportOffset . X ) * InvBufferSizeX ,
( ViewportExtent . Y * 0.5f + ViewportOffset . Y ) * InvBufferSizeY ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > HistoryBufferUVMinMax = FVector4f (
2020-07-06 18:58:26 -04:00
( ViewportOffset . X + 0.5f ) * InvBufferSizeX ,
( ViewportOffset . Y + 0.5f ) * InvBufferSizeY ,
( ViewportOffset . X + ViewportExtent . X - 0.5f ) * InvBufferSizeX ,
( ViewportOffset . Y + ViewportExtent . Y - 0.5f ) * InvBufferSizeY ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > HistoryBufferSizeAndInvSize = FVector4f ( PrevFrameBufferExtent . X , PrevFrameBufferExtent . Y , InvBufferSizeX , InvBufferSizeY ) ;
2020-07-06 18:58:26 -04:00
PassParameters - > PrevSceneBufferUVToScreenPosition . X = float ( PrevFrameBufferExtent . X ) / float ( ViewportExtent . X ) * 2.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . Y = - float ( PrevFrameBufferExtent . Y ) / float ( ViewportExtent . Y ) * 2.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . Z = float ( ViewportOffset . X ) / float ( ViewportExtent . X ) * 2.0f - 1.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . W = - float ( ViewportOffset . Y ) / float ( ViewportExtent . Y ) * 2.0f + 1.0f ;
}
2020-04-23 20:19:23 -04:00
if ( bGlobalCameraCut )
{
2021-09-22 10:01:48 -04:00
PassParameters - > ScreenPosToHistoryBufferUV = FVector4f ( 1.0f , 1.0f , 1.0f , 1.0f ) ;
PassParameters - > HistoryBufferUVMinMax = FVector4f ( 0.0f , 0.0f , 0.0f , 0.0f ) ;
PassParameters - > HistoryBufferSizeAndInvSize = FVector4f ( 1.0f , 1.0f , 1.0f , 1.0f ) ;
2020-04-23 20:19:23 -04:00
PrevFrameBufferExtent = FIntPoint ( 1 , 1 ) ;
}
else
{
FIntPoint ViewportOffset = View . PrevViewInfo . ViewRect . Min ;
FIntPoint ViewportExtent = View . PrevViewInfo . ViewRect . Size ( ) ;
if ( PassParameters - > PrevCompressedMetadata [ 0 ] )
{
PrevFrameBufferExtent = PassParameters - > PrevCompressedMetadata [ 0 ] - > Desc . Extent ;
}
else
{
PrevFrameBufferExtent = PassParameters - > PrevDepthBuffer - > Desc . Extent ;
}
float InvBufferSizeX = 1.f / float ( PrevFrameBufferExtent . X ) ;
float InvBufferSizeY = 1.f / float ( PrevFrameBufferExtent . Y ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > ScreenPosToHistoryBufferUV = FVector4f (
2020-04-23 20:19:23 -04:00
ViewportExtent . X * 0.5f * InvBufferSizeX ,
- ViewportExtent . Y * 0.5f * InvBufferSizeY ,
( ViewportExtent . X * 0.5f + ViewportOffset . X ) * InvBufferSizeX ,
( ViewportExtent . Y * 0.5f + ViewportOffset . Y ) * InvBufferSizeY ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > HistoryBufferUVMinMax = FVector4f (
2020-04-23 20:19:23 -04:00
( ViewportOffset . X + 0.5f ) * InvBufferSizeX ,
( ViewportOffset . Y + 0.5f ) * InvBufferSizeY ,
( ViewportOffset . X + ViewportExtent . X - 0.5f ) * InvBufferSizeX ,
( ViewportOffset . Y + ViewportExtent . Y - 0.5f ) * InvBufferSizeY ) ;
2021-09-22 10:01:48 -04:00
PassParameters - > HistoryBufferSizeAndInvSize = FVector4f ( PrevFrameBufferExtent . X , PrevFrameBufferExtent . Y , InvBufferSizeX , InvBufferSizeY ) ;
2020-04-29 16:27:57 -04:00
PassParameters - > PrevSceneBufferUVToScreenPosition . X = float ( PrevFrameBufferExtent . X ) / float ( ViewportExtent . X ) * 2.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . Y = - float ( PrevFrameBufferExtent . Y ) / float ( ViewportExtent . Y ) * 2.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . Z = float ( ViewportOffset . X ) / float ( ViewportExtent . X ) * 2.0f - 1.0f ;
PassParameters - > PrevSceneBufferUVToScreenPosition . W = - float ( ViewportOffset . Y ) / float ( ViewportExtent . Y ) * 2.0f + 1.0f ;
2020-04-23 20:19:23 -04:00
}
2019-09-14 09:45:25 -04:00
FScreenSpaceDenoiserHistory DummyPrevFrameHistory ;
2019-03-07 11:25:32 -05:00
// Setup signals' previous frame history buffers.
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
2019-09-14 09:45:25 -04:00
FScreenSpaceDenoiserHistory * PrevFrameHistory = PrevFilteringHistory [ BatchedSignalId ] ? PrevFilteringHistory [ BatchedSignalId ] : & DummyPrevFrameHistory ;
2019-03-07 11:25:32 -05:00
2021-10-04 09:14:58 -04:00
GET_SCALAR_ARRAY_ELEMENT ( PassParameters - > bCameraCut , BatchedSignalId ) = ! PrevFrameHistory - > IsValid ( ) ;
2019-03-07 11:25:32 -05:00
2020-04-23 20:19:23 -04:00
if ( ! ( View . ViewState & & Settings . bUseTemporalAccumulation ) | | bGlobalCameraCut )
2019-03-07 11:25:32 -05:00
{
2021-10-04 09:14:58 -04:00
GET_SCALAR_ARRAY_ELEMENT ( PassParameters - > bCameraCut , BatchedSignalId ) = true ;
2019-03-07 11:25:32 -05:00
}
for ( int32 BufferId = 0 ; BufferId < HistoryTextureCountPerSignal ; BufferId + + )
{
int32 HistoryBufferId = BatchedSignalId * HistoryTextureCountPerSignal + BufferId ;
PassParameters - > PrevHistory . Textures [ HistoryBufferId ] = RegisterExternalTextureWithFallback (
GraphBuilder , PrevFrameHistory - > RT [ BufferId ] , GSystemTextures . BlackDummy ) ;
}
2021-09-22 10:01:48 -04:00
PassParameters - > HistoryBufferScissorUVMinMax [ BatchedSignalId ] = FVector4f (
2020-04-23 20:19:23 -04:00
float ( PrevFrameHistory - > Scissor . Min . X + 0.5f ) / float ( PrevFrameBufferExtent . X ) ,
float ( PrevFrameHistory - > Scissor . Min . Y + 0.5f ) / float ( PrevFrameBufferExtent . Y ) ,
float ( PrevFrameHistory - > Scissor . Max . X - 0.5f ) / float ( PrevFrameBufferExtent . X ) ,
float ( PrevFrameHistory - > Scissor . Max . Y - 0.5f ) / float ( PrevFrameBufferExtent . Y ) ) ;
2019-09-14 09:45:25 -04:00
2019-03-07 11:25:32 -05:00
// Releases the reference on previous frame so the history's render target can be reused ASAP.
PrevFrameHistory - > SafeRelease ( ) ;
} // for (uint32 BatchedSignalId = 0; BatchedSignalId < Settings.SignalBatchSize; BatchedSignalId++)
2019-06-11 18:27:07 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserTemporalAccumulation " ) ) ;
2019-01-07 17:22:05 -05:00
2019-10-01 13:03:04 -04:00
// Manually clear the unused resources to find out what the shader is actually going to need for the next frame.
{
2020-02-06 13:13:41 -05:00
ClearUnusedGraphResources ( ComputeShader , PassParameters ) ;
2019-10-01 13:03:04 -04:00
bExtractSceneDepth = PassParameters - > PrevDepthBuffer ! = nullptr ;
bExtractSceneGBufferA = PassParameters - > PrevGBufferA ! = nullptr ;
bExtractSceneGBufferB = PassParameters - > PrevGBufferB ! = nullptr ;
for ( int32 i = 0 ; i < kCompressedMetadataTextures ; i + + )
bExtractCompressedMetadata [ i ] = PassParameters - > PrevCompressedMetadata [ i ] ! = nullptr ;
}
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
# if WITH_MGPU
{
FName EffectName = Settings . EffectName ;
FIntVector GroupCount = FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FComputeShaderUtils : : kGolden2DGroupSize ) ;
FComputeShaderUtils : : ValidateGroupCount ( GroupCount ) ;
const FShaderParametersMetadata * ParametersMetadata = FSSDTemporalAccumulationCS : : FParameters : : FTypeInfo : : GetStructMetadata ( ) ;
GraphBuilder . AddPass (
RDG_EVENT_NAME ( " SSD TemporalAccumulation%s " ,
( ! Settings . bUseTemporalAccumulation | | bGlobalCameraCut ) ? TEXT ( " (Disabled) " ) : TEXT ( " " ) ) ,
ParametersMetadata ,
PassParameters ,
ERDGPassFlags : : Compute ,
2022-01-24 11:35:55 -05:00
[ ParametersMetadata , PassParameters , ComputeShader , GroupCount , EffectName ] ( FRHIComputeCommandList & RHICmdList )
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
{
RHICmdList . WaitForTemporalEffect ( EffectName ) ;
FComputeShaderUtils : : Dispatch ( RHICmdList , ComputeShader , ParametersMetadata , * PassParameters , GroupCount ) ;
2022-01-24 11:35:55 -05:00
TArray < FRHITexture * , TFixedAllocator < kMaxBufferProcessingCount > > SignalOutputTexturesRHI ;
for ( FRDGTextureUAV * TextureUAV : PassParameters - > SignalHistoryOutput . UAVs )
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
{
2022-01-24 11:35:55 -05:00
if ( TextureUAV )
{
SignalOutputTexturesRHI . Add ( TextureUAV - > GetParentRHI ( ) ) ;
}
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
}
RHICmdList . BroadcastTemporalEffect (
EffectName , MakeArrayView ( SignalOutputTexturesRHI . GetData ( ) , SignalOutputTexturesRHI . Num ( ) ) ) ;
} ) ;
}
# else // WITH_MGPU
2019-01-07 17:22:05 -05:00
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2020-07-06 18:58:26 -04:00
RDG_EVENT_NAME ( " SSD TemporalAccumulation%s " ,
( ! Settings . bUseTemporalAccumulation | | bGlobalCameraCut ) ? TEXT ( " (Disabled) " ) : TEXT ( " " ) ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-01-07 17:22:05 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FComputeShaderUtils : : kGolden2DGroupSize ) ) ;
2019-01-10 20:14:17 -05:00
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
# endif // !WITH_MGPU
2019-03-07 11:25:32 -05:00
SignalHistory = SignalOutput ;
2019-02-14 14:23:27 -05:00
} // if (View.ViewState && Settings.bUseTemporalAccumulation)
2019-01-10 17:58:11 -05:00
// Spatial filter, to converge history faster.
2019-02-14 14:23:27 -05:00
int32 MaxPostFilterSampleCount = FMath : : Clamp ( Settings . HistoryConvolutionSampleCount , 1 , kStackowiakMaxSampleCountPerSet ) ;
2019-01-15 18:51:06 -05:00
if ( MaxPostFilterSampleCount > 1 )
2019-01-10 17:58:11 -05:00
{
2019-03-07 11:25:32 -05:00
FSSDSignalTextures SignalOutput = CreateMultiplexedTextures (
GraphBuilder ,
HistoryTextureCount , HistoryDescs ,
GetResourceNames ( kHistoryConvolutionResourceNames ) ) ;
2019-01-10 17:58:11 -05:00
FSSDSpatialAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDSpatialAccumulationCS : : FParameters > ( ) ;
2019-01-15 18:51:06 -05:00
PassParameters - > MaxSampleCount = FMath : : Clamp ( MaxPostFilterSampleCount , 1 , kStackowiakMaxSampleCountPerSet ) ;
2019-02-14 14:23:27 -05:00
PassParameters - > KernelSpreadFactor = Settings . HistoryConvolutionKernelSpreadFactor ;
2019-01-10 17:58:11 -05:00
PassParameters - > CommonParameters = CommonParameters ;
2019-03-07 11:25:32 -05:00
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-07 11:25:32 -05:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , SignalOutput ) ;
2019-01-10 17:58:11 -05:00
FSSDSpatialAccumulationCS : : FPermutationDomain PermutationVector ;
2019-01-22 19:17:17 -05:00
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
2019-01-10 20:14:17 -05:00
PermutationVector . Set < FSSDSpatialAccumulationCS : : FStageDim > ( FSSDSpatialAccumulationCS : : EStage : : PostFiltering ) ;
2019-03-07 11:25:32 -05:00
PermutationVector . Set < FMultiSPPDim > ( true ) ;
2019-01-15 18:51:06 -05:00
2019-06-11 18:27:07 -04:00
PassParameters - > DebugOutput = CreateDebugUAV ( TEXT ( " DebugDenoiserPostfilter " ) ) ;
2019-01-10 17:58:11 -05:00
TShaderMapRef < FSSDSpatialAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2020-09-24 00:43:27 -04:00
RDG_EVENT_NAME ( " SSD HistoryConvolution(MaxSamples=%i) " , MaxPostFilterSampleCount ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-01-10 17:58:11 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
2019-01-10 17:58:11 -05:00
2019-03-07 11:25:32 -05:00
SignalHistory = SignalOutput ;
2019-02-14 14:23:27 -05:00
} // if (MaxPostFilterSampleCount > 1)
2019-01-07 17:22:05 -05:00
2019-09-14 09:45:25 -04:00
if ( ! View . bStatePrevViewInfoIsReadOnly & & Settings . bUseTemporalAccumulation )
2019-01-07 17:22:05 -05:00
{
2019-03-07 11:25:32 -05:00
check ( View . ViewState ) ;
2019-10-01 13:03:04 -04:00
// Keep depth buffer and GBuffer around for next frame if the temporal accumulation needs it.
2019-01-07 17:22:05 -05:00
{
2019-10-01 13:03:04 -04:00
// Might require the depth.
if ( bExtractSceneDepth )
{
2020-09-24 00:43:27 -04:00
GraphBuilder . QueueTextureExtraction ( SceneTextures . SceneDepthTexture , ViewInfoPooledRenderTargets . NextDepthBuffer ) ;
2019-10-01 13:03:04 -04:00
}
2019-01-22 19:17:17 -05:00
2019-10-01 13:03:04 -04:00
// Might require the world normals that are in GBuffer A.
if ( bExtractSceneGBufferA )
2019-03-07 11:25:32 -05:00
{
2020-09-24 00:43:27 -04:00
GraphBuilder . QueueTextureExtraction ( SceneTextures . GBufferATexture , ViewInfoPooledRenderTargets . NextGBufferA ) ;
2019-03-07 11:25:32 -05:00
}
2019-01-22 19:17:17 -05:00
2019-10-01 13:03:04 -04:00
// Might need the roughness that is in GBuffer B.
if ( bExtractSceneGBufferB )
2019-03-07 11:25:32 -05:00
{
2020-09-24 00:43:27 -04:00
GraphBuilder . QueueTextureExtraction ( SceneTextures . GBufferBTexture , ViewInfoPooledRenderTargets . NextGBufferB ) ;
2019-03-07 11:25:32 -05:00
}
2019-10-01 13:03:04 -04:00
// Extract the compressed scene texture to make the history re-projection faster.
for ( int32 i = 0 ; i < kCompressedMetadataTextures ; i + + )
{
TRefCountPtr < IPooledRenderTarget > * Dest = nullptr ;
if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : DepthAndViewNormal )
{
2020-07-06 18:58:26 -04:00
if ( i = = 0 )
2019-10-01 13:03:04 -04:00
{
2020-01-24 18:07:01 -05:00
Dest = ViewInfoPooledRenderTargets . NextCompressedDepthViewNormal ;
2019-10-01 13:03:04 -04:00
}
}
2020-07-06 18:58:26 -04:00
else if ( CompressedMetadataLayout = = ECompressedMetadataLayout : : FedDepthAndShadingModelID )
{
if ( i = = 0 )
{
Dest = & View . ViewState - > PrevFrameViewInfo . CompressedOpaqueDepth ;
}
2020-08-06 15:34:17 -04:00
else // if (i == 1)
2020-07-06 18:58:26 -04:00
{
Dest = & View . ViewState - > PrevFrameViewInfo . CompressedOpaqueShadingModel ;
}
}
2019-10-01 13:03:04 -04:00
check ( ( CommonParameters . CompressedMetadata [ i ] ! = nullptr ) = = ( Dest ! = nullptr ) ) ;
2020-07-06 18:58:26 -04:00
if ( Dest )
2019-10-01 13:03:04 -04:00
{
check ( CommonParameters . CompressedMetadata [ i ] ) ;
GraphBuilder . QueueTextureExtraction ( CommonParameters . CompressedMetadata [ i ] , Dest ) ;
}
}
2019-01-07 17:22:05 -05:00
}
2019-03-07 11:25:32 -05:00
// Saves signal histories.
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
2019-01-07 17:22:05 -05:00
{
2019-09-14 09:45:25 -04:00
FScreenSpaceDenoiserHistory * NewHistory = NewFilteringHistory [ BatchedSignalId ] ;
2019-03-07 11:25:32 -05:00
check ( NewHistory ) ;
2019-01-22 19:17:17 -05:00
2019-03-07 11:25:32 -05:00
for ( int32 BufferId = 0 ; BufferId < HistoryTextureCountPerSignal ; BufferId + + )
{
int32 HistoryBufferId = BatchedSignalId * HistoryTextureCountPerSignal + BufferId ;
GraphBuilder . QueueTextureExtraction ( SignalHistory . Textures [ HistoryBufferId ] , & NewHistory - > RT [ BufferId ] ) ;
}
2019-09-14 09:45:25 -04:00
2019-10-01 13:03:04 -04:00
NewHistory - > Scissor = Settings . FullResViewport ;
2019-03-07 11:25:32 -05:00
} // for (uint32 BatchedSignalId = 0; BatchedSignalId < Settings.SignalBatchSize; BatchedSignalId++)
2019-01-07 17:22:05 -05:00
}
2019-03-07 11:25:32 -05:00
else if ( HistoryTextureCountPerSignal > = 2 )
2019-01-15 18:51:06 -05:00
{
// The SignalHistory1 is always generated for temporal history, but will end up useless if there is no view state,
// in which case we do not extract any textures. Don't support a shader permutation that does not produce it, because
// it is already a non-ideal case for the denoiser.
2019-03-07 11:25:32 -05:00
for ( int32 BufferId = 1 ; BufferId < HistoryTextureCountPerSignal ; BufferId + + )
{
GraphBuilder . RemoveUnusedTextureWarning ( SignalHistory . Textures [ BufferId ] ) ;
}
2019-01-15 18:51:06 -05:00
}
2019-01-07 17:22:05 -05:00
2019-03-07 11:25:32 -05:00
// Final convolution / output to correct
if ( SignalUsesFinalConvolution ( Settings . SignalProcessing ) )
{
TStaticArray < FRDGTextureDesc , kMaxBufferProcessingCount > OutputDescs ;
for ( int32 i = 0 ; i < kMaxBufferProcessingCount ; i + + )
{
OutputDescs [ i ] = HistoryDescs [ i ] ;
}
2019-10-01 13:03:04 -04:00
if ( Settings . SignalProcessing = = ESignalProcessing : : ShadowVisibilityMask )
2019-03-07 11:25:32 -05:00
{
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < Settings . SignalBatchSize ; BatchedSignalId + + )
{
OutputDescs [ BatchedSignalId ] . Format = PF_FloatRGBA ;
}
}
else
{
check ( 0 ) ;
}
* OutputSignal = CreateMultiplexedTextures (
GraphBuilder ,
Settings . SignalBatchSize , OutputDescs ,
GetResourceNames ( kDenoiserOutputResourceNames ) ) ;
FSSDSpatialAccumulationCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSSDSpatialAccumulationCS : : FParameters > ( ) ;
PassParameters - > CommonParameters = CommonParameters ;
2020-09-01 14:07:48 -04:00
PassParameters - > ConvolutionMetaData = ConvolutionMetaData ;
2019-09-14 09:45:25 -04:00
PassParameters - > SignalInput = SignalHistory ;
2019-03-07 11:25:32 -05:00
PassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , * OutputSignal ) ;
FSSDSpatialAccumulationCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSignalProcessingDim > ( Settings . SignalProcessing ) ;
PermutationVector . Set < FSignalBatchSizeDim > ( Settings . SignalBatchSize ) ;
PermutationVector . Set < FSSDSpatialAccumulationCS : : FStageDim > ( FSSDSpatialAccumulationCS : : EStage : : FinalOutput ) ;
PermutationVector . Set < FMultiSPPDim > ( true ) ;
TShaderMapRef < FSSDSpatialAccumulationCS > ComputeShader ( View . ShaderMap , PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SSD SpatialAccumulation(Final) " ) ,
2020-02-06 13:13:41 -05:00
ComputeShader ,
2019-03-07 11:25:32 -05:00
PassParameters ,
2019-10-01 13:03:04 -04:00
FComputeShaderUtils : : GetGroupCount ( Viewport . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
2019-03-07 11:25:32 -05:00
}
else
{
* OutputSignal = SignalHistory ;
}
2019-01-22 19:17:17 -05:00
} // DenoiseSignalAtConstantPixelDensity()
2018-12-18 21:41:17 -05:00
2019-09-14 09:45:25 -04:00
/**
 * Creates one render graph texture per harmonic border.
 *
 * All textures share the same descriptor (2D, PF_FloatRGBA, no clear value, created with the
 * shader-resource and UAV flags) and the same debug name.
 *
 * @param GraphBuilder  Render graph builder the textures are created on.
 * @param Extent        Extent given to every texture.
 * @param DebugName     Name passed to CreateTexture() for every texture.
 * @return The kHarmonicBordersCount newly created textures.
 */
// static
IScreenSpaceDenoiser::FHarmonicTextures IScreenSpaceDenoiser::CreateHarmonicTextures(FRDGBuilder& GraphBuilder, FIntPoint Extent, const TCHAR* DebugName)
{
	FRDGTextureDesc HarmonicDesc = FRDGTextureDesc::Create2D(
		Extent,
		PF_FloatRGBA,
		FClearValueBinding::None,
		TexCreate_ShaderResource | TexCreate_UAV);

	FHarmonicTextures Result;
	for (int32 BorderIndex = 0; BorderIndex < kHarmonicBordersCount; ++BorderIndex)
	{
		Result.Harmonics[BorderIndex] = GraphBuilder.CreateTexture(HarmonicDesc, DebugName);
	}

	return Result;
}
/**
 * Creates a UAV for each harmonic border texture.
 *
 * @param GraphBuilder  Render graph builder the UAVs are created on.
 * @param Textures      Harmonic textures to create UAVs for.
 * @return One UAV per entry of Textures.Harmonics, stored at the same index.
 */
// static
IScreenSpaceDenoiser::FHarmonicUAVs IScreenSpaceDenoiser::CreateUAVs(FRDGBuilder& GraphBuilder, const FHarmonicTextures& Textures)
{
	FHarmonicUAVs Result;
	for (int32 BorderIndex = 0; BorderIndex < kHarmonicBordersCount; ++BorderIndex)
	{
		Result.Harmonics[BorderIndex] = GraphBuilder.CreateUAV(Textures.Harmonics[BorderIndex]);
	}

	return Result;
}
2020-07-06 18:58:26 -04:00
/**
 * Creates a UAV for each spherical harmonic texture of a diffuse indirect harmonic.
 *
 * @param GraphBuilder  Render graph builder the UAVs are created on.
 * @param Textures      Spherical harmonic textures to create UAVs for.
 * @return One UAV per entry of Textures.SphericalHarmonic, stored at the same index.
 */
// static
IScreenSpaceDenoiser::FDiffuseIndirectHarmonicUAVs IScreenSpaceDenoiser::CreateUAVs(FRDGBuilder& GraphBuilder, const FDiffuseIndirectHarmonic& Textures)
{
	FDiffuseIndirectHarmonicUAVs Result;
	for (int32 TextureIndex = 0; TextureIndex < kSphericalHarmonicTextureCount; ++TextureIndex)
	{
		Result.SphericalHarmonic[TextureIndex] = GraphBuilder.CreateUAV(Textures.SphericalHarmonic[TextureIndex]);
	}

	return Result;
}
2018-12-18 21:41:17 -05:00
/** The implementation of the default denoiser of the renderer. */
class FDefaultScreenSpaceDenoiser : public IScreenSpaceDenoiser
{
public :
/** Returns the constant debug name of this denoiser implementation. */
const TCHAR* GetDebugName() const override
{
	return TEXT(" ScreenSpaceDenoiser ");
}
2019-03-07 11:25:32 -05:00
/**
 * Reports the shadow requirements of this denoiser.
 *
 * None of the arguments are read: the result is always PenumbraAndClosestOccluder.
 *
 * @param View              Unused.
 * @param LightSceneInfo    Unused.
 * @param RayTracingConfig  Unused.
 * @return EShadowRequirements::PenumbraAndClosestOccluder.
 */
virtual EShadowRequirements GetShadowRequirements(
	const FViewInfo& View,
	const FLightSceneInfo& LightSceneInfo,
	const FShadowRayTracingConfig& RayTracingConfig) const override
{
	// Asserts that the shadow visibility mask signal reports multi sample per pixel support.
	check(SignalSupportMultiSPP(ESignalProcessing::ShadowVisibilityMask));

	return EShadowRequirements::PenumbraAndClosestOccluder;
}
2019-10-01 13:03:04 -04:00
virtual void DenoiseShadowVisibilityMasks (
2019-03-07 11:25:32 -05:00
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
FPreviousViewInfo * PreviousViewInfos ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
2019-10-01 13:03:04 -04:00
const TStaticArray < FShadowVisibilityParameters , IScreenSpaceDenoiser : : kMaxBatchSize > & InputParameters ,
2019-03-07 11:25:32 -05:00
const int32 InputParameterCount ,
2019-10-01 13:03:04 -04:00
TStaticArray < FShadowVisibilityOutputs , IScreenSpaceDenoiser : : kMaxBatchSize > & Outputs ) const
2019-03-07 11:25:32 -05:00
{
2019-06-11 18:27:07 -04:00
RDG_GPU_STAT_SCOPE ( GraphBuilder , ShadowsDenoiser ) ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2019-03-07 11:25:32 -05:00
FSSDSignalTextures InputSignal ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDShadowVisibilityMasksEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . SignalProcessing = ESignalProcessing : : ShadowVisibilityMask ;
2019-03-07 11:25:32 -05:00
Settings . InputResolutionFraction = 1.0f ;
2020-04-29 16:25:02 -04:00
Settings . ReconstructionSamples = FMath : : Clamp ( CVarShadowReconstructionSampleCount . GetValueOnRenderThread ( ) , 1 , kStackowiakMaxSampleCountPerSet ) ;
2019-03-15 13:58:07 -04:00
Settings . PreConvolutionCount = CVarShadowPreConvolutionCount . GetValueOnRenderThread ( ) ;
2019-03-07 11:25:32 -05:00
Settings . bUseTemporalAccumulation = CVarShadowTemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
2019-03-12 10:09:36 -04:00
Settings . HistoryConvolutionSampleCount = CVarShadowHistoryConvolutionSampleCount . GetValueOnRenderThread ( ) ;
2019-03-07 11:25:32 -05:00
Settings . SignalBatchSize = InputParameterCount ;
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < InputParameterCount ; BatchedSignalId + + )
2019-03-06 23:59:18 -05:00
{
2019-03-07 11:25:32 -05:00
Settings . MaxInputSPP = FMath : : Max ( Settings . MaxInputSPP , InputParameters [ BatchedSignalId ] . RayTracingConfig . RayCountPerPixel ) ;
2018-12-18 21:41:17 -05:00
}
2019-09-14 09:45:25 -04:00
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
2019-03-07 11:25:32 -05:00
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < InputParameterCount ; BatchedSignalId + + )
{
2019-10-01 13:03:04 -04:00
const FShadowVisibilityParameters & Parameters = InputParameters [ BatchedSignalId ] ;
2019-09-14 09:45:25 -04:00
const FLightSceneProxy * Proxy = Parameters . LightSceneInfo - > Proxy ;
// Scissor the denoiser.
{
FIntRect LightScissorRect ;
if ( Proxy - > GetScissorRect ( /* out */ LightScissorRect , View , View . ViewRect ) )
{
2018-12-18 21:41:17 -05:00
2019-09-14 09:45:25 -04:00
}
else
{
LightScissorRect = View . ViewRect ;
}
if ( BatchedSignalId = = 0 )
{
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = LightScissorRect ;
2019-09-14 09:45:25 -04:00
}
else
{
2019-10-01 13:03:04 -04:00
Settings . FullResViewport . Union ( LightScissorRect ) ;
2019-09-14 09:45:25 -04:00
}
Settings . SignalScissor [ BatchedSignalId ] = LightScissorRect ;
}
ensure ( IsSupportedLightType ( ELightComponentType ( Proxy - > GetLightType ( ) ) ) ) ;
2019-03-07 11:25:32 -05:00
Settings . LightSceneInfo [ BatchedSignalId ] = Parameters . LightSceneInfo ;
2019-10-01 13:03:04 -04:00
// Get the packed penumbra and hit distance in Penumbra texture.
InputSignal . Textures [ BatchedSignalId ] = Parameters . InputTextures . Mask ;
2021-10-25 20:05:28 -04:00
const ULightComponent * LightComponent = Settings . LightSceneInfo [ BatchedSignalId ] - > Proxy - > GetLightComponent ( ) ;
TSharedPtr < FScreenSpaceDenoiserHistory > * PrevHistoryEntry = PreviousViewInfos - > ShadowHistories . Find ( LightComponent ) ;
PrevHistories [ BatchedSignalId ] = PrevHistoryEntry ? PrevHistoryEntry - > Get ( ) : nullptr ;
2019-03-07 11:25:32 -05:00
NewHistories [ BatchedSignalId ] = nullptr ;
2019-09-14 09:45:25 -04:00
if ( ! View . bStatePrevViewInfoIsReadOnly )
2019-03-07 11:25:32 -05:00
{
check ( View . ViewState ) ;
2021-10-25 20:05:28 -04:00
TSharedPtr < FScreenSpaceDenoiserHistory > * NewHistoryEntry = View . ViewState - > PrevFrameViewInfo . ShadowHistories . Find ( LightComponent ) ;
if ( NewHistoryEntry = = nullptr )
{
FScreenSpaceDenoiserHistory * NewHistory = new FScreenSpaceDenoiserHistory ;
View . ViewState - > PrevFrameViewInfo . ShadowHistories . Emplace ( LightComponent , NewHistory ) ;
NewHistories [ BatchedSignalId ] = NewHistory ;
}
else
{
NewHistories [ BatchedSignalId ] = NewHistoryEntry - > Get ( ) ;
}
2019-03-07 11:25:32 -05:00
}
}
2019-09-14 09:45:25 -04:00
// Force viewport to be a multiple of 2, to avoid over frame interference between TAA jitter of the frame, and Stackowiack's SampleTrackId.
{
2019-10-01 13:03:04 -04:00
Settings . FullResViewport . Min . X & = ~ 1 ;
Settings . FullResViewport . Min . Y & = ~ 1 ;
2019-09-14 09:45:25 -04:00
}
2019-03-07 11:25:32 -05:00
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-03-07 11:25:32 -05:00
InputSignal , Settings ,
PrevHistories ,
NewHistories ,
& SignalOutput ) ;
for ( int32 BatchedSignalId = 0 ; BatchedSignalId < InputParameterCount ; BatchedSignalId + + )
{
2019-10-01 13:03:04 -04:00
Outputs [ BatchedSignalId ] . Mask = SignalOutput . Textures [ BatchedSignalId ] ;
2019-03-07 11:25:32 -05:00
}
2018-12-18 21:41:17 -05:00
}
2019-09-14 09:45:25 -04:00
FPolychromaticPenumbraOutputs DenoisePolychromaticPenumbraHarmonics (
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
FPreviousViewInfo * PreviousViewInfos ,
const FSceneTextureParameters & SceneTextures ,
const FPolychromaticPenumbraHarmonics & Inputs ) const override
{
RDG_GPU_STAT_SCOPE ( GraphBuilder , ShadowsDenoiser ) ;
FRDGTextureRef BlackDummy = GraphBuilder . RegisterExternalTexture ( GSystemTextures . BlackDummy ) ;
FRDGTextureRef WhiteDummy = GraphBuilder . RegisterExternalTexture ( GSystemTextures . WhiteDummy ) ;
FSSDComposeHarmonicsCS : : FParameters * ComposePassParameters = GraphBuilder . AllocParameters < FSSDComposeHarmonicsCS : : FParameters > ( ) ;
// Harmonic 0 doesn't need any reconstruction given it's the highest frequency details.
{
const int32 HarmonicId = 0 ;
ComposePassParameters - > SignalHarmonics [ HarmonicId ] . Textures [ 0 ] = Inputs . Diffuse . Harmonics [ 0 ] ;
ComposePassParameters - > SignalHarmonics [ HarmonicId ] . Textures [ 1 ] = Inputs . Diffuse . Harmonics [ 1 ] ;
ComposePassParameters - > SignalHarmonics [ HarmonicId ] . Textures [ 2 ] = Inputs . Specular . Harmonics [ 0 ] ;
ComposePassParameters - > SignalHarmonics [ HarmonicId ] . Textures [ 3 ] = Inputs . Specular . Harmonics [ 1 ] ;
}
// Reconstruct each harmonic independently
for ( int32 HarmonicId = 1 ; HarmonicId < IScreenSpaceDenoiser : : kMultiPolychromaticPenumbraHarmonics ; HarmonicId + + )
{
int32 Periode = 1 < < HarmonicId ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDMultiPolychromaticPenumbraHarmonicsEffectNames [ HarmonicId ] ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-09-14 09:45:25 -04:00
Settings . SignalProcessing = ESignalProcessing : : PolychromaticPenumbraHarmonic ;
Settings . HarmonicPeriode = Periode ;
2019-10-02 18:47:26 -04:00
Settings . ReconstructionSamples = Periode * Periode ; // TODO(Denoiser): should use preconvolution instead for harmonic 3
2019-09-14 09:45:25 -04:00
Settings . bUseTemporalAccumulation = false ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
PrevHistories [ 0 ] = nullptr ;
NewHistories [ 0 ] = nullptr ;
FSSDSignalTextures InputSignal ;
InputSignal . Textures [ 0 ] = Inputs . Diffuse . Harmonics [ HarmonicId + 0 ] ;
InputSignal . Textures [ 1 ] = Inputs . Diffuse . Harmonics [ HarmonicId + 1 ] ;
InputSignal . Textures [ 2 ] = Inputs . Specular . Harmonics [ HarmonicId + 0 ] ;
InputSignal . Textures [ 3 ] = Inputs . Specular . Harmonics [ HarmonicId + 1 ] ;
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-09-14 09:45:25 -04:00
InputSignal , Settings ,
PrevHistories , NewHistories ,
/* out */ & SignalOutput ) ;
ComposePassParameters - > SignalHarmonics [ HarmonicId ] = SignalOutput ;
}
// Denoise the entire integrand signal.
// TODO(Denoiser): this assume all the lights are going into lowest frequency harmonic.
if ( 1 )
{
const int32 HarmonicId = IScreenSpaceDenoiser : : kMultiPolychromaticPenumbraHarmonics - 1 ;
int32 Periode = 1 < < HarmonicId ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDMultiPolychromaticPenumbraIntegralEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-09-14 09:45:25 -04:00
Settings . SignalProcessing = ESignalProcessing : : PolychromaticPenumbraHarmonic ;
Settings . HarmonicPeriode = Periode ;
2019-10-02 18:47:26 -04:00
Settings . ReconstructionSamples = Periode * Periode ; // TODO(Denoiser): should use preconvolution instead for harmonic 3
2019-09-14 09:45:25 -04:00
Settings . bUseTemporalAccumulation = false ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
PrevHistories [ 0 ] = nullptr ;
NewHistories [ 0 ] = nullptr ;
// TODO(Denoiser): pipeline permutation to be faster.
FSSDSignalTextures InputSignal ;
InputSignal . Textures [ 0 ] = Inputs . Diffuse . Harmonics [ 0 ] ;
InputSignal . Textures [ 1 ] = BlackDummy ;
InputSignal . Textures [ 2 ] = Inputs . Specular . Harmonics [ 0 ] ;
InputSignal . Textures [ 3 ] = BlackDummy ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-09-14 09:45:25 -04:00
InputSignal , Settings ,
PrevHistories , NewHistories ,
/* out */ & ComposePassParameters - > SignalIntegrand ) ;
}
else
{
ComposePassParameters - > SignalIntegrand . Textures [ 0 ] = WhiteDummy ;
ComposePassParameters - > SignalIntegrand . Textures [ 1 ] = BlackDummy ;
ComposePassParameters - > SignalIntegrand . Textures [ 2 ] = WhiteDummy ;
ComposePassParameters - > SignalIntegrand . Textures [ 3 ] = BlackDummy ;
}
// Merges the different harmonics.
FSSDSignalTextures ComposedHarmonics ;
{
2020-09-24 00:43:27 -04:00
FIntPoint BufferExtent = SceneTextures . SceneDepthTexture - > Desc . Extent ;
2019-09-14 09:45:25 -04:00
{
2020-09-24 00:43:27 -04:00
FRDGTextureDesc Desc = FRDGTextureDesc : : Create2D (
2019-09-14 09:45:25 -04:00
BufferExtent ,
PF_FloatRGBA ,
FClearValueBinding : : Black ,
2020-09-24 00:43:27 -04:00
TexCreate_ShaderResource | TexCreate_UAV ) ;
2019-09-14 09:45:25 -04:00
ComposedHarmonics . Textures [ 0 ] = GraphBuilder . CreateTexture ( Desc , TEXT ( " PolychromaticPenumbraComposition0 " ) ) ;
ComposedHarmonics . Textures [ 1 ] = GraphBuilder . CreateTexture ( Desc , TEXT ( " PolychromaticPenumbraComposition1 " ) ) ;
}
ComposePassParameters - > CommonParameters . ViewUniformBuffer = View . ViewUniformBuffer ;
ComposePassParameters - > CommonParameters . SceneTextures = SceneTextures ;
ComposePassParameters - > CommonParameters . ViewportMin = View . ViewRect . Min ;
ComposePassParameters - > CommonParameters . ViewportMax = View . ViewRect . Max ;
2021-09-22 10:01:48 -04:00
ComposePassParameters - > CommonParameters . PublicCommonParameters . DenoiserBufferBilinearUVMinMax = FVector4f (
2019-09-14 09:45:25 -04:00
float ( View . ViewRect . Min . X + 0.5f ) / float ( BufferExtent . X ) ,
float ( View . ViewRect . Min . Y + 0.5f ) / float ( BufferExtent . Y ) ,
float ( View . ViewRect . Max . X - 0.5f ) / float ( BufferExtent . X ) ,
float ( View . ViewRect . Max . Y - 0.5f ) / float ( BufferExtent . Y ) ) ;
ComposePassParameters - > SignalOutput = CreateMultiplexedUAVs ( GraphBuilder , ComposedHarmonics ) ;
{
2020-09-24 00:43:27 -04:00
FRDGTextureDesc DebugDesc = FRDGTextureDesc : : Create2D (
SceneTextures . SceneDepthTexture - > Desc . Extent ,
2019-09-14 09:45:25 -04:00
PF_FloatRGBA ,
FClearValueBinding : : Black ,
2020-09-24 00:43:27 -04:00
TexCreate_ShaderResource | TexCreate_UAV ) ;
2019-09-14 09:45:25 -04:00
FRDGTextureRef DebugTexture = GraphBuilder . CreateTexture ( DebugDesc , TEXT ( " DebugHarmonicComposition " ) ) ;
ComposePassParameters - > DebugOutput = GraphBuilder . CreateUAV ( DebugTexture ) ;
}
TShaderMapRef < FSSDComposeHarmonicsCS > ComputeShader ( View . ShaderMap ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SSD ComposeHarmonics " ) ,
2020-02-06 13:13:41 -05:00
ComputeShader , ComposePassParameters ,
2019-09-14 09:45:25 -04:00
FComputeShaderUtils : : GetGroupCount ( View . ViewRect . Size ( ) , FSSDSpatialAccumulationCS : : kGroupSize ) ) ;
}
FPolychromaticPenumbraOutputs Outputs ;
{
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDMultiPolychromaticPenumbraFinalEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-09-14 09:45:25 -04:00
Settings . SignalProcessing = ESignalProcessing : : PolychromaticPenumbraHarmonic ;
Settings . bEnableReconstruction = false ;
Settings . bUseTemporalAccumulation = CVarShadowTemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
PrevHistories [ 0 ] = & PreviousViewInfos - > PolychromaticPenumbraHarmonicsHistory ;
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . PolychromaticPenumbraHarmonicsHistory : nullptr ;
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-09-14 09:45:25 -04:00
ComposedHarmonics , Settings ,
PrevHistories , NewHistories ,
/* out */ & SignalOutput ) ;
Outputs . Diffuse = SignalOutput . Textures [ 0 ] ;
Outputs . Specular = SignalOutput . Textures [ 1 ] ;
}
return Outputs ;
}
2019-01-17 17:03:37 -05:00
// Denoises the ray traced reflection signal for one view.
//
// Feeds ReflectionInputs.Color and ReflectionInputs.RayHitDistance into
// DenoiseSignalAtConstantPixelDensity() using ESignalProcessing::Reflections and the
// r.Reflections.Denoiser-style console variables read below, and returns the first output
// texture as FReflectionsOutputs::Color.
//
// PreviousViewInfos is dereferenced unconditionally, so it must not be null.
// The new history slot is nullptr when the view has no ViewState.
FReflectionsOutputs DenoiseReflections (
2018-12-18 21:41:17 -05:00
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
2019-03-07 11:25:32 -05:00
FPreviousViewInfo * PreviousViewInfos ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
2019-01-17 17:03:37 -05:00
const FReflectionsInputs & ReflectionInputs ,
const FReflectionsRayTracingConfig RayTracingConfig ) const override
2018-12-18 21:41:17 -05:00
{
2019-06-11 18:27:07 -04:00
RDG_GPU_STAT_SCOPE ( GraphBuilder , ReflectionsDenoiser ) ;
// Imaginary depth is only used for Nvidia denoiser.
2019-10-02 18:47:26 -04:00
// TODO(Denoiser): permutation to not generate it?
2020-07-06 18:58:26 -04:00
// The imaginary depth texture is optional: only silence the unused-texture warning when a
// handle was actually provided, matching the guard used by DenoiseWaterReflections().
if ( ReflectionInputs . RayImaginaryDepth )
{
GraphBuilder . RemoveUnusedTextureWarning ( ReflectionInputs . RayImaginaryDepth ) ;
}
2019-06-11 18:27:07 -04:00
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2019-01-22 19:17:17 -05:00
// Signal layout expected by the denoiser: [0] = color, [1] = ray hit distance.
FSSDSignalTextures InputSignal ;
2019-03-07 11:25:32 -05:00
InputSignal . Textures [ 0 ] = ReflectionInputs . Color ;
InputSignal . Textures [ 1 ] = ReflectionInputs . RayHitDistance ;
2019-01-22 19:17:17 -05:00
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
// NOTE(review): this macro presumably declares the local `Settings` configured below -- confirm at its definition.
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDReflectionsEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-01-22 19:17:17 -05:00
Settings . SignalProcessing = ESignalProcessing : : Reflections ;
Settings . InputResolutionFraction = RayTracingConfig . ResolutionFraction ;
Settings . ReconstructionSamples = CVarReflectionReconstructionSampleCount . GetValueOnRenderThread ( ) ;
2020-04-29 16:25:02 -04:00
Settings . PreConvolutionCount = CVarReflectionPreConvolutionCount . GetValueOnRenderThread ( ) ;
2019-01-22 19:17:17 -05:00
Settings . bUseTemporalAccumulation = CVarReflectionTemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
2019-06-11 18:27:07 -04:00
Settings . MaxInputSPP = RayTracingConfig . RayCountPerPixel ;
2019-01-22 19:17:17 -05:00
2019-09-14 09:45:25 -04:00
// Only batch slot 0 is populated: this denoises a single signal.
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
2019-03-07 11:25:32 -05:00
PrevHistories [ 0 ] = & PreviousViewInfos - > ReflectionsHistory ;
// Without a view state there is nowhere to store the new history, so pass nullptr.
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . ReflectionsHistory : nullptr ;
2019-01-22 19:17:17 -05:00
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-01-22 19:17:17 -05:00
InputSignal , Settings ,
2019-03-07 11:25:32 -05:00
PrevHistories ,
NewHistories ,
2019-01-22 19:17:17 -05:00
& SignalOutput ) ;
// Only the first denoised texture is exposed to the caller.
FReflectionsOutputs ReflectionsOutput ;
2019-03-07 11:25:32 -05:00
ReflectionsOutput . Color = SignalOutput . Textures [ 0 ] ;
2019-01-22 19:17:17 -05:00
return ReflectionsOutput ;
}
2020-03-24 14:23:55 -04:00
// Denoises the ray traced water reflection signal for one view.
//
// Same pipeline and console variables as the regular reflections path
// (ESignalProcessing::Reflections, CVarReflection*), but the temporal history is kept in the
// separate WaterReflectionsHistory slot. Returns the first denoised texture as
// FReflectionsOutputs::Color.
//
// PreviousViewInfos is dereferenced unconditionally, so it must not be null.
// The new history slot is nullptr when the view has no ViewState.
FReflectionsOutputs DenoiseWaterReflections (
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
FPreviousViewInfo * PreviousViewInfos ,
const FSceneTextureParameters & SceneTextures ,
const FReflectionsInputs & ReflectionInputs ,
const FReflectionsRayTracingConfig RayTracingConfig ) const override
{
// Note: shares the ReflectionsDenoiser GPU stat scope with the regular reflections path.
RDG_GPU_STAT_SCOPE ( GraphBuilder , ReflectionsDenoiser ) ;
// Imaginary depth is only used for Nvidia denoiser.
// TODO: permutation to not generate it?
// The imaginary depth texture is optional here, hence the null check before touching it.
if ( ReflectionInputs . RayImaginaryDepth )
GraphBuilder . RemoveUnusedTextureWarning ( ReflectionInputs . RayImaginaryDepth ) ;
2020-03-24 17:21:55 -04:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2020-03-24 14:23:55 -04:00
// Signal layout passed to the denoiser: [0] = color, [1] = ray hit distance.
FSSDSignalTextures InputSignal ;
InputSignal . Textures [ 0 ] = ReflectionInputs . Color ;
InputSignal . Textures [ 1 ] = ReflectionInputs . RayHitDistance ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
// NOTE(review): this macro presumably declares the local `Settings` configured below -- confirm at its definition.
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDWaterReflectionsEffectName ) ;
2020-03-24 14:23:55 -04:00
Settings . FullResViewport = View . ViewRect ;
Settings . SignalProcessing = ESignalProcessing : : Reflections ; // TODO: water reflection to denoise only water pixels
Settings . InputResolutionFraction = RayTracingConfig . ResolutionFraction ;
Settings . ReconstructionSamples = CVarReflectionReconstructionSampleCount . GetValueOnRenderThread ( ) ;
2020-04-29 16:25:02 -04:00
Settings . PreConvolutionCount = CVarReflectionPreConvolutionCount . GetValueOnRenderThread ( ) ;
2020-03-24 14:23:55 -04:00
Settings . bUseTemporalAccumulation = CVarReflectionTemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
Settings . MaxInputSPP = RayTracingConfig . RayCountPerPixel ;
// Only batch slot 0 is populated: this denoises a single signal.
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
PrevHistories [ 0 ] = & PreviousViewInfos - > WaterReflectionsHistory ;
// Without a view state there is nowhere to store the new history, so pass nullptr.
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . WaterReflectionsHistory : nullptr ;
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
GraphBuilder , View , SceneTextures ,
2020-03-25 07:59:22 -04:00
ViewInfoPooledRenderTargets ,
2020-03-24 14:23:55 -04:00
InputSignal , Settings ,
PrevHistories ,
NewHistories ,
& SignalOutput ) ;
// Only the first denoised texture is exposed to the caller.
FReflectionsOutputs ReflectionsOutput ;
ReflectionsOutput . Color = SignalOutput . Textures [ 0 ] ;
return ReflectionsOutput ;
}
2019-01-22 19:17:17 -05:00
// Denoises the ray traced ambient occlusion signal for one view.
//
// Feeds the occlusion mask and ray hit distance into DenoiseSignalAtConstantPixelDensity()
// using ESignalProcessing::AmbientOcclusion and the CVarAO* console variables, and returns the
// first output texture as FAmbientOcclusionOutputs::AmbientOcclusionMask.
//
// Note: the input parameter is named ReflectionInputs although it carries ambient occlusion
// inputs (FAmbientOcclusionInputs).
// PreviousViewInfos is dereferenced unconditionally, so it must not be null.
// The new history slot is nullptr when the view has no ViewState.
FAmbientOcclusionOutputs DenoiseAmbientOcclusion (
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
2019-03-07 11:25:32 -05:00
FPreviousViewInfo * PreviousViewInfos ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
2019-01-22 19:17:17 -05:00
const FAmbientOcclusionInputs & ReflectionInputs ,
const FAmbientOcclusionRayTracingConfig RayTracingConfig ) const override
{
2019-06-11 18:27:07 -04:00
RDG_GPU_STAT_SCOPE ( GraphBuilder , AmbientOcclusionDenoiser ) ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2019-01-22 19:17:17 -05:00
// Signal layout passed to the denoiser: [0] = occlusion mask, [1] = ray hit distance.
FSSDSignalTextures InputSignal ;
2019-03-07 11:25:32 -05:00
InputSignal . Textures [ 0 ] = ReflectionInputs . Mask ;
InputSignal . Textures [ 1 ] = ReflectionInputs . RayHitDistance ;
2019-01-22 19:17:17 -05:00
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
// NOTE(review): this macro presumably declares the local `Settings` configured below -- confirm at its definition.
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDAmbientOcclusionEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-01-22 19:17:17 -05:00
Settings . SignalProcessing = ESignalProcessing : : AmbientOcclusion ;
Settings . InputResolutionFraction = RayTracingConfig . ResolutionFraction ;
2020-04-29 16:25:02 -04:00
// The user-provided sample count is clamped to [1, kStackowiakMaxSampleCountPerSet].
Settings . ReconstructionSamples = FMath : : Clamp ( CVarAOReconstructionSampleCount . GetValueOnRenderThread ( ) , 1 , kStackowiakMaxSampleCountPerSet ) ;
2020-09-24 00:43:27 -04:00
Settings . PreConvolutionCount = CVarAOPreConvolutionCount . GetValueOnRenderThread ( ) ;
Settings . KernelSpreadFactor = CVarAOKernelSpreadFactor . GetValueOnRenderThread ( ) ;
2019-01-22 19:17:17 -05:00
Settings . bUseTemporalAccumulation = CVarAOTemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
Settings . HistoryConvolutionSampleCount = CVarAOHistoryConvolutionSampleCount . GetValueOnRenderThread ( ) ;
2019-02-14 14:23:27 -05:00
Settings . HistoryConvolutionKernelSpreadFactor = CVarAOHistoryConvolutionKernelSpreadFactor . GetValueOnRenderThread ( ) ;
2019-06-11 18:27:07 -04:00
Settings . MaxInputSPP = RayTracingConfig . RayCountPerPixel ;
2019-01-22 19:17:17 -05:00
2019-09-14 09:45:25 -04:00
// Only batch slot 0 is populated: this denoises a single signal.
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
2019-03-07 11:25:32 -05:00
PrevHistories [ 0 ] = & PreviousViewInfos - > AmbientOcclusionHistory ;
// Without a view state there is nowhere to store the new history, so pass nullptr.
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . AmbientOcclusionHistory : nullptr ;
2019-01-22 19:17:17 -05:00
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-01-22 19:17:17 -05:00
InputSignal , Settings ,
2019-03-07 11:25:32 -05:00
PrevHistories ,
NewHistories ,
2019-01-22 19:17:17 -05:00
& SignalOutput ) ;
// Only the first denoised texture is exposed to the caller.
FAmbientOcclusionOutputs AmbientOcclusionOutput ;
2019-03-07 11:25:32 -05:00
AmbientOcclusionOutput . AmbientOcclusionMask = SignalOutput . Textures [ 0 ] ;
2019-01-22 19:17:17 -05:00
return AmbientOcclusionOutput ;
2018-12-18 21:41:17 -05:00
}
2020-07-06 18:58:26 -04:00
// Denoises the diffuse indirect signal for one view.
//
// Feeds Inputs.Color and Inputs.RayHitDistance into DenoiseSignalAtConstantPixelDensity()
// using ESignalProcessing::DiffuseAndAmbientOcclusion and the CVarGI* console variables.
// Unlike the reflection and ambient occlusion entry points, the full FSSDSignalTextures
// produced by the denoiser is returned as-is rather than repacked into an output struct.
//
// PreviousViewInfos is dereferenced unconditionally, so it must not be null.
// The new history slot is nullptr when the view has no ViewState.
FSSDSignalTextures DenoiseDiffuseIndirect (
2019-02-14 14:23:27 -05:00
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
2019-03-07 11:25:32 -05:00
FPreviousViewInfo * PreviousViewInfos ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
const FDiffuseIndirectInputs & Inputs ,
2019-02-14 14:23:27 -05:00
const FAmbientOcclusionRayTracingConfig Config ) const override
{
2019-06-11 18:27:07 -04:00
RDG_GPU_STAT_SCOPE ( GraphBuilder , DiffuseIndirectDenoiser ) ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2019-02-14 14:23:27 -05:00
// Signal layout passed to the denoiser: [0] = color, [1] = ray hit distance.
FSSDSignalTextures InputSignal ;
2019-03-07 11:25:32 -05:00
InputSignal . Textures [ 0 ] = Inputs . Color ;
InputSignal . Textures [ 1 ] = Inputs . RayHitDistance ;
2019-02-14 14:23:27 -05:00
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
// NOTE(review): this macro presumably declares the local `Settings` configured below -- confirm at its definition.
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDDiffuseIndirectEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-06-11 18:27:07 -04:00
Settings . SignalProcessing = ESignalProcessing : : DiffuseAndAmbientOcclusion ;
2019-02-14 14:23:27 -05:00
Settings . InputResolutionFraction = Config . ResolutionFraction ;
2020-04-29 16:25:02 -04:00
// The user-provided sample count is clamped to [1, kStackowiakMaxSampleCountPerSet].
Settings . ReconstructionSamples = FMath : : Clamp ( CVarGIReconstructionSampleCount . GetValueOnRenderThread ( ) , 1 , kStackowiakMaxSampleCountPerSet ) ;
2019-06-11 18:27:07 -04:00
Settings . PreConvolutionCount = CVarGIPreConvolutionCount . GetValueOnRenderThread ( ) ;
2019-02-14 14:23:27 -05:00
Settings . bUseTemporalAccumulation = CVarGITemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
Settings . HistoryConvolutionSampleCount = CVarGIHistoryConvolutionSampleCount . GetValueOnRenderThread ( ) ;
Settings . HistoryConvolutionKernelSpreadFactor = CVarGIHistoryConvolutionKernelSpreadFactor . GetValueOnRenderThread ( ) ;
2019-06-11 18:27:07 -04:00
Settings . MaxInputSPP = Config . RayCountPerPixel ;
2019-02-14 14:23:27 -05:00
2019-09-14 09:45:25 -04:00
// Only batch slot 0 is populated: this denoises a single signal.
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
2019-06-11 18:27:07 -04:00
PrevHistories [ 0 ] = & PreviousViewInfos - > DiffuseIndirectHistory ;
// Without a view state there is nowhere to store the new history, so pass nullptr.
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . DiffuseIndirectHistory : nullptr ;
2019-03-07 11:25:32 -05:00
2019-02-14 14:23:27 -05:00
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
2019-02-14 14:23:27 -05:00
InputSignal , Settings ,
2019-03-07 11:25:32 -05:00
PrevHistories ,
NewHistories ,
2019-02-14 14:23:27 -05:00
& SignalOutput ) ;
2020-07-06 18:58:26 -04:00
// The raw denoiser output is handed back without repacking.
return SignalOutput ;
2019-03-07 11:25:32 -05:00
}
2019-06-11 18:27:07 -04:00
FDiffuseIndirectOutputs DenoiseSkyLight (
2019-03-07 11:25:32 -05:00
FRDGBuilder & GraphBuilder ,
const FViewInfo & View ,
FPreviousViewInfo * PreviousViewInfos ,
2019-06-11 18:27:07 -04:00
const FSceneTextureParameters & SceneTextures ,
const FDiffuseIndirectInputs & Inputs ,
2019-03-07 11:25:32 -05:00
const FAmbientOcclusionRayTracingConfig Config ) const override
{
2019-06-11 18:27:07 -04:00
RDG_GPU_STAT_SCOPE ( GraphBuilder , DiffuseIndirectDenoiser ) ;
2020-01-24 18:07:01 -05:00
FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets ;
SetupSceneViewInfoPooledRenderTargets ( View , & ViewInfoPooledRenderTargets ) ;
2019-03-07 11:25:32 -05:00
FSSDSignalTextures InputSignal ;
InputSignal . Textures [ 0 ] = Inputs . Color ;
InputSignal . Textures [ 1 ] = Inputs . RayHitDistance ;
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS ( SSDSkyLightEffectName ) ;
2019-10-01 13:03:04 -04:00
Settings . FullResViewport = View . ViewRect ;
2019-06-11 18:27:07 -04:00
Settings . SignalProcessing = ESignalProcessing : : DiffuseAndAmbientOcclusion ;
2019-03-07 11:25:32 -05:00
Settings . InputResolutionFraction = Config . ResolutionFraction ;
2020-04-29 16:25:02 -04:00
Settings . ReconstructionSamples = FMath : : Clamp ( CVarGIReconstructionSampleCount . GetValueOnRenderThread ( ) , 1 , kStackowiakMaxSampleCountPerSet ) ;
2019-06-11 18:27:07 -04:00
Settings . PreConvolutionCount = CVarGIPreConvolutionCount . GetValueOnRenderThread ( ) ;
2019-03-07 11:25:32 -05:00
Settings . bUseTemporalAccumulation = CVarGITemporalAccumulation . GetValueOnRenderThread ( ) ! = 0 ;
Settings . HistoryConvolutionSampleCount = CVarGIHistoryConvolutionSampleCount . GetValueOnRenderThread ( ) ;
Settings . HistoryConvolutionKernelSpreadFactor = CVarGIHistoryConvolutionKernelSpreadFactor . GetValueOnRenderThread ( ) ;
2019-06-11 18:27:07 -04:00
Settings . MaxInputSPP = Config . RayCountPerPixel ;
2019-03-07 11:25:32 -05:00
2019-09-14 09:45:25 -04:00
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > PrevHistories ;
TStaticArray < FScreenSpaceDenoiserHistory * , IScreenSpaceDenoiser : : kMaxBatchSize > NewHistories ;
2019-03-07 11:25:32 -05:00
PrevHistories [ 0 ] = & PreviousViewInfos - > SkyLightHistory ;
NewHistories [ 0 ] = View . ViewState ? & View . ViewState - > PrevFrameViewInfo . SkyLightHistory : nullptr ;
FSSDSignalTextures SignalOutput ;
DenoiseSignalAtConstantPixelDensity (
2020-01-24 18:07:01 -05:00
GraphBuilder , View , SceneTextures , ViewInfoPooledRenderTargets ,
InputSignal , Settings ,
PrevHistories ,
NewHistories ,
& SignalOutput ) ;
FDiffuseIndirectOutputs GlobalIlluminationOutputs ;
GlobalIlluminationOutputs . Color = SignalOutput . Textures [ 0 ] ;
return GlobalIlluminationOutputs ;
}
/**
 * Denoises the reflected sky light signal for one view.
 *
 * The noisy input is packed as two textures (colour in slot 0, ray hit distance in slot 1) and
 * run through DenoiseSignalAtConstantPixelDensity() using the DiffuseAndAmbientOcclusion signal
 * processing mode and the r.GI.* style console variables referenced below.
 *
 * @param GraphBuilder       Render graph the denoising work is recorded into.
 * @param View               View being rendered; supplies the viewport rect and the optional view state.
 * @param PreviousViewInfos  Source of the previous frame's ReflectedSkyLightHistory.
 * @param SceneTextures      Scene texture parameters forwarded to the denoiser.
 * @param Inputs             Noisy colour and ray hit distance textures.
 * @param Config             Resolution fraction and ray count per pixel of the input.
 * @return Outputs whose Color member is texture 0 of the denoised signal.
 */
FDiffuseIndirectOutputs DenoiseReflectedSkyLight(
	FRDGBuilder& GraphBuilder,
	const FViewInfo& View,
	FPreviousViewInfo* PreviousViewInfos,
	const FSceneTextureParameters& SceneTextures,
	const FDiffuseIndirectInputs& Inputs,
	const FAmbientOcclusionRayTracingConfig Config) const override
{
	RDG_GPU_STAT_SCOPE(GraphBuilder, DiffuseIndirectDenoiser);

	// This path uses the imaginary-reflection variant of the pooled render target setup.
	FViewInfoPooledRenderTargets PooledTargets;
	SetupImaginaryReflectionViewInfoPooledRenderTargets(View, &PooledTargets);

	// Slot 0: colour, slot 1: ray hit distance.
	FSSDSignalTextures NoisySignal;
	NoisySignal.Textures[0] = Inputs.Color;
	NoisySignal.Textures[1] = Inputs.RayHitDistance;

	// `Settings` is brought into scope by this macro invocation.
	DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(SSDReflectedSkyLightEffectName);

	// Values taken from the view and the ray tracing config.
	Settings.FullResViewport = View.ViewRect;
	Settings.SignalProcessing = ESignalProcessing::DiffuseAndAmbientOcclusion;
	Settings.InputResolutionFraction = Config.ResolutionFraction;
	Settings.MaxInputSPP = Config.RayCountPerPixel;

	// Values taken from console variables; the reconstruction sample count is kept within
	// [1, kStackowiakMaxSampleCountPerSet].
	Settings.ReconstructionSamples = FMath::Clamp(CVarGIReconstructionSampleCount.GetValueOnRenderThread(), 1, kStackowiakMaxSampleCountPerSet);
	Settings.PreConvolutionCount = CVarGIPreConvolutionCount.GetValueOnRenderThread();
	Settings.bUseTemporalAccumulation = CVarGITemporalAccumulation.GetValueOnRenderThread() != 0;
	Settings.HistoryConvolutionSampleCount = CVarGIHistoryConvolutionSampleCount.GetValueOnRenderThread();
	Settings.HistoryConvolutionKernelSpreadFactor = CVarGIHistoryConvolutionKernelSpreadFactor.GetValueOnRenderThread();

	// Only batch entry 0 is used here.
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesIn;
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesOut;
	HistoriesIn[0] = &PreviousViewInfos->ReflectedSkyLightHistory;
	HistoriesOut[0] = nullptr;
	if (View.ViewState)
	{
		// A view state exists, so the new history is written into its PrevFrameViewInfo.
		HistoriesOut[0] = &View.ViewState->PrevFrameViewInfo.ReflectedSkyLightHistory;
	}

	FSSDSignalTextures DenoisedSignal;
	DenoiseSignalAtConstantPixelDensity(
		GraphBuilder, View, SceneTextures, PooledTargets,
		NoisySignal, Settings,
		HistoriesIn,
		HistoriesOut,
		&DenoisedSignal);

	FDiffuseIndirectOutputs Result;
	Result.Color = DenoisedSignal.Textures[0];
	return Result;
}
2020-07-06 18:58:26 -04:00
/**
 * Denoises a diffuse indirect signal encoded as spherical harmonic textures.
 *
 * Copies IScreenSpaceDenoiser::kSphericalHarmonicTextureCount input textures into the signal and
 * runs DenoiseSignalAtConstantPixelDensity() in DiffuseSphericalHarmonic mode. Both the input and
 * the denoising resolution fractions are 1 / CommonDiffuseParameters.DownscaleFactor.
 *
 * @param GraphBuilder             Render graph the denoising work is recorded into.
 * @param View                     View being rendered; supplies the viewport rect and the optional view state.
 * @param PreviousViewInfos        Source of the previous frame's DiffuseIndirectHistory.
 * @param SceneTextures            Scene texture parameters forwarded to the denoiser.
 * @param Inputs                   Spherical harmonic textures to denoise.
 * @param CommonDiffuseParameters  Supplies DownscaleFactor and RayCountPerPixel.
 * @return The denoised signal textures.
 */
FSSDSignalTextures DenoiseDiffuseIndirectHarmonic(
	FRDGBuilder& GraphBuilder,
	const FViewInfo& View,
	FPreviousViewInfo* PreviousViewInfos,
	const FSceneTextureParameters& SceneTextures,
	const FDiffuseIndirectHarmonic& Inputs,
	const HybridIndirectLighting::FCommonParameters& CommonDiffuseParameters) const override
{
	RDG_GPU_STAT_SCOPE(GraphBuilder, DiffuseIndirectDenoiser);

	FViewInfoPooledRenderTargets ViewInfoPooledRenderTargets;
	SetupSceneViewInfoPooledRenderTargets(View, &ViewInfoPooledRenderTargets);

	FSSDSignalTextures InputSignal;
	for (int32 i = 0; i < IScreenSpaceDenoiser::kSphericalHarmonicTextureCount; i++)
	{
		InputSignal.Textures[i] = Inputs.SphericalHarmonic[i];
	}

	// `Settings` is brought into scope by this macro invocation.
	DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(SSDDiffuseIndirectHarmonicEffectName);

	Settings.FullResViewport = View.ViewRect;
	Settings.SignalProcessing = ESignalProcessing::DiffuseSphericalHarmonic;
	Settings.InputResolutionFraction = 1.0f / float(CommonDiffuseParameters.DownscaleFactor);
	// Keep the console variable within [1, kStackowiakMaxSampleCountPerSet], exactly as the other
	// denoise entry points of this class that read the same variable do, so an out-of-range
	// user value cannot reach the reconstruction pass.
	Settings.ReconstructionSamples = FMath::Clamp(CVarGIReconstructionSampleCount.GetValueOnRenderThread(), 1, kStackowiakMaxSampleCountPerSet);
	Settings.bUseTemporalAccumulation = CVarGITemporalAccumulation.GetValueOnRenderThread() != 0;
	Settings.MaxInputSPP = CommonDiffuseParameters.RayCountPerPixel;
	Settings.DenoisingResolutionFraction = 1.0f / float(CommonDiffuseParameters.DownscaleFactor);

	// Only batch entry 0 is used here.
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> PrevHistories;
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> NewHistories;
	PrevHistories[0] = &PreviousViewInfos->DiffuseIndirectHistory;
	// No view state means there is nowhere to store the new history.
	NewHistories[0] = View.ViewState ? &View.ViewState->PrevFrameViewInfo.DiffuseIndirectHistory : nullptr;

	FSSDSignalTextures SignalOutput;
	DenoiseSignalAtConstantPixelDensity(
		GraphBuilder, View, SceneTextures, ViewInfoPooledRenderTargets,
		InputSignal, Settings,
		PrevHistories,
		NewHistories,
		&SignalOutput);

	return SignalOutput;
}
2019-10-01 13:03:04 -04:00
2019-11-20 19:33:19 -05:00
/**
 * Reports whether the ScreenSpaceDiffuseIndirect signal pipeline is compiled for a platform.
 *
 * @param Platform  Shader platform to query.
 * @return The result of ShouldCompileSignalPipeline() for that signal type and platform.
 */
bool SupportsScreenSpaceDiffuseIndirectDenoiser(EShaderPlatform Platform) const override
{
	const bool bPipelineAvailable = ShouldCompileSignalPipeline(ESignalProcessing::ScreenSpaceDiffuseIndirect, Platform);
	return bPipelineAvailable;
}
2020-07-06 18:58:26 -04:00
/**
 * Denoises a screen space diffuse indirect signal for one view.
 *
 * The noisy input is packed as two textures (colour in slot 0, ambient occlusion mask in slot 1)
 * and run through DenoiseSignalAtConstantPixelDensity() in ScreenSpaceDiffuseIndirect mode with a
 * fixed reconstruction sample count of 8.
 *
 * @param GraphBuilder       Render graph the denoising work is recorded into.
 * @param View               View being rendered; supplies the viewport rect and the optional view state.
 * @param PreviousViewInfos  Source of the previous frame's DiffuseIndirectHistory.
 * @param SceneTextures      Scene texture parameters forwarded to the denoiser.
 * @param Inputs             Noisy colour and ambient occlusion mask textures.
 * @param Config             Resolution fraction (used for both input and denoising) and ray count per pixel.
 * @return The denoised signal textures.
 */
FSSDSignalTextures DenoiseScreenSpaceDiffuseIndirect(
	FRDGBuilder& GraphBuilder,
	const FViewInfo& View,
	FPreviousViewInfo* PreviousViewInfos,
	const FSceneTextureParameters& SceneTextures,
	const FDiffuseIndirectInputs& Inputs,
	const FAmbientOcclusionRayTracingConfig Config) const override
{
	RDG_GPU_STAT_SCOPE(GraphBuilder, DiffuseIndirectDenoiser);

	FViewInfoPooledRenderTargets PooledTargets;
	SetupSceneViewInfoPooledRenderTargets(View, &PooledTargets);

	// Slot 0: colour, slot 1: ambient occlusion mask.
	FSSDSignalTextures NoisySignal;
	NoisySignal.Textures[0] = Inputs.Color;
	NoisySignal.Textures[1] = Inputs.AmbientOcclusionMask;

	// `Settings` is brought into scope by this macro invocation.
	DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(SSDScreenSpaceDiffuseIndirectEffectName);

	Settings.SignalProcessing = ESignalProcessing::ScreenSpaceDiffuseIndirect;
	Settings.FullResViewport = View.ViewRect;

	// The input and the denoising passes share the configured resolution fraction.
	Settings.InputResolutionFraction = Config.ResolutionFraction;
	Settings.DenoisingResolutionFraction = Config.ResolutionFraction;
	Settings.MaxInputSPP = Config.RayCountPerPixel;

	Settings.ReconstructionSamples = 8;
	Settings.bUseTemporalAccumulation = CVarGITemporalAccumulation.GetValueOnRenderThread() != 0;

	// Only batch entry 0 is used here.
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesIn;
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesOut;
	HistoriesIn[0] = &PreviousViewInfos->DiffuseIndirectHistory;
	HistoriesOut[0] = nullptr;
	if (View.ViewState)
	{
		// A view state exists, so the new history is written into its PrevFrameViewInfo.
		HistoriesOut[0] = &View.ViewState->PrevFrameViewInfo.DiffuseIndirectHistory;
	}

	FSSDSignalTextures DenoisedSignal;
	DenoiseSignalAtConstantPixelDensity(
		GraphBuilder, View, SceneTextures, PooledTargets,
		NoisySignal, Settings,
		HistoriesIn,
		HistoriesOut,
		&DenoisedSignal);

	return DenoisedSignal;
}
2019-01-07 17:22:05 -05:00
} ; // class FDefaultScreenSpaceDenoiser
2018-12-18 21:41:17 -05:00
2020-07-06 18:58:26 -04:00
// static
/**
 * Denoises an indirect probe hierarchy signal for one view.
 *
 * Runs DenoiseSignalAtConstantPixelDensity() in IndirectProbeHierarchy mode with reconstruction
 * disabled and a maximum input sample count of 8. The compressed depth and shading model textures
 * are handed to the denoiser through the settings.
 *
 * @param GraphBuilder                   Render graph the denoising work is recorded into.
 * @param View                           View being rendered; supplies the viewport rect and the optional view state.
 * @param PreviousViewInfos              Source of the previous frame's DiffuseIndirectHistory.
 * @param SceneTextures                  Scene texture parameters forwarded to the denoiser.
 * @param InputSignal                    Noisy signal textures, forwarded unchanged.
 * @param CompressedDepthTexture         Stored in Settings.CompressedDepthTexture.
 * @param CompressedShadingModelTexture  Stored in Settings.CompressedShadingModelTexture.
 * @return The denoised signal textures.
 */
FSSDSignalTextures IScreenSpaceDenoiser::DenoiseIndirectProbeHierarchy(
	FRDGBuilder& GraphBuilder,
	const FViewInfo& View,
	FPreviousViewInfo* PreviousViewInfos,
	const FSceneTextureParameters& SceneTextures,
	const FSSDSignalTextures& InputSignal,
	FRDGTextureRef CompressedDepthTexture,
	FRDGTextureRef CompressedShadingModelTexture)
{
	// `Settings` is brought into scope by this macro invocation.
	DECLARE_FSSD_CONSTANT_PIXEL_DENSITY_SETTINGS(SSDIndirectProbeHierarchyEffectName);

	Settings.SignalProcessing = ESignalProcessing::IndirectProbeHierarchy;
	Settings.FullResViewport = View.ViewRect;
	Settings.MaxInputSPP = 8;

	// This signal skips the reconstruction pass; temporal accumulation stays CVar-controlled.
	Settings.bEnableReconstruction = false;
	Settings.bUseTemporalAccumulation = CVarGITemporalAccumulation.GetValueOnRenderThread() != 0;

	Settings.CompressedDepthTexture = CompressedDepthTexture;
	Settings.CompressedShadingModelTexture = CompressedShadingModelTexture;

	FViewInfoPooledRenderTargets PooledTargets;
	SetupSceneViewInfoPooledRenderTargets(View, &PooledTargets);

	// Only batch entry 0 is used here.
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesIn;
	TStaticArray<FScreenSpaceDenoiserHistory*, IScreenSpaceDenoiser::kMaxBatchSize> HistoriesOut;
	HistoriesIn[0] = &PreviousViewInfos->DiffuseIndirectHistory;
	HistoriesOut[0] = nullptr;
	if (View.ViewState)
	{
		// A view state exists, so the new history is written into its PrevFrameViewInfo.
		HistoriesOut[0] = &View.ViewState->PrevFrameViewInfo.DiffuseIndirectHistory;
	}

	FSSDSignalTextures DenoisedSignal;
	DenoiseSignalAtConstantPixelDensity(
		GraphBuilder, View, SceneTextures, PooledTargets,
		InputSignal, Settings,
		HistoriesIn,
		HistoriesOut,
		&DenoisedSignal);

	return DenoisedSignal;
}
2018-12-18 21:41:17 -05:00
// static
/**
 * Returns the built-in denoiser.
 *
 * The FDefaultScreenSpaceDenoiser instance is allocated the first time this function runs and is
 * held in a function-local static, so every call returns the same pointer. It is never deleted here.
 */
const IScreenSpaceDenoiser* IScreenSpaceDenoiser::GetDefaultDenoiser()
{
	static IScreenSpaceDenoiser* const BuiltInDenoiser = new FDefaultScreenSpaceDenoiser;
	return BuiltInDenoiser;
}
2020-04-02 12:24:48 -04:00
/** Returns the current render-thread value of the reflection denoiser console variable. */
int GetReflectionsDenoiserMode()
{
	const int ReflectionDenoiserMode = CVarUseReflectionDenoiser.GetValueOnRenderThread();
	return ReflectionDenoiserMode;
}
// static
/**
 * Maps a denoiser console variable to the denoiser mode to use.
 *
 * @param CVar  Console variable holding the requested mode; read on the render thread.
 * @return EMode::Disabled when the value is 0; EMode::DefaultDenoiser when the value is 1 or when
 *         GScreenSpaceDenoiser is the object returned by GetDefaultDenoiser();
 *         EMode::ThirdPartyDenoiser otherwise.
 */
IScreenSpaceDenoiser::EMode IScreenSpaceDenoiser::GetDenoiserMode(const TAutoConsoleVariable<int32>& CVar)
{
	const int32 RequestedMode = CVar.GetValueOnRenderThread();

	// 0 switches denoising off regardless of which denoiser is installed.
	if (RequestedMode == 0)
	{
		return EMode::Disabled;
	}

	// The default denoiser comparison is only evaluated when the value is not 1.
	const bool bUseBuiltIn = (RequestedMode == 1) || (GScreenSpaceDenoiser == GetDefaultDenoiser());
	return bUseBuiltIn ? EMode::DefaultDenoiser : EMode::ThirdPartyDenoiser;
}