2020-07-06 18:58:26 -04:00
// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VirtualShadowMap . h :
= = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = */
# include "VirtualShadowMapArray.h"
2022-01-27 15:49:31 -05:00
# include "VirtualShadowMapVisualizationData.h"
2022-04-12 23:44:41 -04:00
# include "VirtualShadowMapDefinitions.h"
2020-07-06 18:58:26 -04:00
# include "../BasePassRendering.h"
2022-01-27 15:49:31 -05:00
# include "../ScreenPass.h"
2021-05-20 19:32:20 -04:00
# include "Components/LightComponent.h"
2020-07-06 18:58:26 -04:00
# include "RendererModule.h"
2020-10-09 22:42:26 -04:00
# include "Rendering/NaniteResources.h"
2020-07-06 18:58:26 -04:00
# include "ShaderPrint.h"
# include "ShaderPrintParameters.h"
# include "VirtualShadowMapCacheManager.h"
# include "VirtualShadowMapClipmap.h"
2021-01-20 08:46:15 -04:00
# include "ComponentRecreateRenderStateContext.h"
2021-03-05 05:14:13 -04:00
# include "HairStrands/HairStrandsData.h"
2021-12-03 01:31:24 -05:00
# include "SceneTextureReductions.h"
2021-12-03 16:08:27 -05:00
# include "GPUMessaging.h"
2021-12-03 16:38:33 -05:00
# include "InstanceCulling/InstanceCullingMergedContext.h"
2020-07-06 18:58:26 -04:00
2022-04-07 18:36:13 -04:00
// Debug switch: when non-zero, FVirtualShadowMapArray::Initialize() skips the CacheManager->IsValid()
// test and enables the separate static array slice whenever r.Shadow.Virtual.Cache.StaticSeparate is set.
# define DEBUG_ALLOW_STATIC_SEPARATE_WITHOUT_CACHING 0
2021-01-19 10:17:05 -04:00
// Static uniform buffer slot used by the virtual shadow map uniform buffer struct implemented below.
IMPLEMENT_STATIC_UNIFORM_BUFFER_SLOT ( VirtualShadowMapUbSlot ) ;
2020-07-06 18:58:26 -04:00
2021-03-08 23:14:54 -04:00
// Implements FVirtualShadowMapUniformParameters under the name given by the string argument, bound to the slot above.
IMPLEMENT_STATIC_UNIFORM_BUFFER_STRUCT ( FVirtualShadowMapUniformParameters , " VirtualShadowMap " , VirtualShadowMapUbSlot ) ;
2020-07-06 18:58:26 -04:00
struct FShadowMapCacheData
{
2021-07-28 17:00:37 -04:00
int32 PrevVirtualShadowMapId = INDEX_NONE ;
2020-07-06 18:58:26 -04:00
} ;
// Element type named by the PrevPhysicalPageMetaData structured-buffer SRV in FCacheDataParameters.
// Members carry no default initializers; nothing in this section constructs the struct on the CPU.
// NOTE(review): presumably must stay in sync with the matching shader-side struct - confirm.
struct FPhysicalPageMetaData
2021-07-28 17:00:37 -04:00
{
uint32 Flags ;
2020-07-06 18:58:26 -04:00
uint32 Age ;
2022-03-23 15:54:41 -04:00
uint32 VirtualPageOffset ;
2020-07-06 18:58:26 -04:00
} ;
2021-01-22 04:59:02 -04:00
/** Backing storage for r.Shadow.Virtual.Enable (defaults to 0). */
int32 GEnableVirtualShadowMaps = 0;

/**
 * Console binding for GEnableVirtualShadowMaps.
 * The change callback instantiates a scoped FGlobalComponentRecreateRenderStateContext.
 */
FAutoConsoleVariableRef CVarEnableVirtualShadowMaps(
	TEXT(" r.Shadow.Virtual.Enable "),
	GEnableVirtualShadowMaps,
	TEXT(" Enable Virtual Shadow Maps. "),
	FConsoleVariableDelegate::CreateLambda(
		[](IConsoleVariable* /*ChangedVariable*/)
		{
			// The depth state differs per shadow method, so cached draw commands have to be
			// rebuilt when the method changes (see SetStateForShadowDepth).
			FGlobalComponentRecreateRenderStateContext RecreateRenderState;
		}),
	ECVF_Scalability | ECVF_RenderThreadSafe
);
2021-03-23 21:23:57 -04:00
/** Requested upper bound on physical pool pages (default 2048); Initialize() rounds it up to whole rows. */
TAutoConsoleVariable<int32> CVarMaxPhysicalPages(
	TEXT(" r.Shadow.Virtual.MaxPhysicalPages "),
	2048,
	TEXT(" Maximum number of physical pages in the pool. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);

/** Non-zero (default 1) requests a separate set of physical pages for static objects. */
TAutoConsoleVariable<int32> CVarCacheStaticSeparate(
	TEXT(" r.Shadow.Virtual.Cache.StaticSeparate "),
	1,
	TEXT(" When enabled, caches static objects in separate pages from dynamic objects. \n ")
	TEXT(" This can improve performance in largely static scenes, but doubles the memory cost of the physical page pool. "),
	ECVF_RenderThreadSafe
);
2020-07-06 18:58:26 -04:00
/** Stats display toggle (default 0); the help text notes that ShaderPrint must be toggled as well. */
static TAutoConsoleVariable<int32> CVarShowStats(
	TEXT(" r.Shadow.Virtual.ShowStats "),
	0,
	TEXT(" ShowStats, also toggle shaderprint one! "),
	ECVF_RenderThreadSafe
);

/** LOD bias for local lights (default 0.0): per the help text, each -1.0 doubles resolution and each +1.0 halves it. */
static TAutoConsoleVariable<float> CVarResolutionLodBiasLocal(
	TEXT(" r.Shadow.Virtual.ResolutionLodBiasLocal "),
	0.0f,
	TEXT(" Bias applied to LOD calculations for local lights. -1.0 doubles resolution, 1.0 halves it and so on. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);
2021-12-03 16:23:04 -05:00
/**
 * Page dilation border for directional lights, as a fraction of a page (default 0.05).
 * Help text fixed: "adacent" -> "adjacent".
 */
static TAutoConsoleVariable<float> CVarPageDilationBorderSizeDirectional(
	TEXT(" r.Shadow.Virtual.PageDilationBorderSizeDirectional "),
	0.05f,
	TEXT(" If a screen pixel falls within this fraction of a page border for directional lights, the adjacent page will also be mapped. ")
	TEXT(" Higher values can reduce page misses at screen edges or disocclusions, but increase total page counts. "),
	ECVF_RenderThreadSafe
);
/**
 * Page dilation border for local lights, as a fraction of a page (default 0.05).
 * Help text fixed: "adacent" -> "adjacent".
 */
static TAutoConsoleVariable<float> CVarPageDilationBorderSizeLocal(
	TEXT(" r.Shadow.Virtual.PageDilationBorderSizeLocal "),
	0.05f,
	TEXT(" If a screen pixel falls within this fraction of a page border for local lights, the adjacent page will also be mapped. ")
	TEXT(" Higher values can reduce page misses at screen edges or disocclusions, but increase total page counts. "),
	ECVF_RenderThreadSafe
);
2021-02-18 21:09:05 -04:00
/** Page marking from depth-buffer pixels (default 1); can be disabled for profiling and debugging. */
TAutoConsoleVariable<int32> CVarMarkPixelPages(
	TEXT(" r.Shadow.Virtual.MarkPixelPages "),
	1,
	TEXT(" Marks pages in virtual shadow maps based on depth buffer pixels. Ability to disable is primarily for profiling and debugging. "),
	ECVF_RenderThreadSafe
);

/** Coarse page marking for directional lights (default 1). */
TAutoConsoleVariable<int32> CVarMarkCoarsePagesDirectional(
	TEXT(" r.Shadow.Virtual.MarkCoarsePagesDirectional "),
	1,
	TEXT(" Marks coarse pages in directional light virtual shadow maps so that low resolution data is available everywhere. ")
	TEXT(" Ability to disable is primarily for profiling and debugging. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);

/** Coarse page marking for local lights (default 1). */
TAutoConsoleVariable<int32> CVarMarkCoarsePagesLocal(
	TEXT(" r.Shadow.Virtual.MarkCoarsePagesLocal "),
	1,
	TEXT(" Marks coarse pages in local light virtual shadow maps so that low resolution data is available everywhere. ")
	TEXT(" Ability to disable is primarily for profiling and debugging. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);

/** Whether non-Nanite geometry is included in coarse pages (default 1). */
TAutoConsoleVariable<int32> CVarCoarsePagesIncludeNonNanite(
	TEXT(" r.Shadow.Virtual.NonNanite.IncludeInCoarsePages "),
	1,
	TEXT(" Include non-Nanite geometry in coarse pages. ")
	TEXT(" Rendering non-Nanite geometry into large coarse pages can be expensive; disabling this can be a significant performance win. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);
2021-01-20 08:46:15 -04:00
/** Selects the clipmap to show stats for; -1 (the default) turns the display off. */
static TAutoConsoleVariable<int32> CVarShowClipmapStats(
	TEXT(" r.Shadow.Virtual.ShowClipmapStats "),
	-1,
	TEXT(" Set to the number of clipmap you want to show stats for (-1 == off) "),
	ECVF_RenderThreadSafe
);

/** Non-zero (default 1) sets bCullBackfacingPixels in FVirtualShadowMapArray::Initialize(). */
static TAutoConsoleVariable<int32> CVarCullBackfacingPixels(
	TEXT(" r.Shadow.Virtual.CullBackfacingPixels "),
	1,
	TEXT(" When enabled does not generate shadow data for pixels that are backfacing to the light. "),
	ECVF_RenderThreadSafe
);
2021-04-20 10:31:17 -04:00
int32 GEnableNonNaniteVSM = 1 ;
2021-02-25 05:03:27 -04:00
FAutoConsoleVariableRef CVarEnableNonNaniteVSM (
TEXT ( " r.Shadow.Virtual.NonNaniteVSM " ) ,
GEnableNonNaniteVSM ,
2021-04-20 10:31:17 -04:00
TEXT ( " Enable support for non-nanite Virtual Shadow Maps. " )
2021-02-25 05:03:27 -04:00
TEXT ( " Read-only and to be set in a config file (requires restart). " ) ,
ECVF_RenderThreadSafe | ECVF_ReadOnly
) ;
2020-12-16 17:57:13 -04:00
2021-12-02 18:25:13 -05:00
/** HZB culling mode for non-Nanite instances (default 2: try to use the current frame's Nanite HZB). */
static TAutoConsoleVariable<int32> CVarNonNaniteVsmUseHzb(
	TEXT(" r.Shadow.Virtual.NonNanite.UseHZB "),
	2,
	TEXT(" Cull Non-Nanite instances using HZB. If set to 2, attempt to use Nanite-HZB from the current frame. "),
	ECVF_RenderThreadSafe
);

/**
 * Default 1. The help text is only a placeholder.
 * NOTE(review): going by the name, presumably selects an indirect path for physical page initialization - confirm.
 */
TAutoConsoleVariable<int32> CVarInitializePhysicalUsingIndirect(
	TEXT(" r.Shadow.Virtual.InitPhysicalUsingIndirect "),
	1,
	TEXT(" . "),
	ECVF_RenderThreadSafe
);

/**
 * Default 1. The help text is only a placeholder.
 * NOTE(review): going by the name, presumably selects an indirect path for physical page merging - confirm.
 */
TAutoConsoleVariable<int32> CVarMergePhysicalUsingIndirect(
	TEXT(" r.Shadow.Virtual.MergePhysicalUsingIndirect "),
	1,
	TEXT(" . "),
	ECVF_RenderThreadSafe
);
2021-12-03 16:23:04 -05:00
/** Per-pixel light budget for one pass projection (default 16); Initialize() caps the value at 32. */
TAutoConsoleVariable<int32> CVarVirtualShadowOnePassProjectionMaxLights(
	TEXT(" r.Shadow.Virtual.OnePassProjection.MaxLightsPerPixel "),
	16,
	TEXT(" Maximum lights per pixel that get full filtering when using one pass projection and clustered shading. ")
	TEXT(" Generally set to 8 (32bpp), 16 (64bpp) or 32 (128bpp). Lower values require less transient VRAM during the lighting pass. "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);

/**
 * Default 1. The help text is only a placeholder.
 * NOTE(review): going by the name, presumably toggles batching of non-Nanite rendering - confirm.
 */
TAutoConsoleVariable<int32> CVarDoNonNaniteBatching(
	TEXT(" r.Shadow.Virtual.NonNanite.Batch "),
	1,
	TEXT(" . "),
	ECVF_RenderThreadSafe
);
2022-01-27 15:49:31 -05:00
2021-05-20 19:32:20 -04:00
# if !UE_BUILD_SHIPPING
/** Request flag raised by DumpVSMLightNames(); it is not read anywhere in this section. */
bool GDumpVSMLightNames = false;

/**
 * Console command handler: enqueues a render command that sets GDumpVSMLightNames,
 * so the write happens inside the enqueued command rather than on the calling thread.
 */
void DumpVSMLightNames()
{
	ENQUEUE_RENDER_COMMAND(DumpVSMLightNames)(
		[](FRHICommandList& RHICmdList)
		{
			GDumpVSMLightNames = true;
		});
}

/** Registers the dump command. Help text fixed: "non-shiping" -> "non-shipping". */
FAutoConsoleCommand CmdDumpVSMLightNames(
	TEXT(" r.Shadow.Virtual.Visualize.DumpLightNames "),
	TEXT(" Dump light names with virtual shadow maps (for developer use in non-shipping builds) "),
	FConsoleCommandDelegate::CreateStatic(DumpVSMLightNames)
);
2022-01-27 15:49:31 -05:00
/** Backing storage for r.Shadow.Virtual.Visualize.LightName; empty by default. */
FString GVirtualShadowMapVisualizeLightName;

/** Console binding for the light name to visualize. Help text fixed: "non-shiping" -> "non-shipping". */
FAutoConsoleVariableRef CVarVisualizeLightName(
	TEXT(" r.Shadow.Virtual.Visualize.LightName "),
	GVirtualShadowMapVisualizeLightName,
	TEXT(" Sets the name of a specific light to visualize (for developer use in non-shipping builds) "),
	ECVF_RenderThreadSafe
);
2021-05-20 19:32:20 -04:00
2022-01-27 15:49:31 -05:00
/** Visualization overlay layout (default 0): 0 = full screen, 1 = thumbnail, 2 = split screen. */
static TAutoConsoleVariable<int32> CVarVisualizeLayout(
	TEXT(" r.Shadow.Virtual.Visualize.Layout "),
	0,
	TEXT(" Overlay layout when virtual shadow map visualization is enabled: \n ")
	TEXT(" 0: Full screen \n ")
	TEXT(" 1: Thumbnail \n ")
	TEXT(" 2: Split screen "),
	ECVF_RenderThreadSafe
);
/**
 * Debug toggle (default 0); the cvar carries no help text.
 * NOTE(review): going by the name, presumably skips the physical page merge step - confirm.
 */
TAutoConsoleVariable<int32> CVarDebugSkipMergePhysical(
	TEXT(" r.Shadow.Virtual.DebugSkipMergePhysical "),
	0,
	TEXT(" "),
	ECVF_RenderThreadSafe
);

/** Debug toggle (default 0) for skipping cached-page invalidation when geometry moves. */
TAutoConsoleVariable<int32> CVarDebugSkipDynamicPageInvalidation(
	TEXT(" r.Shadow.Virtual.Cache.DebugSkipDynamicPageInvalidation "),
	0,
	TEXT(" Skip invalidation of cached pages when geometry moves for debugging purposes. This will create obvious visual artifacts when disabled. \n "),
	ECVF_RenderThreadSafe
);
# endif // !UE_BUILD_SHIPPING
2022-01-20 04:59:27 -05:00
/**
 * Distance limit, in world units, for invalidations caused by material position effects.
 * Negative values (default -1.0) mean no limit.
 */
static TAutoConsoleVariable<float> CVarMaxMaterialPositionInvalidationRange(
	TEXT(" r.Shadow.Virtual.Cache.MaxMaterialPositionInvalidationRange "),
	-1.0f,
	TEXT(" Beyond this distance in world units, material position effects (e.g., WPO or PDO) cease to cause VSM invalidations. \n ")
	TEXT(" This can be used to tune performance by reducing re-draw overhead, but causes some artifacts. \n ")
	TEXT(" < 0 <=> infinite (default) "),
	ECVF_Scalability | ECVF_RenderThreadSafe
);
2022-03-23 15:54:41 -04:00
/**
 * HZB occlusion mode for Nanite virtual shadow maps (default 2).
 * Initialize() treats any non-zero value as "HZB on" and exactly 2 as "two-pass".
 */
static TAutoConsoleVariable<int32> CVarShadowsVirtualUseHZB(
	TEXT(" r.Shadow.Virtual.UseHZB "),
	2,
	TEXT(" Enables HZB for (Nanite) Virtual Shadow Maps - Non-Nanite unfortunately has a separate flag with different semantics: r.Shadow.Virtual.NonNanite.UseHZB. \n ")
	TEXT(" 0 - No HZB occlusion culling \n ")
	TEXT(" 1 - Approximate Single-pass HZB occlusion culling (using previous frame HZB) \n ")
	TEXT(" 2 - Two-pass occlusion culling (default). "),
	ECVF_RenderThreadSafe
);

/** When non-zero, requests a full HZB update every frame instead of only dirty pages (default 0). */
static TAutoConsoleVariable<int32> CVarShadowsVirtualForceFullHZBUpdate(
	TEXT(" r.Shadow.Virtual.ForceFullHZBUpdate "),
	0,
	TEXT(" Forces full HZB update every frame rather than just dirty pages. \n "),
	ECVF_RenderThreadSafe
);
2021-02-18 21:09:05 -04:00
/**
 * Builds the matrix taking translated-world positions to shadow map UV space.
 *
 * @param TranslatedWorldToShadowView  Translated world -> shadow view transform.
 * @param ViewToClip                   Shadow view -> clip transform.
 * @return (TranslatedWorldToShadowView * ViewToClip) followed by the clip -> UV scale and bias.
 */
FMatrix CalcTranslatedWorldToShadowUVMatrix(
	const FMatrix& TranslatedWorldToShadowView,
	const FMatrix& ViewToClip)
{
	// Clip -> UV: scale XY by (0.5, -0.5) (flipping Y), then offset by (0.5, 0.5). Z passes through unchanged.
	const FMatrix ClipToUV = FScaleMatrix(FVector(0.5f, -0.5f, 1.0f)) * FTranslationMatrix(FVector(0.5f, 0.5f, 0.0f));
	const FMatrix WorldToClip = TranslatedWorldToShadowView * ViewToClip;
	return WorldToClip * ClipToUV;
}
2020-12-16 17:57:13 -04:00
/**
 * Returns the inverse of the transpose of the translated-world -> shadow UV matrix.
 *
 * @param TranslatedWorldToShadowView  Translated world -> shadow view transform.
 * @param ViewToClip                   Shadow view -> clip transform.
 */
FMatrix CalcTranslatedWorldToShadowUVNormalMatrix(
	const FMatrix& TranslatedWorldToShadowView,
	const FMatrix& ViewToClip)
{
	const FMatrix WorldToUV = CalcTranslatedWorldToShadowUVMatrix(TranslatedWorldToShadowView, ViewToClip);
	const FMatrix WorldToUVTransposed = WorldToUV.GetTransposed();
	return WorldToUVTransposed.Inverse();
}
2022-03-23 15:54:41 -04:00
template < typename ShaderType >
static bool SetStatsArgsAndPermutation ( FRDGBuilder & GraphBuilder , FRDGBufferRef StatsBufferRDG , typename ShaderType : : FParameters * OutPassParameters , typename ShaderType : : FPermutationDomain & OutPermutationVector )
{
bool bGenerateStats = StatsBufferRDG ! = nullptr ;
if ( bGenerateStats )
{
OutPassParameters - > OutStatsBuffer = GraphBuilder . CreateUAV ( StatsBufferRDG ) ;
}
;
OutPermutationVector . template Set < typename ShaderType : : FGenerateStatsDim > ( bGenerateStats ) ;
return bGenerateStats ;
}
2020-07-06 18:58:26 -04:00
/** Default construction does no work of its own; all setup happens in Initialize(). */
FVirtualShadowMapArray::FVirtualShadowMapArray() = default;
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
// Shader parameter struct filled by SetCacheDataShaderParameters(): one freshly uploaded per-shadow-map
// cache buffer plus SRVs over the cache manager's PrevBuffers.
BEGIN_SHADER_PARAMETER_STRUCT ( FCacheDataParameters , )
// One FShadowMapCacheData element per shadow map in the array.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FShadowMapCacheData > , ShadowMapCacheData )
// The remaining entries are SRVs over the cache manager's PrevBuffers members of the matching name.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PrevPageFlags )
2021-01-21 19:15:53 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PrevPageTable )
2020-07-06 18:58:26 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPhysicalPageMetaData > , PrevPhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PrevDynamicCasterPageFlags )
2022-02-02 02:18:54 -05:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PrevProjectionData )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
2021-03-23 21:23:57 -04:00
/**
 * Fills CacheDataParameters with the per-shadow-map cache data and the cache manager's previous buffers.
 *
 * @param GraphBuilder         Render graph used to create the upload buffer and SRVs.
 * @param ShadowMaps           Shadow maps of the current array; one cache data element is emitted per entry.
 * @param CacheManager         Source of the PrevBuffers registered as external buffers (dereferenced unconditionally).
 * @param CacheDataParameters  Output parameter struct; every member is overwritten.
 */
static void SetCacheDataShaderParameters(FRDGBuilder& GraphBuilder, const TArray<FVirtualShadowMap*, SceneRenderingAllocator>& ShadowMaps, FVirtualShadowMapArrayCacheManager* CacheManager, FCacheDataParameters& CacheDataParameters)
{
	TArray<FShadowMapCacheData, SceneRenderingAllocator> ShadowMapCacheData;
	ShadowMapCacheData.AddDefaulted(ShadowMaps.Num());
	for (int32 SmIndex = 0; SmIndex < ShadowMaps.Num(); ++SmIndex)
	{
		// Bind by const reference: copying the shared pointer for every shadow map only adds reference-count traffic.
		const TSharedPtr<FVirtualShadowMapCacheEntry>& CacheEntry = ShadowMaps[SmIndex]->VirtualShadowMapCacheEntry;
		const bool bHasValidEntry = CacheEntry != nullptr && CacheEntry->IsValid();

		// Shadow maps without a valid cache entry report INDEX_NONE.
		ShadowMapCacheData[SmIndex].PrevVirtualShadowMapId = bHasValidEntry ? CacheEntry->PrevVirtualShadowMapId : INDEX_NONE;
	}

	CacheDataParameters.ShadowMapCacheData = GraphBuilder.CreateSRV(CreateStructuredBuffer(GraphBuilder, TEXT(" Shadow.Virtual.ShadowMapCacheData "), ShadowMapCacheData));
	CacheDataParameters.PrevPageFlags = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(CacheManager->PrevBuffers.PageFlags, TEXT(" Shadow.Virtual.PrevPageFlags ")));
	CacheDataParameters.PrevPageTable = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(CacheManager->PrevBuffers.PageTable, TEXT(" Shadow.Virtual.PrevPageTable ")));
	CacheDataParameters.PrevPhysicalPageMetaData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(CacheManager->PrevBuffers.PhysicalPageMetaData, TEXT(" Shadow.Virtual.PrevPhysicalPageMetaData ")));
	CacheDataParameters.PrevDynamicCasterPageFlags = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(CacheManager->PrevBuffers.DynamicCasterPageFlags, TEXT(" Shadow.Virtual.PrevDynamicCasterPageFlags ")));
	CacheDataParameters.PrevProjectionData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(CacheManager->PrevBuffers.ProjectionData, TEXT(" Shadow.Virtual.PrevProjectionData ")));
}
2021-03-08 23:14:54 -04:00
/**
 * Creates an RDG buffer of 4-byte elements sized to hold InitialData and queues the upload of its contents.
 *
 * @param GraphBuilder  Render graph that owns the buffer and the queued upload.
 * @param Name          Debug name given to the buffer.
 * @param InitialData   Projection data copied into the buffer.
 * @return The created buffer.
 */
static FRDGBufferRef CreateProjectionDataBuffer(
	FRDGBuilder& GraphBuilder,
	const TCHAR* Name,
	const TArray<FVirtualShadowMapProjectionShaderData, SceneRenderingAllocator>& InitialData)
{
	constexpr uint32 BytesPerWord = 4;
	const uint64 UploadSizeBytes = InitialData.Num() * InitialData.GetTypeSize();

	// The buffer is flagged as both byte-address and structured, so it is described in 4-byte elements.
	FRDGBufferDesc BufferDesc;
	BufferDesc.Usage = EBufferUsageFlags::UnorderedAccess | EBufferUsageFlags::ShaderResource | EBufferUsageFlags::ByteAddressBuffer | EBufferUsageFlags::StructuredBuffer;
	BufferDesc.BytesPerElement = BytesPerWord;
	BufferDesc.NumElements = UploadSizeBytes / BytesPerWord;

	FRDGBufferRef ProjectionBuffer = GraphBuilder.CreateBuffer(BufferDesc, Name);
	GraphBuilder.QueueBufferUpload(ProjectionBuffer, InitialData.GetData(), UploadSizeBytes);
	return ProjectionBuffer;
}
2021-03-23 21:23:57 -04:00
void FVirtualShadowMapArray : : Initialize ( FRDGBuilder & GraphBuilder , FVirtualShadowMapArrayCacheManager * InCacheManager , bool bInEnabled )
2021-01-12 14:40:19 -04:00
{
2021-03-23 21:23:57 -04:00
bInitialized = true ;
2021-01-22 04:59:02 -04:00
bEnabled = bInEnabled ;
2021-03-23 21:23:57 -04:00
CacheManager = InCacheManager ;
2021-03-08 23:14:54 -04:00
2021-09-01 13:13:01 -04:00
bCullBackfacingPixels = CVarCullBackfacingPixels . GetValueOnRenderThread ( ) ! = 0 ;
2022-03-23 15:54:41 -04:00
bUseHzbOcclusion = CVarShadowsVirtualUseHZB . GetValueOnRenderThread ( ) ! = 0 ;
bUseTwoPassHzbOcclusion = CVarShadowsVirtualUseHZB . GetValueOnRenderThread ( ) = = 2 ;
2021-09-01 13:13:01 -04:00
2021-03-08 23:14:54 -04:00
UniformParameters . NumShadowMaps = 0 ;
2021-03-15 22:06:28 -04:00
UniformParameters . NumDirectionalLights = 0 ;
2022-02-09 19:28:57 -05:00
UniformParameters . MaxPhysicalPages = 0 ;
2022-04-07 18:36:13 -04:00
UniformParameters . StaticCachedArrayIndex = 0 ;
2022-02-09 19:28:57 -05:00
// NOTE: Most uniform values don't matter when VSM is disabled
2021-12-03 16:23:04 -05:00
2021-03-08 23:14:54 -04:00
// Reference dummy data in the UB initially
2022-01-18 13:05:54 -05:00
const uint32 DummyPageTableElement = 0xFFFFFFFF ;
UniformParameters . PageTable = GraphBuilder . CreateSRV ( GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , sizeof ( DummyPageTableElement ) , DummyPageTableElement ) ) ;
2021-05-05 13:20:09 -04:00
UniformParameters . ProjectionData = GraphBuilder . CreateSRV ( GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , sizeof ( FVirtualShadowMapProjectionShaderData ) ) ) ;
2022-01-18 13:05:54 -05:00
UniformParameters . PageFlags = GraphBuilder . CreateSRV ( GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , sizeof ( uint32 ) ) ) ;
UniformParameters . PageRectBounds = GraphBuilder . CreateSRV ( GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , sizeof ( FIntVector4 ) ) ) ;
2021-03-08 23:14:54 -04:00
2021-07-28 17:00:37 -04:00
if ( bEnabled )
{
2022-02-09 19:28:57 -05:00
// Fixed physical page pool width, we adjust the height to accomodate the requested maximum
// NOTE: Row size in pages has to be POT since we use mask & shift in place of integer ops
// NOTE: This assumes GetMax2DTextureDimension() is a power of two on supported platforms
const uint32 PhysicalPagesX = FMath : : DivideAndRoundDown ( GetMax2DTextureDimension ( ) , FVirtualShadowMap : : PageSize ) ;
check ( FMath : : IsPowerOfTwo ( PhysicalPagesX ) ) ;
uint32 PhysicalPagesY = FMath : : DivideAndRoundUp ( ( uint32 ) FMath : : Max ( 1 , CVarMaxPhysicalPages . GetValueOnRenderThread ( ) ) , PhysicalPagesX ) ;
UniformParameters . MaxPhysicalPages = PhysicalPagesX * PhysicalPagesY ;
2022-04-07 18:36:13 -04:00
if ( CVarCacheStaticSeparate . GetValueOnRenderThread ( ) ! = 0 )
2022-02-09 19:28:57 -05:00
{
2022-04-07 18:36:13 -04:00
# if !DEBUG_ALLOW_STATIC_SEPARATE_WITHOUT_CACHING
if ( CacheManager - > IsValid ( ) )
# endif
{
// Enable separate static caching in the second texture array element
UniformParameters . StaticCachedArrayIndex = 1 ;
}
2022-02-09 19:28:57 -05:00
}
uint32 PhysicalX = PhysicalPagesX * FVirtualShadowMap : : PageSize ;
uint32 PhysicalY = PhysicalPagesY * FVirtualShadowMap : : PageSize ;
// TODO: Some sort of better fallback with warning?
// All supported platforms support at least 16384 texture dimensions which translates to 16384 max pages with default 128x128 page size
check ( PhysicalX < = GetMax2DTextureDimension ( ) ) ;
check ( PhysicalY < = GetMax2DTextureDimension ( ) ) ;
UniformParameters . PhysicalPageRowMask = ( PhysicalPagesX - 1 ) ;
UniformParameters . PhysicalPageRowShift = FMath : : FloorLog2 ( PhysicalPagesX ) ;
UniformParameters . RecPhysicalPoolSize = FVector4f ( 1.0f / PhysicalX , 1.0f / PhysicalY , 1.0f , 1.0f ) ;
UniformParameters . PhysicalPoolSize = FIntPoint ( PhysicalX , PhysicalY ) ;
UniformParameters . PhysicalPoolSizePages = FIntPoint ( PhysicalPagesX , PhysicalPagesY ) ;
// TODO: Parameterize this in a useful way; potentially modify it automatically
// when there are fewer lights in the scene and/or clustered shading settings differ.
UniformParameters . PackedShadowMaskMaxLightCount = FMath : : Min ( CVarVirtualShadowOnePassProjectionMaxLights . GetValueOnRenderThread ( ) , 32 ) ;
2021-07-28 17:00:37 -04:00
// If enabled, ensure we have a properly-sized physical page pool
// We can do this here since the pool is independent of the number of shadow maps
2022-04-07 18:36:13 -04:00
const int PoolArraySize = ShouldCacheStaticSeparately ( ) ? 2 : 1 ;
TRefCountPtr < IPooledRenderTarget > PhysicalPagePool = CacheManager - > SetPhysicalPoolSize ( GraphBuilder , GetPhysicalPoolSize ( ) , PoolArraySize ) ;
2021-07-28 17:00:37 -04:00
PhysicalPagePoolRDG = GraphBuilder . RegisterExternalTexture ( PhysicalPagePool ) ;
UniformParameters . PhysicalPagePool = PhysicalPagePoolRDG ;
2022-03-23 15:54:41 -04:00
}
2021-07-28 17:00:37 -04:00
else
{
CacheManager - > FreePhysicalPool ( ) ;
2022-04-07 18:36:13 -04:00
UniformParameters . PhysicalPagePool = GSystemTextures . GetZeroUIntArrayDummy ( GraphBuilder ) ;
}
2022-03-23 15:54:41 -04:00
if ( bEnabled & & bUseHzbOcclusion )
{
2022-04-07 18:36:13 -04:00
TRefCountPtr < IPooledRenderTarget > HzbPhysicalPagePool = CacheManager - > SetHZBPhysicalPoolSize ( GraphBuilder , GetHZBPhysicalPoolSize ( ) , PF_R32_FLOAT ) ;
2022-03-23 15:54:41 -04:00
HZBPhysical = GraphBuilder . RegisterExternalTexture ( HzbPhysicalPagePool ) ;
}
else
{
CacheManager - > FreeHZBPhysicalPool ( ) ;
HZBPhysical = GSystemTextures . GetZeroUIntDummy ( GraphBuilder ) ;
}
2021-03-08 23:14:54 -04:00
}
2021-03-23 21:23:57 -04:00
/**
 * Runs the destructor of every shadow map in the array, in order.
 * Only destructors are invoked here; no memory is released by this function.
 * NOTE(review): presumably the entries live in externally managed (e.g. frame-allocated) storage - confirm.
 */
FVirtualShadowMapArray::~FVirtualShadowMapArray()
{
	for (int32 ShadowMapIndex = 0; ShadowMapIndex < ShadowMaps.Num(); ++ShadowMapIndex)
	{
		ShadowMaps[ShadowMapIndex]->~FVirtualShadowMap();
	}
}
2021-12-03 16:23:04 -05:00
/**
 * Picks the pixel format of the packed shadow mask from the configured per-pixel light count.
 *
 * @return PF_R32_UINT for up to 8 lights, PF_R32G32_UINT for up to 16, PF_R32G32B32A32_UINT otherwise
 *         (asserting the count does not exceed 32).
 */
EPixelFormat FVirtualShadowMapArray::GetPackedShadowMaskFormat() const
{
	// TODO: Check if we're after any point that determines the format later too (light setup)
	check(bInitialized);

	// Each light currently takes 4 bits of the mask.
	const auto MaxLightCount = UniformParameters.PackedShadowMaskMaxLightCount;

	if (MaxLightCount <= 8)
	{
		return PF_R32_UINT;
	}

	if (MaxLightCount <= 16)
	{
		return PF_R32G32_UINT;
	}

	check(MaxLightCount <= 32);
	return PF_R32G32B32A32_UINT;
}
2021-03-23 21:23:57 -04:00
/** Returns the physical page pool size stored in the uniform parameters; requires Initialize() to have run. */
FIntPoint FVirtualShadowMapArray::GetPhysicalPoolSize() const
{
	check(bInitialized);

	const auto& PoolSize = UniformParameters.PhysicalPoolSize;
	return FIntPoint(PoolSize.X, PoolSize.Y);
}
2022-03-23 15:54:41 -04:00
/**
 * Computes the HZB pool size: each physical pool dimension is rounded up to a power of two
 * and halved, with a minimum of 1 per axis.
 */
FIntPoint FVirtualShadowMapArray::GetHZBPhysicalPoolSize() const
{
	check(bInitialized);

	const FIntPoint PoolSize = GetPhysicalPoolSize();
	const auto HZBWidth = FMath::Max(FPlatformMath::RoundUpToPowerOfTwo(PoolSize.X) >> 1, 1u);
	const auto HZBHeight = FMath::Max(FPlatformMath::RoundUpToPowerOfTwo(PoolSize.Y) >> 1, 1u);
	return FIntPoint(HZBWidth, HZBHeight);
}
2021-12-03 01:39:26 -05:00
/** Returns MaxPhysicalPages, doubled when static pages are cached separately. */
uint32 FVirtualShadowMapArray::GetTotalAllocatedPhysicalPages() const
{
	check(bInitialized);

	if (ShouldCacheStaticSeparately())
	{
		return 2U * UniformParameters.MaxPhysicalPages;
	}
	return UniformParameters.MaxPhysicalPages;
}
2021-03-08 23:14:54 -04:00
/**
 * Creates a uniform buffer from a copy of the current UniformParameters.
 * Each call allocates a fresh parameter block because the parameters change over the frame
 * as dummy references are replaced.
 */
TRDGUniformBufferRef<FVirtualShadowMapUniformParameters> FVirtualShadowMapArray::GetUniformBuffer(FRDGBuilder& GraphBuilder) const
{
	// TODO: Should we be caching this once all the relevant updates to parameters have been made in a frame?
	FVirtualShadowMapUniformParameters* ParametersSnapshot = GraphBuilder.AllocParameters<FVirtualShadowMapUniformParameters>();
	*ParametersSnapshot = UniformParameters;

	return GraphBuilder.CreateUniformBuffer(ParametersSnapshot);
}
2020-07-06 18:58:26 -04:00
void FVirtualShadowMapArray : : SetShaderDefines ( FShaderCompilerEnvironment & OutEnvironment )
{
2022-02-02 05:33:52 -05:00
static_assert ( FVirtualShadowMap : : Log2Level0DimPagesXY * 2U + NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS_BITS < = 32U , " Page indirection plus view index must fit into 32-bits for page-routing storage! " ) ;
2021-02-25 05:03:27 -04:00
OutEnvironment . SetDefine ( TEXT ( " ENABLE_NON_NANITE_VSM " ) , GEnableNonNaniteVSM ) ;
2020-07-06 18:58:26 -04:00
OutEnvironment . SetDefine ( TEXT ( " VSM_PAGE_SIZE " ) , FVirtualShadowMap : : PageSize ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_PAGE_SIZE_MASK " ) , FVirtualShadowMap : : PageSizeMask ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_LOG2_PAGE_SIZE " ) , FVirtualShadowMap : : Log2PageSize ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_LEVEL0_DIM_PAGES_XY " ) , FVirtualShadowMap : : Level0DimPagesXY ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_LOG2_LEVEL0_DIM_PAGES_XY " ) , FVirtualShadowMap : : Log2Level0DimPagesXY ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_MAX_MIP_LEVELS " ) , FVirtualShadowMap : : MaxMipLevels ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_VIRTUAL_MAX_RESOLUTION_XY " ) , FVirtualShadowMap : : VirtualMaxResolutionXY ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_RASTER_WINDOW_PAGES " ) , FVirtualShadowMap : : RasterWindowPages ) ;
2021-01-19 10:17:05 -04:00
OutEnvironment . SetDefine ( TEXT ( " VSM_PAGE_TABLE_SIZE " ) , FVirtualShadowMap : : PageTableSize ) ;
2021-12-03 16:08:27 -05:00
OutEnvironment . SetDefine ( TEXT ( " VSM_NUM_STATS " ) , NumStats ) ;
2020-07-06 18:58:26 -04:00
OutEnvironment . SetDefine ( TEXT ( " INDEX_NONE " ) , INDEX_NONE ) ;
}
2021-03-08 23:14:54 -04:00
/**
 * Returns sampling parameters whose VirtualShadowMap member is a uniform buffer built from the current state.
 *
 * No validity check is performed: either VSMs are disabled and dummy data is expected, or valid data
 * should be present. Sampling before the ShadowDepths pass has rendered should not crash, but it is
 * not an intended production path and will not return valid shadow data.
 */
FVirtualShadowMapSamplingParameters FVirtualShadowMapArray::GetSamplingParameters(FRDGBuilder& GraphBuilder) const
{
	// TODO: Disabled warning until SkyAtmosphereLUT is moved after ShadowDepths
	//ensureMsgf(!IsEnabled() || IsAllocated(),
	//	TEXT("Attempt to use Virtual Shadow Maps before they have been rendered by ShadowDepths."));

	FVirtualShadowMapSamplingParameters SamplingParameters;
	SamplingParameters.VirtualShadowMap = GetUniformBuffer(GraphBuilder);
	return SamplingParameters;
}
2020-07-06 18:58:26 -04:00
class FVirtualPageManagementShader : public FGlobalShader
{
public :
// Kernel launch group sizes
static constexpr uint32 DefaultCSGroupXY = 8 ;
static constexpr uint32 DefaultCSGroupX = 256 ;
static constexpr uint32 GeneratePageFlagsGroupXYZ = 4 ;
static constexpr uint32 BuildExplicitBoundsGroupXY = 16 ;
2020-08-19 14:22:37 -04:00
FVirtualPageManagementShader ( )
{
}
2020-07-06 18:58:26 -04:00
FVirtualPageManagementShader ( const ShaderMetaType : : CompiledShaderInitializerType & Initializer )
2020-08-19 14:22:37 -04:00
: FGlobalShader ( Initializer )
{
}
2020-07-06 18:58:26 -04:00
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
2021-12-03 03:01:35 -05:00
return IsFeatureLevelSupported ( Parameters . Platform , ERHIFeatureLevel : : SM5 ) & &
DoesPlatformSupportNanite ( Parameters . Platform ) ;
2020-07-06 18:58:26 -04:00
}
/**
* Can be overridden by FVertexFactory subclasses to modify their compile environment just before compilation occurs .
*/
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FGlobalShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
FVirtualShadowMapArray : : SetShaderDefines ( OutEnvironment ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_DEFAULT_CS_GROUP_X " ) , DefaultCSGroupX ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_DEFAULT_CS_GROUP_XY " ) , DefaultCSGroupXY ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_GENERATE_PAGE_FLAGS_CS_GROUP_XYZ " ) , GeneratePageFlagsGroupXYZ ) ;
OutEnvironment . SetDefine ( TEXT ( " VSM_BUILD_EXPLICIT_BOUNDS_CS_XY " ) , BuildExplicitBoundsGroupXY ) ;
FForwardLightingParameters : : ModifyCompilationEnvironment ( Parameters . Platform , OutEnvironment ) ;
OutEnvironment . SetDefine ( TEXT ( " VF_SUPPORTS_PRIMITIVE_SCENE_DATA " ) , 1 ) ;
}
} ;
// Global shader class for the GeneratePageFlagsFromPixels entry point (bound by IMPLEMENT_GLOBAL_SHADER
// after this class). It inherits the permutation filter and compilation defines of FVirtualPageManagementShader.
class FGeneratePageFlagsFromPixelsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FGeneratePageFlagsFromPixelsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FGeneratePageFlagsFromPixelsCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2021-05-03 05:07:00 -04:00
// Integer permutation dimension with 3 values, exposed to the shader as PERMUTATION_INPUT_TYPE.
class FInputType : SHADER_PERMUTATION_INT ( " PERMUTATION_INPUT_TYPE " , 3 ) ;
using FPermutationDomain = TShaderPermutationDomain < FInputType > ;
2020-07-06 18:58:26 -04:00
// Pass parameters: uniform buffers and resources first, then scalar constants.
// NOTE(review): the declaration order presumably defines the parameter struct layout - do not reorder without confirming.
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2020-09-24 00:43:27 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FSceneTextureUniformParameters , SceneTexturesStruct )
2021-03-05 05:14:13 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FHairStrandsViewUniformParameters , HairStrands )
2020-07-06 18:58:26 -04:00
SHADER_PARAMETER_STRUCT_REF ( FViewUniformShaderParameters , View )
2022-01-11 15:57:18 -05:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FForwardLightData , ForwardLightData )
2020-07-06 18:58:26 -04:00
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D < uint2 > , VisBuffer64 )
// Output: page request flags written by the shader.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageRequestFlags )
2021-07-14 13:45:31 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , DirectionalLightIds )
2022-02-10 09:53:46 -05:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FStrataGlobalUniformParameters , Strata )
2021-05-03 05:07:00 -04:00
// Buffer accessed as indirect arguments (ERHIAccess::IndirectArgs).
RDG_BUFFER_ACCESS ( IndirectBufferArgs , ERHIAccess : : IndirectArgs )
2021-03-05 05:14:13 -04:00
SHADER_PARAMETER ( uint32 , InputType )
2020-07-06 18:58:26 -04:00
SHADER_PARAMETER ( uint32 , NumDirectionalLightSmInds )
SHADER_PARAMETER ( uint32 , bPostBasePass )
2021-04-13 16:54:28 -04:00
// The next three floats share their names with the r.Shadow.Virtual.ResolutionLodBiasLocal and
// r.Shadow.Virtual.PageDilationBorderSize* cvars; the code that fills them is outside this section.
SHADER_PARAMETER ( float , ResolutionLodBiasLocal )
2021-12-03 16:23:04 -05:00
SHADER_PARAMETER ( float , PageDilationBorderSizeDirectional )
SHADER_PARAMETER ( float , PageDilationBorderSizeLocal )
2021-09-01 13:13:01 -04:00
SHADER_PARAMETER ( uint32 , bCullBackfacingPixels )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FGeneratePageFlagsFromPixelsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " GeneratePageFlagsFromPixels " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
2021-02-18 21:09:05 -04:00
// Global compute shader registered (at the end of this block) for the MarkCoarsePages entry point of
// VirtualShadowMapPageManagement.usf. It has no permutations; its single UAV is OutPageRequestFlags.
class FMarkCoarsePagesCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FMarkCoarsePagesCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FMarkCoarsePagesCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-02-18 21:09:05 -04:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageRequestFlags )
2021-03-08 23:14:54 -04:00
// Flags are passed as uint32 values.
SHADER_PARAMETER ( uint32 , bMarkCoarsePagesLocal )
2021-08-07 15:19:34 -04:00
SHADER_PARAMETER ( uint32 , bIncludeNonNaniteGeometry )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER ( uint32 , ClipmapIndexMask )
2021-02-18 21:09:05 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FMarkCoarsePagesCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " MarkCoarsePages " , SF_Compute ) ;
2021-02-18 21:09:05 -04:00
2020-07-06 18:58:26 -04:00
// Global compute shader registered (at the end of this block) for the GenerateHierarchicalPageFlags
// entry point of VirtualShadowMapPageManagement.usf. Both buffers in the parameter struct are UAVs:
// OutPageFlags and OutPageRectBounds.
class FGenerateHierarchicalPageFlagsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FGenerateHierarchicalPageFlagsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FGenerateHierarchicalPageFlagsCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2022-01-18 13:05:54 -05:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageFlags )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FIntVector4 > , OutPageRectBounds )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FGenerateHierarchicalPageFlagsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " GenerateHierarchicalPageFlags " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
// Global compute shader registered (at the end of this block) for the InitPhysicalPageMetaData entry
// point of VirtualShadowMapPageManagement.usf. Binds the physical page metadata buffer and the free
// physical page list as UAVs.
class FInitPhysicalPageMetaData : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FInitPhysicalPageMetaData ) ;
SHADER_USE_PARAMETER_STRUCT ( FInitPhysicalPageMetaData , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-07-28 17:00:37 -04:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FPhysicalPageMetaData > , OutPhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutFreePhysicalPages )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FInitPhysicalPageMetaData , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " InitPhysicalPageMetaData " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
2021-07-28 17:00:37 -04:00
// Global compute shader registered (at the end of this block) for the CreateCachedPageMappings entry
// point of VirtualShadowMapPageManagement.usf.
// Two boolean permutations are compiled: HAS_CACHE_DATA and VSM_GENERATE_STATS.
class FCreateCachedPageMappingsCS : public FVirtualPageManagementShader
2020-07-06 18:58:26 -04:00
{
2021-07-28 17:00:37 -04:00
DECLARE_GLOBAL_SHADER ( FCreateCachedPageMappingsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FCreateCachedPageMappingsCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
class FHasCacheDataDim : SHADER_PERMUTATION_BOOL ( " HAS_CACHE_DATA " ) ;
2021-07-28 17:00:37 -04:00
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
2020-07-06 18:58:26 -04:00
using FPermutationDomain = TShaderPermutationDomain < FHasCacheDataDim , FGenerateStatsDim > ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-07-28 17:00:37 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
// Members of FCacheDataParameters are included inline into this struct.
SHADER_PARAMETER_STRUCT_INCLUDE ( FCacheDataParameters , CacheDataParameters )
// Page request flags are read-only here (SRV); the remaining buffers are UAVs.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PageRequestFlags )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageFlags )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageTable )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FPhysicalPageMetaData > , OutPhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
2021-12-02 23:08:02 -05:00
// NOTE(review): declared int32, whereas the b-prefixed flags of the other shaders in this file are uint32 - confirm intent.
SHADER_PARAMETER ( int32 , bDynamicPageInvalidation )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FCreateCachedPageMappingsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " CreateCachedPageMappings " , SF_Compute ) ;
2021-07-28 17:00:37 -04:00
// Global compute shader registered (at the end of this block) for the PackFreePages entry point of
// VirtualShadowMapPageManagement.usf. Reads PhysicalPageMetaData (SRV) and binds OutFreePhysicalPages
// as a UAV.
class FPackFreePagesCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FPackFreePagesCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FPackFreePagesCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPhysicalPageMetaData > , PhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutFreePhysicalPages )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FPackFreePagesCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " PackFreePages " , SF_Compute ) ;
2021-07-28 17:00:37 -04:00
// Global compute shader registered (at the end of this block) for the AllocateNewPageMappings entry
// point of VirtualShadowMapPageManagement.usf. One boolean permutation: VSM_GENERATE_STATS.
// PageRequestFlags is the only read-only buffer; every other buffer is bound as a UAV.
class FAllocateNewPageMappingsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FAllocateNewPageMappingsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FAllocateNewPageMappingsCS , FVirtualPageManagementShader )
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
using FPermutationDomain = TShaderPermutationDomain < FGenerateStatsDim > ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PageRequestFlags )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutFreePhysicalPages )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageFlags )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageTable )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FPhysicalPageMetaData > , OutPhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FAllocateNewPageMappingsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " AllocateNewPageMappings " , SF_Compute ) ;
2021-07-28 17:00:37 -04:00
2021-01-22 23:23:16 -04:00
// Global compute shader registered (at the end of this block) for the PropagateMappedMips entry point
// of VirtualShadowMapPageManagement.usf. Its only buffer is the page table, bound as a UAV.
class FPropagateMappedMipsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FPropagateMappedMipsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FPropagateMappedMipsCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-07-28 17:00:37 -04:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPageTable )
2021-01-22 23:23:16 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FPropagateMappedMipsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " PropagateMappedMips " , SF_Compute ) ;
2021-01-22 23:23:16 -04:00
2021-12-02 23:08:02 -05:00
// Global compute shader registered (at the end of this block) for the InitializePhysicalPages entry
// point of VirtualShadowMapPageManagement.usf. Reads PhysicalPageMetaData and binds the physical page
// pool texture as a UAV.
class FInitializePhysicalPagesCS : public FVirtualPageManagementShader
2020-07-06 18:58:26 -04:00
{
2021-12-02 23:08:02 -05:00
DECLARE_GLOBAL_SHADER ( FInitializePhysicalPagesCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FInitializePhysicalPagesCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-12-02 23:08:02 -05:00
// NOTE(review): declared as StructuredBuffer<uint> here, while other shaders in this file declare
// PhysicalPageMetaData as StructuredBuffer<FPhysicalPageMetaData> - confirm against the .usf declaration.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPageMetaData )
2021-07-28 17:00:37 -04:00
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D < uint > , OutPhysicalPagePool )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FInitializePhysicalPagesCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " InitializePhysicalPages " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
2021-12-02 23:09:13 -05:00
// Global compute shader registered (at the end of this block) for the SelectPagesToInitializeCS entry
// point of VirtualShadowMapPageManagement.usf. One boolean permutation: VSM_GENERATE_STATS.
// Reads PhysicalPageMetaData and binds an indirect-args buffer, a page list and the stats buffer as UAVs.
class FSelectPagesToInitializeCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FSelectPagesToInitializeCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FSelectPagesToInitializeCS , FVirtualPageManagementShader )
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
using FPermutationDomain = TShaderPermutationDomain < FGenerateStatsDim > ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPhysicalPageMetaData > , PhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , OutInitializePagesIndirectArgsBuffer )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPhysicalPagesToInitialize )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FSelectPagesToInitializeCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " SelectPagesToInitializeCS " , SF_Compute ) ;
2021-12-02 23:09:13 -05:00
// Global compute shader registered (at the end of this block) for the InitializePhysicalPagesIndirectCS
// entry point of VirtualShadowMapPageManagement.usf. Reads the PhysicalPagesToInitialize list, binds the
// physical page pool texture as a UAV, and declares an IndirectArgs buffer with indirect-argument access.
class FInitializePhysicalPagesIndirectCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FInitializePhysicalPagesIndirectCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FInitializePhysicalPagesIndirectCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPagesToInitialize )
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D < uint > , OutPhysicalPagePool )
RDG_BUFFER_ACCESS ( IndirectArgs , ERHIAccess : : IndirectArgs )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FInitializePhysicalPagesIndirectCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " InitializePhysicalPagesIndirectCS " , SF_Compute ) ;
2021-12-02 23:09:13 -05:00
// Global compute shader registered (at the end of this block) for the ClearIndirectDispatchArgs1DCS
// entry point of VirtualShadowMapPageManagement.usf. Takes an argument count, an argument stride and
// the indirect-args buffer (UAV). It is dispatched by AddClearIndirectDispatchArgs1DPass.
class FClearIndirectDispatchArgs1DCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FClearIndirectDispatchArgs1DCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FClearIndirectDispatchArgs1DCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-12-03 01:31:24 -05:00
SHADER_PARAMETER ( uint32 , NumIndirectArgs )
SHADER_PARAMETER ( uint32 , IndirectArgStride )
2021-12-02 23:09:13 -05:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , OutIndirectArgsBuffer )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FClearIndirectDispatchArgs1DCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " ClearIndirectDispatchArgs1DCS " , SF_Compute ) ;
2021-12-02 23:09:13 -05:00
2021-12-03 01:31:24 -05:00
/**
 * Adds a compute pass that runs FClearIndirectDispatchArgs1DCS over an indirect-args buffer.
 *
 * @param GraphBuilder       Render graph that receives the pass.
 * @param IndirectArgsRDG    Buffer bound as the shader's OutIndirectArgsBuffer UAV.
 * @param NumIndirectArgs    Forwarded to the shader; also sizes the dispatch (64 entries per group, rounded up).
 * @param IndirectArgStride  Forwarded to the shader unchanged.
 */
static void AddClearIndirectDispatchArgs1DPass(FRDGBuilder& GraphBuilder, FRDGBufferRef IndirectArgsRDG, uint32 NumIndirectArgs = 1U, uint32 IndirectArgStride = 4U)
{
	using FClearShader = FClearIndirectDispatchArgs1DCS;

	FClearShader::FParameters* ClearParameters = GraphBuilder.AllocParameters<FClearShader::FParameters>();
	ClearParameters->NumIndirectArgs = NumIndirectArgs;
	ClearParameters->IndirectArgStride = IndirectArgStride;
	ClearParameters->OutIndirectArgsBuffer = GraphBuilder.CreateUAV(IndirectArgsRDG);

	auto ClearShader = GetGlobalShaderMap(GMaxRHIFeatureLevel)->GetShader<FClearShader>();

	// One thread per indirect argument entry, 64 entries per group.
	const FIntVector GroupCount = FComputeShaderUtils::GetGroupCount(NumIndirectArgs, 64);

	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME(" ClearIndirectDispatchArgs "),
		ClearShader,
		ClearParameters,
		GroupCount);
}
2021-12-02 23:08:02 -05:00
// Global compute shader registered (at the end of this block) for the MergeStaticPhysicalPages entry
// point of VirtualShadowMapPageManagement.usf. Reads PhysicalPageMetaData and binds the physical page
// pool texture as a UAV. Used by the non-indirect path of FVirtualShadowMapArray::MergeStaticPhysicalPages.
class FMergeStaticPhysicalPagesCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FMergeStaticPhysicalPagesCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FMergeStaticPhysicalPagesCS , FVirtualPageManagementShader )
2020-07-06 18:58:26 -04:00
2021-12-02 23:08:02 -05:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
// NOTE(review): declared as StructuredBuffer<uint> here, while other shaders in this file declare
// PhysicalPageMetaData as StructuredBuffer<FPhysicalPageMetaData> - confirm against the .usf declaration.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPageMetaData )
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D < uint > , OutPhysicalPagePool )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FMergeStaticPhysicalPagesCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " MergeStaticPhysicalPages " , SF_Compute ) ;
2021-12-02 23:08:02 -05:00
2021-12-02 23:09:13 -05:00
// Global compute shader registered (at the end of this block) for the SelectPagesToMergeCS entry point
// of VirtualShadowMapPageManagement.usf. One boolean permutation: VSM_GENERATE_STATS.
// Reads PhysicalPageMetaData and binds an indirect-args buffer, a page list and the stats buffer as UAVs.
class FSelectPagesToMergeCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FSelectPagesToMergeCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FSelectPagesToMergeCS , FVirtualPageManagementShader )
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
using FPermutationDomain = TShaderPermutationDomain < FGenerateStatsDim > ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPhysicalPageMetaData > , PhysicalPageMetaData )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , OutMergePagesIndirectArgsBuffer )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPhysicalPagesToMerge )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FSelectPagesToMergeCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " SelectPagesToMergeCS " , SF_Compute ) ;
2021-12-02 23:09:13 -05:00
// Global compute shader registered (at the end of this block) for the MergeStaticPhysicalPagesIndirectCS
// entry point of VirtualShadowMapPageManagement.usf. Reads the PhysicalPagesToMerge list, binds the
// physical page pool texture as a UAV, and declares an IndirectArgs buffer with indirect-argument access.
class FMergeStaticPhysicalPagesIndirectCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FMergeStaticPhysicalPagesIndirectCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FMergeStaticPhysicalPagesIndirectCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPagesToMerge )
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D < uint > , OutPhysicalPagePool )
RDG_BUFFER_ACCESS ( IndirectArgs , ERHIAccess : : IndirectArgs )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FMergeStaticPhysicalPagesIndirectCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " MergeStaticPhysicalPagesIndirectCS " , SF_Compute ) ;
2021-12-02 23:09:13 -05:00
2021-12-02 23:08:02 -05:00
void FVirtualShadowMapArray : : MergeStaticPhysicalPages ( FRDGBuilder & GraphBuilder )
2020-07-06 18:58:26 -04:00
{
2021-01-12 14:40:19 -04:00
check ( IsEnabled ( ) ) ;
2021-12-02 23:08:02 -05:00
if ( ShadowMaps . Num ( ) = = 0 | | ! ShouldCacheStaticSeparately ( ) )
2021-01-12 14:40:19 -04:00
{
2020-07-06 18:58:26 -04:00
return ;
2021-01-12 14:40:19 -04:00
}
2020-07-06 18:58:26 -04:00
2021-12-02 23:12:09 -05:00
# if !UE_BUILD_SHIPPING
if ( CVarDebugSkipMergePhysical . GetValueOnRenderThread ( ) ! = 0 )
{
return ;
}
# endif
2021-12-02 23:08:02 -05:00
RDG_EVENT_SCOPE ( GraphBuilder , " FVirtualShadowMapArray::MergeStaticPhysicalPages " ) ;
2021-12-02 23:09:13 -05:00
if ( CVarMergePhysicalUsingIndirect . GetValueOnRenderThread ( ) ! = 0 )
{
FRDGBufferRef MergePagesIndirectArgsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateIndirectDesc ( 3 ) , TEXT ( " Shadow.Virtual.MergePagesIndirectArgs " ) ) ;
2021-12-03 01:39:26 -05:00
// Note: We use GetTotalAllocatedPhysicalPages() to size the buffer as the selection shader emits both static/dynamic pages separately when enabled.
FRDGBufferRef PhysicalPagesToMergeRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( int32 ) , GetTotalAllocatedPhysicalPages ( ) + 1 ) , TEXT ( " Shadow.Virtual.PhysicalPagesToMerge " ) ) ;
2021-12-02 23:09:13 -05:00
// 1. Initialize the indirect args buffer
AddClearIndirectDispatchArgs1DPass ( GraphBuilder , MergePagesIndirectArgsRDG ) ;
// 2. Filter the relevant physical pages and set up the indirect args
{
FSelectPagesToMergeCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSelectPagesToMergeCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutMergePagesIndirectArgsBuffer = GraphBuilder . CreateUAV ( MergePagesIndirectArgsRDG ) ;
PassParameters - > OutPhysicalPagesToMerge = GraphBuilder . CreateUAV ( PhysicalPagesToMergeRDG ) ;
2022-03-23 15:54:41 -04:00
2021-12-02 23:09:13 -05:00
FSelectPagesToMergeCS : : FPermutationDomain PermutationVector ;
2022-03-23 15:54:41 -04:00
SetStatsArgsAndPermutation < FSelectPagesToMergeCS > ( GraphBuilder , StatsBufferRDG , PassParameters , PermutationVector ) ;
2021-12-02 23:09:13 -05:00
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FSelectPagesToMergeCS > ( PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SelectPagesToMerge " ) ,
ComputeShader ,
PassParameters ,
2021-12-03 01:39:26 -05:00
FIntVector ( FMath : : DivideAndRoundUp ( GetMaxPhysicalPages ( ) , FSelectPagesToMergeCS : : DefaultCSGroupX ) , 1 , 1 )
2021-12-02 23:09:13 -05:00
) ;
}
// 3. Indirect dispatch to clear the selected pages
{
FMergeStaticPhysicalPagesIndirectCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FMergeStaticPhysicalPagesIndirectCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPhysicalPagePool = GraphBuilder . CreateUAV ( PhysicalPagePoolRDG ) ;
PassParameters - > IndirectArgs = MergePagesIndirectArgsRDG ;
PassParameters - > PhysicalPagesToMerge = GraphBuilder . CreateSRV ( PhysicalPagesToMergeRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FMergeStaticPhysicalPagesIndirectCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " MergeStaticPhysicalPagesIndirect " ) ,
ComputeShader ,
PassParameters ,
PassParameters - > IndirectArgs ,
0
) ;
}
}
else
2020-07-06 18:58:26 -04:00
{
2021-12-02 23:08:02 -05:00
FMergeStaticPhysicalPagesCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FMergeStaticPhysicalPagesCS : : FParameters > ( ) ;
2021-03-08 23:14:54 -04:00
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
2021-12-02 23:08:02 -05:00
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutPhysicalPagePool = GraphBuilder . CreateUAV ( PhysicalPagePoolRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FMergeStaticPhysicalPagesCS > ( ) ;
// Shader contains logic to deal with static cached pages if enabled
// We only need to launch one per page, even if there are multiple cached pages per page
2022-04-07 18:36:13 -04:00
FIntPoint PoolSize = GetPhysicalPoolSize ( ) ;
2020-07-06 18:58:26 -04:00
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2021-12-02 23:08:02 -05:00
RDG_EVENT_NAME ( " MergeStaticPhysicalPages " ) ,
2020-07-06 18:58:26 -04:00
ComputeShader ,
PassParameters ,
2021-07-28 17:00:37 -04:00
FIntVector (
2022-04-07 18:36:13 -04:00
FMath : : DivideAndRoundUp ( PoolSize . X , 16 ) ,
FMath : : DivideAndRoundUp ( PoolSize . Y , 16 ) ,
2021-07-28 17:00:37 -04:00
1 )
2020-07-06 18:58:26 -04:00
) ;
}
}
/**
 * Finds the first projected shadow of a light that has a virtual shadow map.
 *
 * @param LightInfo  Visible light whose AllProjectedShadows list is searched in order.
 * @return The first entry for which HasVirtualShadowMap() is true, or nullptr if there is none.
 */
FORCEINLINE FProjectedShadowInfo* GetVirtualShadowMapInfo(const FVisibleLightInfo& LightInfo)
{
	FProjectedShadowInfo* FirstVirtualShadow = nullptr;

	for (FProjectedShadowInfo* Candidate : LightInfo.AllProjectedShadows)
	{
		if (Candidate->HasVirtualShadowMap())
		{
			FirstVirtualShadow = Candidate;
			break;
		}
	}

	return FirstVirtualShadow;
}
// Global compute shader registered (at the end of this block) for the InitPageRectBounds entry point
// of VirtualShadowMapPageManagement.usf. Its only buffer is OutPageRectBounds, bound as a UAV.
class FInitPageRectBoundsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FInitPageRectBoundsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FInitPageRectBoundsCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2022-01-18 13:05:54 -05:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FIntVector4 > , OutPageRectBounds )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FInitPageRectBoundsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " InitPageRectBounds " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
2021-12-03 16:08:27 -05:00
// Global compute shader registered (at the end of this block) for the FeedbackStatusCS entry point of
// VirtualShadowMapPageManagement.usf. Reads the FreePhysicalPages buffer and includes the GPU message
// parameters together with a StatusMessageId.
class FVirtualSmFeedbackStatusCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FVirtualSmFeedbackStatusCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FVirtualSmFeedbackStatusCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , FreePhysicalPages )
// Members of GPUMessage::FParameters are included inline into this struct.
SHADER_PARAMETER_STRUCT_INCLUDE ( GPUMessage : : FParameters , GPUMessageParams )
SHADER_PARAMETER ( uint32 , StatusMessageId )
END_SHADER_PARAMETER_STRUCT ( )
// Only forwards to the base class implementation; no additional defines are set here.
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FVirtualPageManagementShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
}
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FVirtualSmFeedbackStatusCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " FeedbackStatusCS " , SF_Compute ) ;
2021-12-03 16:08:27 -05:00
2022-01-27 15:49:31 -05:00
void FVirtualShadowMapVisualizeLightSearch : : CheckLight ( const FLightSceneProxy * CheckProxy , int CheckVirtualShadowMapId )
2021-05-20 19:32:20 -04:00
{
# if !UE_BUILD_SHIPPING
2022-01-27 15:49:31 -05:00
FString CheckLightName = CheckProxy - > GetOwnerNameOrLabel ( ) ;
if ( GDumpVSMLightNames )
2021-05-20 19:32:20 -04:00
{
2022-01-27 15:49:31 -05:00
UE_LOG ( LogRenderer , Display , TEXT ( " %s " ) , * CheckLightName ) ;
2021-05-20 19:32:20 -04:00
}
2022-01-27 15:49:31 -05:00
const ULightComponent * Component = CheckProxy - > GetLightComponent ( ) ;
check ( Component ) ;
// Fill out new sort key and compare to our best found so far
SortKey CheckKey ;
CheckKey . Packed = 0 ;
CheckKey . Fields . bExactNameMatch = ( CheckLightName = = GVirtualShadowMapVisualizeLightName ) ;
CheckKey . Fields . bPartialNameMatch = CheckKey . Fields . bExactNameMatch | | CheckLightName . Contains ( GVirtualShadowMapVisualizeLightName ) ;
CheckKey . Fields . bSelected = Component - > IsSelected ( ) ;
CheckKey . Fields . bOwnerSelected = Component - > IsOwnerSelected ( ) ;
CheckKey . Fields . bDirectionalLight = CheckProxy - > GetLightType ( ) = = LightType_Directional ;
CheckKey . Fields . bExists = 1 ;
2022-01-31 16:46:48 -05:00
if ( CheckKey . Packed > FoundKey . Packed ) //-V547
2022-01-27 15:49:31 -05:00
{
FoundKey = CheckKey ;
FoundProxy = CheckProxy ;
FoundVirtualShadowMapId = CheckVirtualShadowMapId ;
}
# endif
}
2021-05-20 19:32:20 -04:00
2022-02-02 08:19:08 -05:00
/**
 * Creates the RGBA8 debug visualization texture and adds a pass clearing it to (0, 0, 0, 0).
 *
 * @param GraphBuilder  Render graph that owns the texture and receives the clear pass.
 * @param Extent        Texture dimensions.
 * @return The created texture, usable as both a shader resource and a UAV.
 */
static FRDGTextureRef CreateDebugVisualizationTexture(FRDGBuilder& GraphBuilder, FIntPoint Extent)
{
	const FLinearColor TransparentBlack(0.0f, 0.0f, 0.0f, 0.0f);

	const FRDGTextureDesc DebugTextureDesc = FRDGTextureDesc::Create2D(
		Extent,
		PF_R8G8B8A8,
		FClearValueBinding(TransparentBlack),
		TexCreate_ShaderResource | TexCreate_UAV);

	FRDGTextureRef DebugTexture = GraphBuilder.CreateTexture(DebugTextureDesc, TEXT(" Shadow.Virtual.DebugProjection "));

	// Clear through a UAV so the texture starts out fully transparent.
	FRDGTextureUAVRef DebugTextureUAV = GraphBuilder.CreateUAV(DebugTexture);
	AddClearUAVPass(GraphBuilder, DebugTextureUAV, TransparentBlack);

	return DebugTexture;
}
2022-04-12 21:16:42 -04:00
/**
 * Computes how many shadow map entries to allocate for a given shadow map count.
 *
 * @param NumShadowMaps  Number of shadow maps actually needed.
 * @return NumShadowMaps clamped to at least 64, then rounded up to a power of two.
 */
static uint32 GetShadowMapsToAllocate(uint32 NumShadowMaps)
{
	const uint32 MinShadowMaps = 64U;
	const uint32 ClampedCount = FMath::Max(MinShadowMaps, NumShadowMaps);

	// Round up to powers of two to be friendlier to the buffer pool
	return FMath::RoundUpToPowerOfTwo(ClampedCount);
}
2021-01-11 14:49:16 -04:00
void FVirtualShadowMapArray : : BuildPageAllocations (
FRDGBuilder & GraphBuilder ,
const FMinimalSceneTextures & SceneTextures ,
UE5_MAIN: Multi-view-family scene renderer refactor, part 1. Major structural change to allow scene renderer to accept multiple view families, with otherwise negligible changes in internal behavior.
* Added "BeginRenderingViewFamilies" render interface call that accepts multiple view families. Original "BeginRenderingViewFamily" falls through to this.
* FSceneRenderer modified to include an array of view families, plus an active view family and the Views for that family.
* Swap ViewFamily to ActiveViewFamily.
* Swap Views array from TArray<FViewInfo> to TArrayView<FViewInfo>, including where the Views array is passed to functions.
* FSceneRenderer iterates over the view families, rendering each one at a time, as separate render graph executions.
* Some frame setup and cleanup logic outside the render graph runs once.
* Moved stateful FSceneRenderer members to FViewFamilyInfo, to preserve existing one-at-a-time view family rendering behavior.
* Display Cluster (Virtual Production) uses new API.
Next step will push everything into one render graph, which requires handling per-family external resources and cleaning up singletons (like FSceneTextures and FSceneTexturesConfig). Once that's done, we'll be in a position to further interleave rendering, properly handle once per frame work, and solve artifacts in various systems.
#jira none
#rnx
#rb zach.bethel
#preflight 625df821b21bb49791d377c9
[CL 19813996 by jason hoerner in ue5-main branch]
2022-04-19 14:45:26 -04:00
const TArrayView < FViewInfo > & Views ,
2022-01-27 15:49:31 -05:00
const FEngineShowFlags & EngineShowFlags ,
2021-01-11 14:49:16 -04:00
const FSortedLightSetSceneInfo & SortedLightsInfo ,
const TArray < FVisibleLightInfo , SceneRenderingAllocator > & VisibleLightInfos ,
2021-09-02 07:13:18 -04:00
const TArray < Nanite : : FRasterResults , TInlineAllocator < 2 > > & NaniteRasterResults ,
FScene & Scene )
2020-07-06 18:58:26 -04:00
{
2021-01-12 14:40:19 -04:00
check ( IsEnabled ( ) ) ;
2022-01-27 15:49:31 -05:00
if ( ShadowMaps . Num ( ) = = 0 | | Views . Num ( ) = = 0 )
{
// Nothing to do
return ;
}
2021-03-08 23:14:54 -04:00
RDG_EVENT_SCOPE ( GraphBuilder , " FVirtualShadowMapArray::BuildPageAllocation " ) ;
2020-07-06 18:58:26 -04:00
2021-05-20 19:32:20 -04:00
bool bDebugOutputEnabled = false ;
2022-01-27 15:49:31 -05:00
VisualizeLight . Reset ( ) ;
# if !UE_BUILD_SHIPPING
2021-05-20 19:32:20 -04:00
if ( GDumpVSMLightNames )
{
bDebugOutputEnabled = true ;
UE_LOG ( LogRenderer , Display , TEXT ( " Lights with Virtual Shadow Maps: " ) ) ;
}
2022-01-27 15:49:31 -05:00
// Setup debug visualization/output if enabled
2021-05-20 19:32:20 -04:00
{
2022-01-27 15:49:31 -05:00
FVirtualShadowMapVisualizationData & VisualizationData = GetVirtualShadowMapVisualizationData ( ) ;
2022-04-12 23:44:41 -04:00
for ( const FViewInfo & View : Views )
2022-01-27 15:49:31 -05:00
{
2022-04-12 23:44:41 -04:00
const FName & VisualizationMode = View . CurrentVirtualShadowMapVisualizationMode ;
// for stereo views that aren't multi-view, don't account for the left
FIntPoint Extent = View . ViewRect . Max - View . ViewRect . Min ;
if ( VisualizationData . Update ( VisualizationMode ) )
{
// TODO - automatically enable the show flag when set from command line?
//EngineShowFlags.SetVisualizeVirtualShadowMap(true);
}
2022-01-27 15:49:31 -05:00
2022-04-12 23:44:41 -04:00
if ( VisualizationData . IsActive ( ) & & EngineShowFlags . VisualizeVirtualShadowMap )
{
bDebugOutputEnabled = true ;
DebugVisualizationOutput . Add ( CreateDebugVisualizationTexture ( GraphBuilder , Extent ) ) ;
}
2022-01-27 15:49:31 -05:00
}
2021-05-20 19:32:20 -04:00
}
# endif //!UE_BUILD_SHIPPING
2022-01-27 15:49:31 -05:00
// Store shadow map projection data for each virtual shadow map
2022-02-02 02:18:54 -05:00
TArray < FVirtualShadowMapProjectionShaderData , SceneRenderingAllocator > ProjectionData ;
ProjectionData . AddDefaulted ( ShadowMaps . Num ( ) ) ;
2021-05-20 19:32:20 -04:00
2022-01-27 15:49:31 -05:00
// Gather directional light virtual shadow maps
TArray < int32 , SceneRenderingAllocator > DirectionalLightIds ;
for ( const FVisibleLightInfo & VisibleLightInfo : VisibleLightInfos )
2020-07-06 18:58:26 -04:00
{
2022-01-27 15:49:31 -05:00
for ( const TSharedPtr < FVirtualShadowMapClipmap > & Clipmap : VisibleLightInfo . VirtualShadowMapClipmaps )
2020-08-19 12:54:43 -04:00
{
2022-01-27 15:49:31 -05:00
// NOTE: Shader assumes all levels from a given clipmap are contiguous
int32 ClipmapID = Clipmap - > GetVirtualShadowMap ( ) - > ID ;
DirectionalLightIds . Add ( ClipmapID ) ;
for ( int32 ClipmapLevel = 0 ; ClipmapLevel < Clipmap - > GetLevelCount ( ) ; + + ClipmapLevel )
2020-08-19 12:54:43 -04:00
{
2022-02-02 02:18:54 -05:00
ProjectionData [ ClipmapID + ClipmapLevel ] = Clipmap - > GetProjectionShaderData ( ClipmapLevel ) ;
2022-01-27 15:49:31 -05:00
}
if ( bDebugOutputEnabled )
{
VisualizeLight . CheckLight ( Clipmap - > GetLightSceneInfo ( ) . Proxy , ClipmapID ) ;
}
}
for ( FProjectedShadowInfo * ProjectedShadowInfo : VisibleLightInfo . AllProjectedShadows )
{
if ( ProjectedShadowInfo - > HasVirtualShadowMap ( ) )
{
check ( ProjectedShadowInfo - > CascadeSettings . ShadowSplitIndex = = INDEX_NONE ) ; // We use clipmaps for virtual shadow maps, not cascades
// NOTE: Virtual shadow maps are never atlased, but verify our assumptions
2020-08-19 12:54:43 -04:00
{
2022-01-27 15:49:31 -05:00
const FVector4f ClipToShadowUV = ProjectedShadowInfo - > GetClipToShadowBufferUvScaleBias ( ) ;
check ( ProjectedShadowInfo - > BorderSize = = 0 ) ;
check ( ProjectedShadowInfo - > X = = 0 ) ;
check ( ProjectedShadowInfo - > Y = = 0 ) ;
const FIntRect ShadowViewRect = ProjectedShadowInfo - > GetInnerViewRect ( ) ;
check ( ShadowViewRect . Min . X = = 0 ) ;
check ( ShadowViewRect . Min . Y = = 0 ) ;
check ( ShadowViewRect . Max . X = = FVirtualShadowMap : : VirtualMaxResolutionXY ) ;
check ( ShadowViewRect . Max . Y = = FVirtualShadowMap : : VirtualMaxResolutionXY ) ;
}
int32 NumMaps = ProjectedShadowInfo - > bOnePassPointLightShadow ? 6 : 1 ;
for ( int32 i = 0 ; i < NumMaps ; i + + )
{
int32 ID = ProjectedShadowInfo - > VirtualShadowMaps [ i ] - > ID ;
2022-02-11 14:57:27 -05:00
const FViewMatrices ViewMatrices = ProjectedShadowInfo - > GetShadowDepthRenderingViewMatrices ( i , true ) ;
const FLargeWorldRenderPosition PreViewTranslation ( ProjectedShadowInfo - > PreShadowTranslation ) ;
2022-01-27 15:49:31 -05:00
2022-02-02 02:18:54 -05:00
FVirtualShadowMapProjectionShaderData & Data = ProjectionData [ ID ] ;
2022-01-27 15:49:31 -05:00
Data . TranslatedWorldToShadowViewMatrix = FMatrix44f ( ViewMatrices . GetTranslatedViewMatrix ( ) ) ; // LWC_TODO: Precision loss?
Data . ShadowViewToClipMatrix = FMatrix44f ( ViewMatrices . GetProjectionMatrix ( ) ) ;
Data . TranslatedWorldToShadowUVMatrix = FMatrix44f ( CalcTranslatedWorldToShadowUVMatrix ( ViewMatrices . GetTranslatedViewMatrix ( ) , ViewMatrices . GetProjectionMatrix ( ) ) ) ;
Data . TranslatedWorldToShadowUVNormalMatrix = FMatrix44f ( CalcTranslatedWorldToShadowUVNormalMatrix ( ViewMatrices . GetTranslatedViewMatrix ( ) , ViewMatrices . GetProjectionMatrix ( ) ) ) ;
2022-02-11 14:57:27 -05:00
Data . PreViewTranslationLWCTile = PreViewTranslation . GetTile ( ) ;
Data . PreViewTranslationLWCOffset = PreViewTranslation . GetOffset ( ) ;
2022-01-27 15:49:31 -05:00
Data . LightType = ProjectedShadowInfo - > GetLightSceneInfo ( ) . Proxy - > GetLightType ( ) ;
Data . LightSourceRadius = ProjectedShadowInfo - > GetLightSceneInfo ( ) . Proxy - > GetSourceRadius ( ) ;
2020-08-19 12:54:43 -04:00
}
2021-05-20 19:32:20 -04:00
if ( bDebugOutputEnabled )
{
2022-01-27 15:49:31 -05:00
VisualizeLight . CheckLight ( ProjectedShadowInfo - > GetLightSceneInfo ( ) . Proxy , ProjectedShadowInfo - > VirtualShadowMaps [ 0 ] - > ID ) ;
2020-08-19 12:54:43 -04:00
}
}
}
2022-01-27 15:49:31 -05:00
}
2021-03-08 23:14:54 -04:00
2022-01-27 15:49:31 -05:00
UniformParameters . NumShadowMaps = ShadowMaps . Num ( ) ;
UniformParameters . NumDirectionalLights = DirectionalLightIds . Num ( ) ;
2021-03-15 22:06:28 -04:00
2022-02-02 02:18:54 -05:00
ProjectionDataRDG = CreateProjectionDataBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.ProjectionData " ) , ProjectionData ) ;
2021-05-05 13:20:09 -04:00
2022-02-02 02:18:54 -05:00
UniformParameters . ProjectionData = GraphBuilder . CreateSRV ( ProjectionDataRDG ) ;
2021-03-08 23:14:54 -04:00
2022-01-27 15:49:31 -05:00
if ( CVarShowStats . GetValueOnRenderThread ( ) | | CacheManager - > IsAccumulatingStats ( ) )
{
StatsBufferRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumStats ) , TEXT ( " Shadow.Virtual.StatsBuffer " ) ) ;
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( StatsBufferRDG ) , 0 ) ;
}
2022-04-12 21:16:42 -04:00
// We potentially over-allocate these to avoid too many different allocation sizes each frame
const uint32 NumShadowMapsToAllocate = GetShadowMapsToAllocate ( ShadowMaps . Num ( ) ) ;
const uint32 NumPageFlagsToAllocate = NumShadowMapsToAllocate * FVirtualShadowMap : : PageTableSize ;
// Create and clear the requested page flags
FRDGBufferRef PageRequestFlagsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumPageFlagsToAllocate ) , TEXT ( " Shadow.Virtual.PageRequestFlags " ) ) ;
2022-01-27 15:49:31 -05:00
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( PageRequestFlagsRDG ) , 0 ) ;
2022-04-12 21:16:42 -04:00
DynamicCasterPageFlagsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumPageFlagsToAllocate ) , TEXT ( " Shadow.Virtual.DynamicCasterPageFlags " ) ) ;
2022-01-27 15:49:31 -05:00
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( DynamicCasterPageFlagsRDG ) , 0 ) ;
2022-03-23 15:54:41 -04:00
2022-04-12 21:16:56 -04:00
DirtyPageFlagsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumPageFlagsToAllocate ) , TEXT ( " Shadow.Virtual.DirtyPageFlags " ) ) ;
2022-03-23 15:54:41 -04:00
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( DirtyPageFlagsRDG ) , 0 ) ;
2022-01-27 15:49:31 -05:00
// Record the number of instances the buffer has capacity for, should anything change (it shouldn't!)
NumInvalidatingInstanceSlots = Scene . GPUScene . GetNumInstances ( ) ;
// Allocate space for counter, worst case ID storage, and flags.
int32 InstanceInvalidationBufferSize = 1 + NumInvalidatingInstanceSlots + FMath : : DivideAndRoundUp ( NumInvalidatingInstanceSlots , 32 ) ;
InvalidatingInstancesRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , InstanceInvalidationBufferSize ) , TEXT ( " Shadow.Virtual.InvalidatingInstances " ) ) ;
// Clear to zero, technically only need to clear first Scene.GPUScene.GetNumInstances() + 1 uints
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( InvalidatingInstancesRDG ) , 0 ) ;
2022-04-12 21:16:42 -04:00
const uint32 NumPageRects = ShadowMaps . Num ( ) * FVirtualShadowMap : : MaxMipLevels ;
const uint32 NumPageRectsToAllocate = NumShadowMapsToAllocate * FVirtualShadowMap : : MaxMipLevels ;
PageRectBoundsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( FIntVector4 ) , NumPageRectsToAllocate ) , TEXT ( " Shadow.Virtual.PageRectBounds " ) ) ;
2022-01-27 15:49:31 -05:00
{
FInitPageRectBoundsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FInitPageRectBoundsCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPageRectBounds = GraphBuilder . CreateUAV ( PageRectBoundsRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FInitPageRectBoundsCS > ( ) ;
ClearUnusedGraphResources ( ComputeShader , PassParameters ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " InitPageRectBounds " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( NumPageRects , FInitPageRectBoundsCS : : DefaultCSGroupX ) , 1 , 1 )
) ;
}
for ( int32 ViewIndex = 0 ; ViewIndex < Views . Num ( ) ; + + ViewIndex )
{
2022-04-12 23:44:41 -04:00
RDG_EVENT_SCOPE_CONDITIONAL ( GraphBuilder , Views . Num ( ) > 1 , " View%d " , ViewIndex ) ;
2022-01-27 15:49:31 -05:00
const FViewInfo & View = Views [ ViewIndex ] ;
// This view contained no local lights (that were stored in the light grid), and no directional lights, so nothing to do.
if ( View . ForwardLightingResources . LocalLightVisibleLightInfosIndex . Num ( ) + DirectionalLightIds . Num ( ) = = 0 )
2021-03-08 23:14:54 -04:00
{
2022-01-27 15:49:31 -05:00
continue ;
2021-03-08 23:14:54 -04:00
}
2020-08-19 12:54:43 -04:00
2022-01-27 15:49:31 -05:00
FRDGBufferRef DirectionalLightIdsRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.DirectionalLightIds " ) , DirectionalLightIds ) ;
2020-07-06 18:58:26 -04:00
2022-01-27 15:49:31 -05:00
const FRDGSystemTextures & SystemTextures = FRDGSystemTextures : : Get ( GraphBuilder ) ;
2020-07-06 18:58:26 -04:00
2022-01-27 15:49:31 -05:00
FRDGBufferRef ScreenSpaceGridBoundsRDG = nullptr ;
2020-07-06 18:58:26 -04:00
2022-01-27 15:49:31 -05:00
{
// It's safe to overlap these passes that all write to page request flags
FRDGBufferUAVRef PageRequestFlagsUAV = GraphBuilder . CreateUAV ( PageRequestFlagsRDG , ERDGUnorderedAccessViewFlags : : SkipBarrier ) ;
// Mark pages based on projected depth buffer pixels
if ( CVarMarkPixelPages . GetValueOnRenderThread ( ) ! = 0 )
2020-07-06 18:58:26 -04:00
{
2022-02-10 09:53:46 -05:00
auto GeneratePageFlags = [ & ] ( const EVirtualShadowMapProjectionInputType InputType )
2021-02-18 21:09:05 -04:00
{
2022-01-27 15:49:31 -05:00
FGeneratePageFlagsFromPixelsCS : : FPermutationDomain PermutationVector ;
2022-02-10 09:53:46 -05:00
PermutationVector . Set < FGeneratePageFlagsFromPixelsCS : : FInputType > ( InputType = = EVirtualShadowMapProjectionInputType : : HairStrands ? 1u : 0u ) ;
2022-01-27 15:49:31 -05:00
FGeneratePageFlagsFromPixelsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FGeneratePageFlagsFromPixelsCS : : FParameters > ( ) ;
2021-03-08 23:14:54 -04:00
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
2022-01-27 15:49:31 -05:00
PassParameters - > SceneTexturesStruct = SceneTextures . UniformBuffer ;
PassParameters - > HairStrands = HairStrands : : BindHairStrandsViewUniformParameters ( View ) ;
PassParameters - > View = View . ViewUniformBuffer ;
2021-02-18 21:09:05 -04:00
PassParameters - > OutPageRequestFlags = PageRequestFlagsUAV ;
2022-01-27 15:49:31 -05:00
PassParameters - > ForwardLightData = View . ForwardLightingResources . ForwardLightUniformBuffer ;
PassParameters - > DirectionalLightIds = GraphBuilder . CreateSRV ( DirectionalLightIdsRDG ) ;
PassParameters - > ResolutionLodBiasLocal = CVarResolutionLodBiasLocal . GetValueOnRenderThread ( ) ;
PassParameters - > PageDilationBorderSizeLocal = CVarPageDilationBorderSizeLocal . GetValueOnRenderThread ( ) ;
PassParameters - > PageDilationBorderSizeDirectional = CVarPageDilationBorderSizeDirectional . GetValueOnRenderThread ( ) ;
PassParameters - > bCullBackfacingPixels = ShouldCullBackfacingPixels ( ) ? 1 : 0 ;
2022-04-01 08:35:55 -04:00
PassParameters - > Strata = Strata : : BindStrataGlobalUniformParameters ( View ) ;
2021-02-18 21:09:05 -04:00
2022-01-27 15:49:31 -05:00
auto ComputeShader = View . ShaderMap - > GetShader < FGeneratePageFlagsFromPixelsCS > ( PermutationVector ) ;
2021-02-18 21:09:05 -04:00
2022-01-27 15:49:31 -05:00
static_assert ( ( FVirtualPageManagementShader : : DefaultCSGroupXY % 2 ) = = 0 , " GeneratePageFlagsFromPixels requires even-sized CS groups for quad swizzling. " ) ;
const FIntPoint GridSize = FIntPoint : : DivideAndRoundUp ( View . ViewRect . Size ( ) , FVirtualPageManagementShader : : DefaultCSGroupXY ) ;
2020-07-06 18:58:26 -04:00
2022-02-10 09:53:46 -05:00
if ( InputType = = EVirtualShadowMapProjectionInputType : : HairStrands & & View . HairStrandsViewData . VisibilityData . TileData . IsValid ( ) )
2021-12-02 23:09:13 -05:00
{
2022-01-27 15:49:31 -05:00
PassParameters - > IndirectBufferArgs = View . HairStrandsViewData . VisibilityData . TileData . TileIndirectDispatchBuffer ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " GeneratePageFlagsFromPixels(HairStrands,Tile) " ) ,
ComputeShader ,
PassParameters ,
View . HairStrandsViewData . VisibilityData . TileData . TileIndirectDispatchBuffer ,
View . HairStrandsViewData . VisibilityData . TileData . GetIndirectDispatchArgOffset ( FHairStrandsTiles : : ETileType : : HairAll ) ) ;
2021-12-02 23:09:13 -05:00
}
2022-01-27 15:49:31 -05:00
else
{
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2022-04-12 21:16:42 -04:00
RDG_EVENT_NAME ( " GeneratePageFlagsFromPixels(%s,NumShadowMaps=%d) " , ToString ( InputType ) , ShadowMaps . Num ( ) ) ,
2022-01-27 15:49:31 -05:00
ComputeShader ,
PassParameters ,
FIntVector ( GridSize . X , GridSize . Y , 1 ) ) ;
}
} ;
2021-12-02 23:09:13 -05:00
2022-02-10 09:53:46 -05:00
GeneratePageFlags ( EVirtualShadowMapProjectionInputType : : GBuffer ) ;
2022-01-27 15:49:31 -05:00
if ( HairStrands : : HasViewHairStrandsData ( View ) )
2021-12-02 23:09:13 -05:00
{
2022-02-10 09:53:46 -05:00
GeneratePageFlags ( EVirtualShadowMapProjectionInputType : : HairStrands ) ;
2021-12-02 23:09:13 -05:00
}
}
2022-01-27 15:49:31 -05:00
// Mark coarse pages
bool bMarkCoarsePagesDirectional = CVarMarkCoarsePagesDirectional . GetValueOnRenderThread ( ) ! = 0 ;
bool bMarkCoarsePagesLocal = CVarMarkCoarsePagesLocal . GetValueOnRenderThread ( ) ! = 0 ;
if ( bMarkCoarsePagesDirectional | | bMarkCoarsePagesLocal )
2021-12-02 23:08:02 -05:00
{
2022-01-27 15:49:31 -05:00
FMarkCoarsePagesCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FMarkCoarsePagesCS : : FParameters > ( ) ;
2021-12-02 23:08:02 -05:00
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
2022-01-27 15:49:31 -05:00
PassParameters - > OutPageRequestFlags = PageRequestFlagsUAV ;
PassParameters - > bMarkCoarsePagesLocal = bMarkCoarsePagesLocal ? 1 : 0 ;
PassParameters - > ClipmapIndexMask = bMarkCoarsePagesDirectional ? FVirtualShadowMapClipmap : : GetCoarsePageClipmapIndexMask ( ) : 0 ;
PassParameters - > bIncludeNonNaniteGeometry = CVarCoarsePagesIncludeNonNanite . GetValueOnRenderThread ( ) ;
2021-12-02 23:08:02 -05:00
2022-01-27 15:49:31 -05:00
auto ComputeShader = View . ShaderMap - > GetShader < FMarkCoarsePagesCS > ( ) ;
2021-12-02 23:08:02 -05:00
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2022-01-27 15:49:31 -05:00
RDG_EVENT_NAME ( " MarkCoarsePages " ) ,
2021-12-02 23:08:02 -05:00
ComputeShader ,
PassParameters ,
2022-01-27 15:49:31 -05:00
FIntVector ( FMath : : DivideAndRoundUp ( uint32 ( ShadowMaps . Num ( ) ) , FMarkCoarsePagesCS : : DefaultCSGroupX ) , 1 , 1 )
2021-12-02 23:08:02 -05:00
) ;
}
}
2022-01-27 15:49:31 -05:00
}
2021-07-21 19:04:20 -04:00
2022-04-12 21:16:42 -04:00
PageTableRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumPageFlagsToAllocate ) , TEXT ( " Shadow.Virtual.PageTable " ) ) ;
2022-01-27 15:49:31 -05:00
// Note: these are passed to the rendering and are not identical to the PageRequest flags coming in from GeneratePageFlagsFromPixels
2022-04-12 21:16:42 -04:00
PageFlagsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , NumPageFlagsToAllocate ) , TEXT ( " Shadow.Virtual.PageFlags " ) ) ;
2021-12-03 16:08:27 -05:00
2022-01-27 15:49:31 -05:00
// One additional element as the last element is used as an atomic counter
FRDGBufferRef FreePhysicalPagesRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( int32 ) , GetMaxPhysicalPages ( ) + 1 ) , TEXT ( " Shadow.Virtual.FreePhysicalPages " ) ) ;
// Enough space for all physical pages that might be allocated
PhysicalPageMetaDataRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( FPhysicalPageMetaData ) , GetMaxPhysicalPages ( ) ) , TEXT ( " Shadow.Virtual.PhysicalPageMetaData " ) ) ;
AllocatedPageRectBoundsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( FIntVector4 ) , NumPageRects ) , TEXT ( " Shadow.Virtual.AllocatedPageRectBounds " ) ) ;
{
FInitPhysicalPageMetaData : : FParameters * PassParameters = GraphBuilder . AllocParameters < FInitPhysicalPageMetaData : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPhysicalPageMetaData = GraphBuilder . CreateUAV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutFreePhysicalPages = GraphBuilder . CreateUAV ( FreePhysicalPagesRDG ) ;
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FInitPhysicalPageMetaData > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " InitPhysicalPageMetaData " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( GetMaxPhysicalPages ( ) , FInitPhysicalPageMetaData : : DefaultCSGroupX ) , 1 , 1 )
) ;
}
// Start by marking any physical pages that we are going to keep due to caching
// NOTE: We run this pass even with no caching since we still need to initialize the metadata
{
FCreateCachedPageMappingsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FCreateCachedPageMappingsCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PageRequestFlags = GraphBuilder . CreateSRV ( PageRequestFlagsRDG ) ;
PassParameters - > OutPageTable = GraphBuilder . CreateUAV ( PageTableRDG ) ;
PassParameters - > OutPhysicalPageMetaData = GraphBuilder . CreateUAV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutPageFlags = GraphBuilder . CreateUAV ( PageFlagsRDG ) ;
PassParameters - > bDynamicPageInvalidation = 1 ;
# if !UE_BUILD_SHIPPING
PassParameters - > bDynamicPageInvalidation = CVarDebugSkipDynamicPageInvalidation . GetValueOnRenderThread ( ) = = 0 ? 1 : 0 ;
# endif
bool bCacheEnabled = CacheManager - > IsValid ( ) ;
if ( bCacheEnabled )
2021-12-03 16:08:27 -05:00
{
2022-01-27 15:49:31 -05:00
SetCacheDataShaderParameters ( GraphBuilder , ShadowMaps , CacheManager , PassParameters - > CacheDataParameters ) ;
}
2021-12-03 16:08:27 -05:00
2022-01-27 15:49:31 -05:00
FCreateCachedPageMappingsCS : : FPermutationDomain PermutationVector ;
2022-03-23 15:54:41 -04:00
SetStatsArgsAndPermutation < FCreateCachedPageMappingsCS > ( GraphBuilder , StatsBufferRDG , PassParameters , PermutationVector ) ;
2022-01-27 15:49:31 -05:00
PermutationVector . Set < FCreateCachedPageMappingsCS : : FHasCacheDataDim > ( bCacheEnabled ) ;
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FCreateCachedPageMappingsCS > ( PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " CreateCachedPageMappings " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( FVirtualShadowMap : : PageTableSize , FCreateCachedPageMappingsCS : : DefaultCSGroupX ) , ShadowMaps . Num ( ) , 1 )
) ;
}
// After we've marked any cached pages, collect all the remaining free pages into a list
// NOTE: We could optimize this more in the case where there's no caching of course; TBD priority
{
FPackFreePagesCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FPackFreePagesCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutFreePhysicalPages = GraphBuilder . CreateUAV ( FreePhysicalPagesRDG ) ;
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FPackFreePagesCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " PackFreePages " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( GetMaxPhysicalPages ( ) , FPackFreePagesCS : : DefaultCSGroupX ) , 1 , 1 )
) ;
}
// Allocate any new physical pages that were not cached from the free list
{
FAllocateNewPageMappingsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FAllocateNewPageMappingsCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PageRequestFlags = GraphBuilder . CreateSRV ( PageRequestFlagsRDG ) ;
PassParameters - > OutPageTable = GraphBuilder . CreateUAV ( PageTableRDG ) ;
PassParameters - > OutPageFlags = GraphBuilder . CreateUAV ( PageFlagsRDG ) ;
PassParameters - > OutFreePhysicalPages = GraphBuilder . CreateUAV ( FreePhysicalPagesRDG ) ;
PassParameters - > OutPhysicalPageMetaData = GraphBuilder . CreateUAV ( PhysicalPageMetaDataRDG ) ;
FAllocateNewPageMappingsCS : : FPermutationDomain PermutationVector ;
2022-03-23 15:54:41 -04:00
SetStatsArgsAndPermutation < FAllocateNewPageMappingsCS > ( GraphBuilder , StatsBufferRDG , PassParameters , PermutationVector ) ;
2022-01-27 15:49:31 -05:00
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FAllocateNewPageMappingsCS > ( PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " AllocateNewPageMappings " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( FVirtualShadowMap : : PageTableSize , FAllocateNewPageMappingsCS : : DefaultCSGroupX ) , ShadowMaps . Num ( ) , 1 )
) ;
}
{
// Run pass building hierarchical page flags to make culling acceptable performance.
FGenerateHierarchicalPageFlagsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FGenerateHierarchicalPageFlagsCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPageFlags = GraphBuilder . CreateUAV ( PageFlagsRDG ) ;
PassParameters - > OutPageRectBounds = GraphBuilder . CreateUAV ( PageRectBoundsRDG ) ;
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FGenerateHierarchicalPageFlagsCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " GenerateHierarchicalPageFlags " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( FVirtualShadowMap : : PageTableSize , FGenerateHierarchicalPageFlagsCS : : DefaultCSGroupX ) , ShadowMaps . Num ( ) , 1 )
) ;
}
// NOTE: We could skip this (in shader) for shadow maps that only have 1 mip (ex. clipmaps)
{
// Propagate mapped mips down the hierarchy to allow O(1) lookup of coarser mapped pages
FPropagateMappedMipsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FPropagateMappedMipsCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPageTable = GraphBuilder . CreateUAV ( PageTableRDG ) ;
auto ComputeShader = Views [ 0 ] . ShaderMap - > GetShader < FPropagateMappedMipsCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " PropagateMappedMips " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( FMath : : Square ( FVirtualShadowMap : : Level0DimPagesXY ) , FPropagateMappedMipsCS : : DefaultCSGroupX ) , ShadowMaps . Num ( ) , 1 )
) ;
}
// Initialize the physical page pool
check ( PhysicalPagePoolRDG ! = nullptr ) ;
{
RDG_EVENT_SCOPE ( GraphBuilder , " InitializePhysicalPages " ) ;
if ( CVarInitializePhysicalUsingIndirect . GetValueOnRenderThread ( ) ! = 0 )
{
FRDGBufferRef InitializePagesIndirectArgsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateIndirectDesc ( 3 ) , TEXT ( " Shadow.Virtual.InitializePagesIndirectArgs " ) ) ;
// Note: We use GetTotalAllocatedPhysicalPages() to size the buffer as the selection shader emits both static/dynamic pages separately when enabled.
FRDGBufferRef PhysicalPagesToInitializeRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( int32 ) , GetTotalAllocatedPhysicalPages ( ) + 1 ) , TEXT ( " Shadow.Virtual.PhysicalPagesToInitialize " ) ) ;
// 1. Initialize the indirect args buffer
AddClearIndirectDispatchArgs1DPass ( GraphBuilder , InitializePagesIndirectArgsRDG ) ;
// 2. Filter the relevant physical pages and set up the indirect args
{
FSelectPagesToInitializeCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSelectPagesToInitializeCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutInitializePagesIndirectArgsBuffer = GraphBuilder . CreateUAV ( InitializePagesIndirectArgsRDG ) ;
PassParameters - > OutPhysicalPagesToInitialize = GraphBuilder . CreateUAV ( PhysicalPagesToInitializeRDG ) ;
bool bGenerateStats = StatsBufferRDG ! = nullptr ;
if ( bGenerateStats )
{
PassParameters - > OutStatsBuffer = GraphBuilder . CreateUAV ( StatsBufferRDG ) ;
}
FSelectPagesToInitializeCS : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FSelectPagesToInitializeCS : : FGenerateStatsDim > ( bGenerateStats ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FSelectPagesToInitializeCS > ( PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " SelectPagesToInitialize " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( GetMaxPhysicalPages ( ) , FSelectPagesToInitializeCS : : DefaultCSGroupX ) , 1 , 1 )
) ;
}
// 3. Indirect dispatch to clear the selected pages
{
FInitializePhysicalPagesIndirectCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FInitializePhysicalPagesIndirectCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > OutPhysicalPagePool = GraphBuilder . CreateUAV ( PhysicalPagePoolRDG ) ;
PassParameters - > IndirectArgs = InitializePagesIndirectArgsRDG ;
PassParameters - > PhysicalPagesToInitialize = GraphBuilder . CreateSRV ( PhysicalPagesToInitializeRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FInitializePhysicalPagesIndirectCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " InitializePhysicalMemoryIndirect " ) ,
ComputeShader ,
PassParameters ,
PassParameters - > IndirectArgs ,
0
) ;
}
}
else
{
FInitializePhysicalPagesCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FInitializePhysicalPagesCS : : FParameters > ( ) ;
2021-12-03 16:08:27 -05:00
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
2022-01-27 15:49:31 -05:00
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutPhysicalPagePool = GraphBuilder . CreateUAV ( PhysicalPagePoolRDG ) ;
2021-12-03 16:08:27 -05:00
2022-01-27 15:49:31 -05:00
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FInitializePhysicalPagesCS > ( ) ;
// Shader contains logic to deal with static cached pages if enabled
// We only need to launch one per page, even if there are multiple cached pages per page
2022-04-07 18:36:13 -04:00
FIntPoint PoolSize = GetPhysicalPoolSize ( ) ;
2021-12-03 16:08:27 -05:00
FComputeShaderUtils : : AddPass (
GraphBuilder ,
2022-01-27 15:49:31 -05:00
RDG_EVENT_NAME ( " InitializePhysicalPages " ) ,
2021-12-03 16:08:27 -05:00
ComputeShader ,
PassParameters ,
2022-01-27 15:49:31 -05:00
FIntVector (
2022-04-07 18:36:13 -04:00
FMath : : DivideAndRoundUp ( PoolSize . X , 16 ) ,
FMath : : DivideAndRoundUp ( PoolSize . Y , 16 ) ,
2022-01-27 15:49:31 -05:00
1 )
2021-12-03 16:08:27 -05:00
) ;
}
2020-07-06 18:58:26 -04:00
}
2021-05-20 19:32:20 -04:00
2022-01-27 15:49:31 -05:00
UniformParameters . PageTable = GraphBuilder . CreateSRV ( PageTableRDG ) ;
UniformParameters . PageFlags = GraphBuilder . CreateSRV ( PageFlagsRDG ) ;
UniformParameters . PageRectBounds = GraphBuilder . CreateSRV ( PageRectBoundsRDG ) ;
// Add pass to pipe back important stats
{
FVirtualSmFeedbackStatusCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualSmFeedbackStatusCS : : FParameters > ( ) ;
PassParameters - > FreePhysicalPages = GraphBuilder . CreateSRV ( FreePhysicalPagesRDG ) ;
PassParameters - > GPUMessageParams = GPUMessage : : GetShaderParameters ( GraphBuilder ) ;
PassParameters - > StatusMessageId = CacheManager - > StatusFeedbackSocket . GetMessageId ( ) . GetIndex ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FVirtualSmFeedbackStatusCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " Feedback Status " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( 1 , 1 , 1 )
) ;
}
2021-05-20 19:32:20 -04:00
# if !UE_BUILD_SHIPPING
// Only dump one frame of light data
GDumpVSMLightNames = false ;
# endif
2020-07-06 18:58:26 -04:00
}
// Compute shader bound to the "DebugVisualizeVirtualSmCS" entry point of VirtualShadowMapDebug.usf
// (see the IMPLEMENT_GLOBAL_SHADER registration below the class).
// FVirtualShadowMapArray::RenderDebugInfo dispatches it once per view, writing into that view's
// debug visualization texture through OutVisualize.
class FDebugVisualizeVirtualSmCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FDebugVisualizeVirtualSmCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FDebugVisualizeVirtualSmCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-01-20 08:46:15 -04:00
// Virtual shadow map sampling parameters; RenderDebugInfo fills these from GetSamplingParameters().
SHADER_PARAMETER_STRUCT_INCLUDE ( FVirtualShadowMapSamplingParameters , ProjectionParameters )
2020-07-06 18:58:26 -04:00
// Extent of the debug target; RenderDebugInfo sets these from the output texture's Desc.Extent (X / Y).
SHADER_PARAMETER ( uint32 , DebugTargetWidth )
SHADER_PARAMETER ( uint32 , DebugTargetHeight )
// Border width passed to the shader; RenderDebugInfo currently passes 2.
SHADER_PARAMETER ( uint32 , BorderWidth )
2022-02-02 08:19:08 -05:00
// Active visualization mode ID, taken from FVirtualShadowMapVisualizationData::GetActiveModeID().
SHADER_PARAMETER ( uint32 , VisualizeModeId )
// ID of the virtual shadow map to visualize, taken from VisualizeLight.GetVirtualShadowMapId().
SHADER_PARAMETER ( int32 , VirtualShadowMapId )
2020-07-06 18:58:26 -04:00
2022-02-02 08:19:08 -05:00
// Output UAV: the debug visualization texture the shader writes to.
SHADER_PARAMETER_RDG_TEXTURE_UAV ( RWTexture2D < float4 > , OutVisualize )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader as a compute shader (SF_Compute) with its .usf source file and entry point.
IMPLEMENT_GLOBAL_SHADER ( FDebugVisualizeVirtualSmCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapDebug.usf " , " DebugVisualizeVirtualSmCS " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
2022-04-12 23:44:41 -04:00
void FVirtualShadowMapArray : : RenderDebugInfo ( FRDGBuilder & GraphBuilder , TArrayView < FViewInfo > Views )
2020-07-06 18:58:26 -04:00
{
2021-01-12 14:40:19 -04:00
check ( IsEnabled ( ) ) ;
2022-02-02 08:19:08 -05:00
2022-04-12 23:44:41 -04:00
if ( DebugVisualizationOutput . IsEmpty ( ) | | ! VisualizeLight . IsValid ( ) )
2020-07-06 18:58:26 -04:00
{
2022-02-02 08:19:08 -05:00
return ;
2020-07-06 18:58:26 -04:00
}
2022-02-02 08:19:08 -05:00
const FVirtualShadowMapVisualizationData & VisualizationData = GetVirtualShadowMapVisualizationData ( ) ;
2022-04-12 23:44:41 -04:00
if ( VisualizationData . GetActiveModeID ( ) ! = VIRTUAL_SHADOW_MAP_VISUALIZE_CLIPMAP_VIRTUAL_SPACE )
{
return ;
}
2022-02-02 08:19:08 -05:00
int32 BorderWidth = 2 ;
2022-04-12 23:44:41 -04:00
for ( int32 ViewIndex = 0 ; ViewIndex < Views . Num ( ) ; ViewIndex + + )
{
FViewInfo & View = Views [ ViewIndex ] ;
2022-02-02 08:19:08 -05:00
2022-04-12 23:44:41 -04:00
FIntPoint DebugTargetExtent = DebugVisualizationOutput [ ViewIndex ] - > Desc . Extent ;
2022-02-02 08:19:08 -05:00
2022-04-12 23:44:41 -04:00
FDebugVisualizeVirtualSmCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FDebugVisualizeVirtualSmCS : : FParameters > ( ) ;
PassParameters - > ProjectionParameters = GetSamplingParameters ( GraphBuilder ) ;
2022-02-02 08:19:08 -05:00
2022-04-12 23:44:41 -04:00
PassParameters - > DebugTargetWidth = DebugTargetExtent . X ;
PassParameters - > DebugTargetHeight = DebugTargetExtent . Y ;
PassParameters - > BorderWidth = BorderWidth ;
PassParameters - > VisualizeModeId = VisualizationData . GetActiveModeID ( ) ;
PassParameters - > VirtualShadowMapId = VisualizeLight . GetVirtualShadowMapId ( ) ;
2022-02-02 08:19:08 -05:00
2022-04-12 23:44:41 -04:00
PassParameters - > OutVisualize = GraphBuilder . CreateUAV ( DebugVisualizationOutput [ ViewIndex ] ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FDebugVisualizeVirtualSmCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " DebugVisualizeVirtualShadowMap " ) ,
ComputeShader ,
PassParameters ,
FComputeShaderUtils : : GetGroupCount ( DebugTargetExtent , FVirtualPageManagementShader : : DefaultCSGroupXY )
) ;
}
2020-07-06 18:58:26 -04:00
}
// Compute shader that prints virtual shadow map statistics via ShaderPrint.
// Bound to the "PrintStats" entry point of VirtualShadowMapPrintStats.usf (see IMPLEMENT_GLOBAL_SHADER below)
// and dispatched by FVirtualShadowMapArray::PrintStats.
class FVirtualSmPrintStatsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FVirtualSmPrintStatsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FVirtualSmPrintStatsCS , FVirtualPageManagementShader )
2020-08-19 14:22:37 -04:00
2020-07-06 18:58:26 -04:00
// Shader inputs. Declaration order is part of the generated parameter struct; do not reorder casually.
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
// Virtual shadow map uniform buffer.
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2020-07-06 18:58:26 -04:00
// ShaderPrint parameters used to emit the on-screen text.
SHADER_PARAMETER_STRUCT_INCLUDE ( ShaderPrint : : FShaderParameters , ShaderPrintStruct )
// Stats values to print, exposed to the shader as a structured buffer of uint.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , InStatsBuffer )
2021-01-20 08:46:15 -04:00
// NOTE(review): FVirtualShadowMapArray::PrintStats in this file does not assign this SRV - confirm whether the shader still reads it.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FIntVector4 > , AllocatedPageRectBounds )
2021-12-02 23:08:02 -05:00
// Value of the show-stats console variable, forwarded by PrintStats.
SHADER_PARAMETER ( int , ShowStatsValue )
2020-07-06 18:58:26 -04:00
END_SHADER_PARAMETER_STRUCT ( )
2021-12-02 18:25:13 -05:00
// Extends the base shader's compilation environment with CFLAG_SkipOptimizations.
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FVirtualPageManagementShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
// Disable optimizations as shader print causes long compile times
OutEnvironment . CompilerFlags . Add ( CFLAG_SkipOptimizations ) ;
}
2020-07-06 18:58:26 -04:00
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader: source file, entry point and shader frequency (compute).
IMPLEMENT_GLOBAL_SHADER ( FVirtualSmPrintStatsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPrintStats.usf " , " PrintStats " , SF_Compute ) ;
2020-07-06 18:58:26 -04:00
void FVirtualShadowMapArray : : PrintStats ( FRDGBuilder & GraphBuilder , const FViewInfo & View )
{
2021-01-12 14:40:19 -04:00
check ( IsEnabled ( ) ) ;
2020-10-09 22:42:26 -04:00
LLM_SCOPE_BYTAG ( Nanite ) ;
2020-07-06 18:58:26 -04:00
// Print stats
2021-12-02 23:08:02 -05:00
int ShowStatsValue = CVarShowStats . GetValueOnRenderThread ( ) ;
if ( ShowStatsValue ! = 0 & & StatsBufferRDG )
2020-07-06 18:58:26 -04:00
{
{
FVirtualSmPrintStatsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualSmPrintStatsCS : : FParameters > ( ) ;
2020-10-27 13:40:36 -04:00
ShaderPrint : : SetParameters ( GraphBuilder , View , PassParameters - > ShaderPrintStruct ) ;
2020-11-09 15:47:39 -04:00
PassParameters - > InStatsBuffer = GraphBuilder . CreateSRV ( StatsBufferRDG ) ;
2021-03-08 23:14:54 -04:00
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
2021-12-02 23:08:02 -05:00
PassParameters - > ShowStatsValue = ShowStatsValue ;
2020-07-06 18:58:26 -04:00
auto ComputeShader = View . ShaderMap - > GetShader < FVirtualSmPrintStatsCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " Print Stats " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( 1 , 1 , 1 )
) ;
}
}
}
2021-01-20 08:46:15 -04:00
2021-02-05 16:37:17 -04:00
void FVirtualShadowMapArray : : CreateMipViews ( TArray < Nanite : : FPackedView , SceneRenderingAllocator > & Views ) const
{
// strategy:
// 1. Use the cull pass to generate copies of every node for every view needed.
// [2. Fabricate a HZB array?]
ensure ( Views . Num ( ) < = ShadowMaps . Num ( ) ) ;
const int32 NumPrimaryViews = Views . Num ( ) ;
// 1. create derivative views for each of the Mip levels,
Views . AddDefaulted ( NumPrimaryViews * ( FVirtualShadowMap : : MaxMipLevels - 1 ) ) ;
int32 MaxMips = 0 ;
for ( int32 ViewIndex = 0 ; ViewIndex < NumPrimaryViews ; + + ViewIndex )
{
const Nanite : : FPackedView & PrimaryView = Views [ ViewIndex ] ;
ensure ( PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . X > = 0 & & PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . X < ShadowMaps . Num ( ) ) ;
ensure ( PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Y = = 0 ) ;
ensure ( PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Z > 0 & & PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Z < = FVirtualShadowMap : : MaxMipLevels ) ;
const int32 NumMips = PrimaryView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Z ;
MaxMips = FMath : : Max ( MaxMips , NumMips ) ;
for ( int32 MipLevel = 0 ; MipLevel < NumMips ; + + MipLevel )
{
Nanite : : FPackedView & MipView = Views [ MipLevel * NumPrimaryViews + ViewIndex ] ; // Primary (Non-Mip views) first followed by derived mip views.
if ( MipLevel > 0 )
{
MipView = PrimaryView ;
// Slightly messy, but extract any scale factor that was applied to the LOD scale for re-application below
MipView . UpdateLODScales ( ) ;
float LODScaleFactor = PrimaryView . LODScales . X / MipView . LODScales . X ;
MipView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Y = MipLevel ;
MipView . TargetLayerIdX_AndMipLevelY_AndNumMipLevelsZ . Z = NumMips ; //FVirtualShadowMap::MaxMipLevels;
// Size of view, for the virtual SMs these are assumed to not be offset.
FIntPoint ViewSize = FIntPoint : : DivideAndRoundUp ( FIntPoint ( PrimaryView . ViewSizeAndInvSize . X + 0.5f , PrimaryView . ViewSizeAndInvSize . Y + 0.5f ) , 1U < < MipLevel ) ;
FIntPoint ViewMin = FIntPoint ( MipView . ViewRect . X , MipView . ViewRect . Y ) / ( 1U < < MipLevel ) ;
2021-09-22 10:01:48 -04:00
MipView . ViewSizeAndInvSize = FVector4f ( ViewSize . X , ViewSize . Y , 1.0f / float ( ViewSize . X ) , 1.0f / float ( ViewSize . Y ) ) ;
2021-02-05 16:37:17 -04:00
MipView . ViewRect = FIntVector4 ( ViewMin . X , ViewMin . Y , ViewMin . X + ViewSize . X , ViewMin . Y + ViewSize . Y ) ;
MipView . UpdateLODScales ( ) ;
MipView . LODScales . X * = LODScaleFactor ;
2022-05-20 03:17:33 -04:00
MipView . TranslatedWorldToSubpixelClip = Nanite : : FPackedView : : CalcTranslatedWorldToSubpixelClip ( MipView . TranslatedWorldToClip , FIntRect ( ViewMin . X , ViewMin . Y , ViewMin . X + ViewSize . X , ViewMin . Y + ViewSize . Y ) ) ;
2021-02-05 16:37:17 -04:00
}
MipView . HZBTestViewRect = MipView . ViewRect ; // Assumed to always be the same for VSM
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
float RcpExtXY = 1.0f / ( FVirtualShadowMap : : PageSize * FVirtualShadowMap : : RasterWindowPages ) ;
2021-02-05 16:37:17 -04:00
// Transform clip from virtual address space to viewport.
2021-09-22 10:01:48 -04:00
MipView . ClipSpaceScaleOffset = FVector4f (
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
MipView . ViewSizeAndInvSize . X * RcpExtXY ,
MipView . ViewSizeAndInvSize . Y * RcpExtXY ,
( MipView . ViewSizeAndInvSize . X + 2.0f * MipView . ViewRect . X ) * RcpExtXY - 1.0f ,
2021-02-05 16:37:17 -04:00
- ( MipView . ViewSizeAndInvSize . Y + 2.0f * MipView . ViewRect . Y ) * RcpExtXY + 1.0f ) ;
uint32 StreamingPriorityCategory = 0 ;
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
uint32 ViewFlags = NANITE_VIEW_FLAG_HZBTEST | NANITE_VIEW_FLAG_NEAR_CLIP ;
2022-02-02 05:33:52 -05:00
MipView . StreamingPriorityCategory_AndFlags = ( ViewFlags < < NANITE_NUM_STREAMING_PRIORITY_CATEGORY_BITS ) | StreamingPriorityCategory ;
2021-02-05 16:37:17 -04:00
}
}
// Remove unused mip views
2021-12-03 16:38:33 -05:00
check ( Views . IsEmpty ( ) | | MaxMips > 0 ) ;
2021-02-05 16:37:17 -04:00
Views . SetNum ( MaxMips * NumPrimaryViews , false ) ;
}
2021-01-20 08:46:15 -04:00
// Compute shader bound to the "PrintClipmapStats" entry point of VirtualShadowMapPrintStats.usf
// (see IMPLEMENT_GLOBAL_SHADER below). Takes ShaderPrint parameters, two page-rect-bounds buffers
// and a range of shadow map ids.
class FVirtualSmPrintClipmapStatsCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FVirtualSmPrintClipmapStatsCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FVirtualSmPrintClipmapStatsCS , FVirtualPageManagementShader )
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-03-08 23:14:54 -04:00
// The uniform buffer binding is intentionally left disabled here.
//SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FVirtualShadowMapUniformParameters, VirtualShadowMap)
2021-01-20 08:46:15 -04:00
// ShaderPrint parameters used to emit the text.
SHADER_PARAMETER_STRUCT_INCLUDE ( ShaderPrint : : FShaderParameters , ShaderPrintStruct )
// Page rect bounds, one FIntVector4 per entry.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FIntVector4 > , PageRectBounds )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FIntVector4 > , AllocatedPageRectBounds )
// Range of shadow map ids to report on.
// NOTE(review): whether the end of the range is inclusive is not visible here - confirm against the shader.
SHADER_PARAMETER ( uint32 , ShadowMapIdRangeStart )
SHADER_PARAMETER ( uint32 , ShadowMapIdRangeEnd )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader: source file, entry point and shader frequency (compute).
IMPLEMENT_GLOBAL_SHADER ( FVirtualSmPrintClipmapStatsCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPrintStats.usf " , " PrintClipmapStats " , SF_Compute ) ;
2021-01-20 08:46:15 -04:00
// Pass parameters combining the view and shadow-depth uniform buffers, the virtual shadow map
// uniform buffer, instance culling draw parameters, a packed-view buffer and render target slots.
// NOTE(review): presumably used by the pass that rasterizes shadow depth into virtual shadow maps - the user is outside this excerpt.
BEGIN_SHADER_PARAMETER_STRUCT ( FVirtualShadowDepthPassParameters , )
// View uniform buffer, held by reference.
SHADER_PARAMETER_STRUCT_REF ( FViewUniformShaderParameters , View )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FShadowDepthPassUniformParameters , ShadowDepthPass )
2021-03-08 23:14:54 -04:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-01-20 08:46:15 -04:00
// Draw parameters from the instance culling system.
SHADER_PARAMETER_STRUCT_INCLUDE ( FInstanceCullingDrawParams , InstanceCullingDrawParams )
// Packed views, read as a structured buffer.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPackedView > , InViews )
RENDER_TARGET_BINDING_SLOTS ( )
END_SHADER_PARAMETER_STRUCT ( )
2021-12-03 16:38:33 -05:00
// Per-batch culling data. Shaders see it as StructuredBuffer<FVSMCullingBatchInfo> (VSMCullingBatchInfos),
// so the field order and sizes must stay in sync with the shader-side declaration.
// NOTE(review): each FVector3f is followed by a uint32, which looks like deliberate packing into 16-byte rows - confirm before reordering.
struct FVSMCullingBatchInfo
{
2022-01-20 04:59:27 -05:00
// Culling view origin, offset part. NOTE(review): presumably the offset half of a tile + offset position pair - confirm.
FVector3f CullingViewOriginOffset ;
2021-12-03 16:38:33 -05:00
// Index of the first primary view belonging to this batch.
uint32 FirstPrimaryView ;
2022-01-20 04:59:27 -05:00
// Culling view origin, tile part (see CullingViewOriginOffset).
FVector3f CullingViewOriginTile ;
2021-12-03 16:38:33 -05:00
// Number of primary views in this batch.
uint32 NumPrimaryViews ;
} ;
2021-01-20 08:46:15 -04:00
// Element type of the visible-instance buffer: written by FCullPerPageDrawCommandsCs (VisibleInstancesOut)
// and read by FOutputCommandInstanceListsCs (VisibleInstances). sizeof(FVisibleInstanceCmd) is used as the
// buffer stride, so the layout must match the shader-side struct.
struct FVisibleInstanceCmd
{
// Packed page information; the bit layout is defined on the shader side.
uint32 PackedPageInfo ;
// Id of the visible instance.
uint32 InstanceId ;
// Id of the draw command the instance belongs to.
uint32 DrawCommandId ;
} ;
// Compute shader bound to the "CullPerPageDrawCommandsCs" entry point of
// VirtualShadowMapBuildPerPageDrawCommands.usf (see IMPLEMENT_GLOBAL_SHADER below).
// Its outputs are a list of FVisibleInstanceCmd, per-draw indirect args, a visible instance count,
// an invalidating-instance list and a stats buffer.
class FCullPerPageDrawCommandsCs : public FGlobalShader
{
DECLARE_GLOBAL_SHADER ( FCullPerPageDrawCommandsCs ) ;
SHADER_USE_PARAMETER_STRUCT ( FCullPerPageDrawCommandsCs , FGlobalShader )
2021-02-05 16:37:17 -04:00
2021-03-23 22:41:18 -04:00
// Boolean permutation dimensions; each one toggles the named shader define.
class FNearClipDim : SHADER_PERMUTATION_BOOL ( " NEAR_CLIP " ) ;
2021-12-02 18:25:13 -05:00
class FUseHzbDim : SHADER_PERMUTATION_BOOL ( " USE_HZB_OCCLUSION " ) ;
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
2021-12-03 16:38:33 -05:00
class FBatchedDim : SHADER_PERMUTATION_BOOL ( " ENABLE_BATCH_MODE " ) ;
using FPermutationDomain = TShaderPermutationDomain < FUseHzbDim , FNearClipDim , FBatchedDim , FGenerateStatsDim > ;
2021-02-05 16:37:17 -04:00
2021-01-20 08:46:15 -04:00
public :
// Compiled only for platforms that support feature level SM5 and Nanite.
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
2022-01-27 14:34:07 -05:00
return IsFeatureLevelSupported ( Parameters . Platform , ERHIFeatureLevel : : SM5 ) & &
DoesPlatformSupportNanite ( Parameters . Platform ) ;
2021-01-20 08:46:15 -04:00
}
/**
* Adds the virtual shadow map and GPU load balancer defines, then sets NANITE_MULTI_VIEW,
* INDIRECT_ARGS_NUM_WORDS, VF_SUPPORTS_PRIMITIVE_SCENE_DATA and USE_GLOBAL_GPU_SCENE_DATA.
*/
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FGlobalShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
FVirtualShadowMapArray : : SetShaderDefines ( OutEnvironment ) ;
2021-06-21 16:52:03 -04:00
FInstanceProcessingGPULoadBalancer : : SetShaderDefines ( OutEnvironment ) ;
2021-01-20 08:46:15 -04:00
OutEnvironment . SetDefine ( TEXT ( " NANITE_MULTI_VIEW " ) , 1 ) ;
OutEnvironment . SetDefine ( TEXT ( " INDIRECT_ARGS_NUM_WORDS " ) , FInstanceCullingContext : : IndirectArgsNumWords ) ;
OutEnvironment . SetDefine ( TEXT ( " VF_SUPPORTS_PRIMITIVE_SCENE_DATA " ) , 1 ) ;
OutEnvironment . SetDefine ( TEXT ( " USE_GLOBAL_GPU_SCENE_DATA " ) , 1 ) ;
}
2021-12-03 16:38:33 -05:00
// HZB inputs, grouped so callers can fill them once and pass them in as a unit
// (included into FParameters below as HZBShaderParameters).
BEGIN_SHADER_PARAMETER_STRUCT ( FHZBShaderParameters , )
2022-03-03 06:41:20 -05:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , HZBPageTable )
2022-03-15 10:05:21 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , HZBPageFlags )
2022-03-03 06:41:20 -05:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint4 > , HZBPageRectBounds )
2021-12-03 16:38:33 -05:00
SHADER_PARAMETER_RDG_TEXTURE ( Texture2D , HZBTexture )
SHADER_PARAMETER_SAMPLER ( SamplerState , HZBSampler )
SHADER_PARAMETER ( FVector2f , HZBSize )
SHADER_PARAMETER ( uint32 , HZBMode )
END_SHADER_PARAMETER_STRUCT ( )
2021-01-20 08:46:15 -04:00
// Main parameter struct. Declaration order is part of the generated struct; do not reorder casually.
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2022-01-18 13:05:54 -05:00
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
2021-01-20 08:46:15 -04:00
2022-05-03 12:08:20 -04:00
// GPU scene data (instance, payload and primitive buffers plus related scalars).
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < float4 > , GPUSceneInstanceSceneData )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < float4 > , GPUSceneInstancePayloadData )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < float4 > , GPUScenePrimitiveSceneData )
2021-06-14 13:43:26 -04:00
SHADER_PARAMETER ( uint32 , InstanceSceneDataSOAStride )
2021-01-20 08:46:15 -04:00
SHADER_PARAMETER ( uint32 , GPUSceneFrameNumber )
2022-01-18 08:51:35 -05:00
// Revealed-primitive mask and its element count.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PrimitiveRevealedMask )
SHADER_PARAMETER ( uint32 , PrimitiveRevealedNum )
2021-06-21 16:52:03 -04:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FInstanceProcessingGPULoadBalancer : : FShaderParameters , LoadBalancerParameters )
2021-01-20 08:46:15 -04:00
// Range of primary views processed by this dispatch, and the total across all batches.
SHADER_PARAMETER ( int32 , FirstPrimaryView )
SHADER_PARAMETER ( int32 , NumPrimaryViews )
2021-12-03 16:38:33 -05:00
SHADER_PARAMETER ( uint32 , TotalPrimaryViews )
// Capacity of VisibleInstancesOut, in elements.
SHADER_PARAMETER ( uint32 , VisibleInstancesBufferNum )
2021-06-21 16:52:03 -04:00
SHADER_PARAMETER ( int32 , DynamicInstanceIdOffset )
SHADER_PARAMETER ( int32 , DynamicInstanceIdMax )
2022-01-20 04:59:27 -05:00
SHADER_PARAMETER ( float , MaxMaterialPositionInvalidationRange )
// Culling view origin split into offset and tile parts (same naming as FVSMCullingBatchInfo).
SHADER_PARAMETER ( FVector3f , CullingViewOriginOffset )
SHADER_PARAMETER ( FVector3f , CullingViewOriginTile )
2021-01-20 08:46:15 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPackedView > , InViews )
2021-06-21 16:52:03 -04:00
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FInstanceCullingContext : : FDrawCommandDesc > , DrawCommandDescs )
2021-01-20 08:46:15 -04:00
2021-12-03 16:38:33 -05:00
// Batch inputs. NOTE(review): presumably only read when the ENABLE_BATCH_MODE permutation is on - confirm.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FContextBatchInfo > , BatchInfos )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FVSMCullingBatchInfo > , VSMCullingBatchInfos )
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint32 > , BatchInds )
2021-01-20 08:46:15 -04:00
// Outputs: visible instance commands, indirect draw args and the visible instance counter.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < FVisibleInstanceCmd > , VisibleInstancesOut )
2021-06-21 16:52:03 -04:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , DrawIndirectArgsBufferOut )
2021-01-20 08:46:15 -04:00
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , VisibleInstanceCountBufferOut )
2021-09-02 07:13:18 -04:00
2021-12-03 16:38:33 -05:00
SHADER_PARAMETER_STRUCT_INCLUDE ( FHZBShaderParameters , HZBShaderParameters )
2021-12-02 18:25:13 -05:00
2021-09-02 07:13:18 -04:00
// Output list of invalidating instances and the number of slots available in it.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutInvalidatingInstances )
SHADER_PARAMETER ( uint32 , NumInvalidatingInstanceSlots )
2021-12-02 18:25:13 -05:00
// Stats output. NOTE(review): presumably only written when the VSM_GENERATE_STATS permutation is on - confirm.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
2021-01-20 08:46:15 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader: source file, entry point and shader frequency (compute).
IMPLEMENT_GLOBAL_SHADER ( FCullPerPageDrawCommandsCs , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapBuildPerPageDrawCommands.usf " , " CullPerPageDrawCommandsCs " , SF_Compute ) ;
2021-01-20 08:46:15 -04:00
// Compute shader bound to the "AllocateCommandInstanceOutputSpaceCs" entry point of
// VirtualShadowMapBuildPerPageDrawCommands.usf (see IMPLEMENT_GLOBAL_SHADER below).
// Reads the indirect draw args and writes instance id offset buffers.
class FAllocateCommandInstanceOutputSpaceCs : public FGlobalShader
{
DECLARE_GLOBAL_SHADER ( FAllocateCommandInstanceOutputSpaceCs ) ;
SHADER_USE_PARAMETER_STRUCT ( FAllocateCommandInstanceOutputSpaceCs , FGlobalShader )
public :
// Thread group size; exported to the shader as NUM_THREADS_PER_GROUP.
static constexpr int32 NumThreadsPerGroup = 64 ;
// Compiled only for platforms that support feature level SM5 and Nanite.
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
2022-01-27 14:34:07 -05:00
return IsFeatureLevelSupported ( Parameters . Platform , ERHIFeatureLevel : : SM5 ) & &
DoesPlatformSupportNanite ( Parameters . Platform ) ;
2021-01-20 08:46:15 -04:00
}
/**
* Adds the virtual shadow map and GPU load balancer defines, then sets NUM_THREADS_PER_GROUP,
* NANITE_MULTI_VIEW and INDIRECT_ARGS_NUM_WORDS.
*/
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FGlobalShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
FVirtualShadowMapArray : : SetShaderDefines ( OutEnvironment ) ;
2021-06-21 16:52:03 -04:00
FInstanceProcessingGPULoadBalancer : : SetShaderDefines ( OutEnvironment ) ;
2021-01-20 08:46:15 -04:00
OutEnvironment . SetDefine ( TEXT ( " NUM_THREADS_PER_GROUP " ) , NumThreadsPerGroup ) ;
OutEnvironment . SetDefine ( TEXT ( " NANITE_MULTI_VIEW " ) , 1 ) ;
OutEnvironment . SetDefine ( TEXT ( " INDIRECT_ARGS_NUM_WORDS " ) , FInstanceCullingContext : : IndirectArgsNumWords ) ;
}
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
2021-06-21 16:52:03 -04:00
// Number of indirect draw argument entries in DrawIndirectArgsBuffer.
SHADER_PARAMETER ( uint32 , NumIndirectArgs )
SHADER_PARAMETER_RDG_BUFFER_SRV ( Buffer < uint > , DrawIndirectArgsBuffer )
2021-01-20 08:46:15 -04:00
// Outputs: per-draw instance id offsets, the running output offset, and a temporary copy of the offsets.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , InstanceIdOffsetBufferOut )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutputOffsetBufferOut )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , TmpInstanceIdOffsetBufferOut )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader: source file, entry point and shader frequency (compute).
IMPLEMENT_GLOBAL_SHADER ( FAllocateCommandInstanceOutputSpaceCs , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapBuildPerPageDrawCommands.usf " , " AllocateCommandInstanceOutputSpaceCs " , SF_Compute ) ;
2021-01-20 08:46:15 -04:00
// Compute shader bound to the "OutputCommandInstanceListsCs" entry point of
// VirtualShadowMapBuildPerPageDrawCommands.usf (see IMPLEMENT_GLOBAL_SHADER below).
// Reads the FVisibleInstanceCmd list and writes instance id and page info buffers.
class FOutputCommandInstanceListsCs : public FGlobalShader
{
DECLARE_GLOBAL_SHADER ( FOutputCommandInstanceListsCs ) ;
SHADER_USE_PARAMETER_STRUCT ( FOutputCommandInstanceListsCs , FGlobalShader )
public :
// Thread group size; exported to the shader as NUM_THREADS_PER_GROUP.
static constexpr int32 NumThreadsPerGroup = 64 ;
// Compiled only for platforms that support feature level SM5 and Nanite.
static bool ShouldCompilePermutation ( const FGlobalShaderPermutationParameters & Parameters )
{
2022-01-27 14:34:07 -05:00
return IsFeatureLevelSupported ( Parameters . Platform , ERHIFeatureLevel : : SM5 ) & &
DoesPlatformSupportNanite ( Parameters . Platform ) ;
2021-01-20 08:46:15 -04:00
}
/**
* Adds the virtual shadow map and GPU load balancer defines, then sets NUM_THREADS_PER_GROUP,
* NANITE_MULTI_VIEW and INDIRECT_ARGS_NUM_WORDS.
*/
static void ModifyCompilationEnvironment ( const FGlobalShaderPermutationParameters & Parameters , FShaderCompilerEnvironment & OutEnvironment )
{
FGlobalShader : : ModifyCompilationEnvironment ( Parameters , OutEnvironment ) ;
FVirtualShadowMapArray : : SetShaderDefines ( OutEnvironment ) ;
2021-06-21 16:52:03 -04:00
FInstanceProcessingGPULoadBalancer : : SetShaderDefines ( OutEnvironment ) ;
2021-01-20 08:46:15 -04:00
OutEnvironment . SetDefine ( TEXT ( " NUM_THREADS_PER_GROUP " ) , NumThreadsPerGroup ) ;
OutEnvironment . SetDefine ( TEXT ( " NANITE_MULTI_VIEW " ) , 1 ) ;
OutEnvironment . SetDefine ( TEXT ( " INDIRECT_ARGS_NUM_WORDS " ) , FInstanceCullingContext : : IndirectArgsNumWords ) ;
}
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
// Input: visible instance commands.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FVisibleInstanceCmd > , VisibleInstances )
2021-06-03 02:19:28 -04:00
// Outputs: instance ids and page info (both uint structured buffers).
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , InstanceIdsBufferOut )
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , PageInfoBufferOut )
2021-01-20 08:46:15 -04:00
// Temporary instance id offsets; bound as a UAV, so the shader may modify them.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , TmpInstanceIdOffsetBufferOut )
// Number of entries in VisibleInstances.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , VisibleInstanceCountBuffer )
// Declares IndirectArgs access to a buffer so that RDG knows about it; per the original
// author this reference is needed to keep RDG happy (the exact reason is not documented).
2021-03-17 05:55:03 -04:00
RDG_BUFFER_ACCESS ( IndirectArgs , ERHIAccess : : IndirectArgs )
2021-01-20 08:46:15 -04:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
// Registers the shader: source file, entry point and shader frequency (compute).
IMPLEMENT_GLOBAL_SHADER ( FOutputCommandInstanceListsCs , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapBuildPerPageDrawCommands.usf " , " OutputCommandInstanceListsCs " , SF_Compute ) ;
2021-01-20 08:46:15 -04:00
2021-12-03 16:38:33 -05:00
/**
 * Buffers produced by AddCullingPasses and handed back to its caller.
 *
 * Every member has a default so that a default-constructed result is in a well-defined
 * "nothing produced" state instead of holding indeterminate pointers.
 */
struct FCullingResult
{
	/** Indirect draw arguments, created from and uploaded with the caller's IndirectArgs. */
	FRDGBufferRef DrawIndirectArgsRDG = nullptr;

	/** One uint32 per instance id offset; a plain (non-structured) buffer because it is also accessed as a vertex buffer. */
	FRDGBufferRef InstanceIdOffsetBufferRDG = nullptr;

	/** Instance ids. NOTE(review): filled in the part of AddCullingPasses outside this excerpt - confirm. */
	FRDGBufferRef InstanceIdsBuffer = nullptr;

	/** Page info. NOTE(review): filled in the part of AddCullingPasses outside this excerpt - confirm. */
	FRDGBufferRef PageInfoBuffer = nullptr;

	/** Upper bound on visible instances per pass; used as the element count of the visible-instance buffer. */
	uint32 MaxNumInstancesPerPass = 0;
};
template < typename InstanceCullingLoadBalancerType >
static FCullingResult AddCullingPasses ( FRDGBuilder & GraphBuilder ,
const TConstArrayView < FRHIDrawIndexedIndirectParameters > & IndirectArgs ,
const TConstArrayView < FInstanceCullingContext : : FDrawCommandDesc > & DrawCommandDescs ,
const TConstArrayView < uint32 > & InstanceIdOffsets ,
InstanceCullingLoadBalancerType * LoadBalancer ,
const TConstArrayView < FInstanceCullingMergedContext : : FContextBatchInfo > BatchInfos ,
const TConstArrayView < FVSMCullingBatchInfo > VSMCullingBatchInfos ,
const TConstArrayView < uint32 > BatchInds ,
bool bUseNearClip ,
uint32 TotalInstances ,
uint32 TotalPrimaryViews ,
FRDGBufferRef VirtualShadowViewsRDG ,
const FCullPerPageDrawCommandsCs : : FHZBShaderParameters & HZBShaderParameters ,
FVirtualShadowMapArray * VirtualShadowMapArray ,
2022-01-18 08:51:35 -05:00
FGPUScene & GPUScene ,
FRDGBufferRef PrimitiveRevealedMaskRdg ,
int32 PrimitiveRevealedNum )
2021-12-03 16:38:33 -05:00
{
int32 NumIndirectArgs = IndirectArgs . Num ( ) ;
FRDGBufferRef TmpInstanceIdOffsetBufferRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.TmpInstanceIdOffsetBuffer " ) , sizeof ( uint32 ) , NumIndirectArgs , nullptr , 0 ) ;
// TODO: This is both not right, and also over conservative when running with the atomic path
FCullingResult CullingResult ;
CullingResult . MaxNumInstancesPerPass = TotalInstances * 64u ;
FRDGBufferRef VisibleInstancesRdg = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.VisibleInstances " ) , sizeof ( FVisibleInstanceCmd ) , CullingResult . MaxNumInstancesPerPass , nullptr , 0 ) ;
FRDGBufferRef VisibleInstanceWriteOffsetRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.VisibleInstanceWriteOffset " ) , sizeof ( uint32 ) , 1 , nullptr , 0 ) ;
FRDGBufferRef OutputOffsetBufferRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.OutputOffsetBuffer " ) , sizeof ( uint32 ) , 1 , nullptr , 0 ) ;
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( VisibleInstanceWriteOffsetRDG ) , 0 ) ;
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( OutputOffsetBufferRDG ) , 0 ) ;
// Create buffer for indirect args and upload draw arg data, also clears the instance to zero
UE5_RELEASE: MGPU, numerous fixes to get EngineTest AFR, and Virtual Production City map to run with multiple GPUs. Ultimately there were 3 crash sources (RayTracing, Nanite, Distance Field streaming), each of which required a couple fixes, plus infrastructure to support those fixes...
There remain significant visual artifacts in the Virtual Production City map. Lumen has serious issues with multiple views in both single and multi-GPU modes -- I think Lumen data needs to be split per view family to solve this. There is some corrupt geometry in the second view, which may be Nanite or instance rendering related (or something else entirely). To narrow down these issues, I think I'm going to need to extend the DumpGPU feature to be able to do more effective MGPU graphical debugging, since none of PIX, RenderDoc, or NSight work. But at least it doesn't crash now...
Full list of changes:
* CVAR (DC.MultiGPUMode) to override multi-GPU mode for Display Cluster, debug feature copied over from 4.27.
* Barrier and synchronization fixes for RHITransferTextures copied over 4.27. Future work will make RDG handle multi-GPU transitions more seamlessly...
* CVAR (DC.ForceCrossGPUCopy) to force expensive full synchronization and copy of resources cross GPU at the end of each view family render (for debugging). RHITransferTextures upgraded to support copying things besides 2D textures, including other texture resources and buffers.
* AFR temporal fixes from a previous CL (which I moved from my single GPU to multi GPU PC), now improved to avoid some validation asserts in Debug builds (pass inputs not declared, GetParent()->GetRHI() not working because parent not declared to pass).
* Ray tracing (hang): acceleration buffers are branched per GPU, as GPU virtual addresses for resources internally referenced by these buffers may vary per GPU. Needed to add infrastructure to support buffers that duplicate memory per GPU, rather than using driver aliasing of the underlying resource.
* Ray tracing (hang): some buffer bindings weren't using a proper GPU index.
* Nanite (hang): Force initial clear of Nanite.MainAndPostNodesAndClusterBatchesBuffer to run on all GPUs. Solves GPU hang in shadow rendering the first frame (due to shadow rendering running across all GPUs), and later random hangs in view rendering.
* Distance field streaming (assert): GPU readback staging buffers need to be branched per GPU, as the underlying class is single device. GPU readback buffers and textures properly take into account the GPU they were last written on when locking and unlocking. Includes handling an edge case where a write can be queued when a lock is active, due to the deferred way commands are played back in the render graph.
* Distance field streaming (assert): UAV clear wasn't taking into account GPU index.
* GPU scene update needs to run across all GPUs.
* Fix for "DumpGPU" command to avoid assert with MGPU -- arbitrarily pick a GPU (last index) when the GPU mask contains multiple bits. Hope to improve this in the future, but it works.
#rnx
#rb mihnea.balta juan.canada tiago.costa kenzo.terelst
#jira none
#preflight 61ba7edbdc58e54b3318fdf5
#ROBOMERGE-AUTHOR: jason.hoerner
#ROBOMERGE-SOURCE: CL 18472819 in //UE5/Release-5.0/... via CL 18473380
#ROBOMERGE-BOT: STARSHIP (Release-Engine-Staging -> Release-Engine-Test) (v899-18417669)
[CL 18473412 by jason hoerner in ue5-release-engine-test branch]
2021-12-15 23:12:04 -05:00
FRDGBufferDesc IndirectArgsDesc = FRDGBufferDesc : : CreateIndirectDesc ( FInstanceCullingContext : : IndirectArgsNumWords * IndirectArgs . Num ( ) ) ;
IndirectArgsDesc . Usage | = BUF_MultiGPUGraphIgnore ;
CullingResult . DrawIndirectArgsRDG = GraphBuilder . CreateBuffer ( IndirectArgsDesc , TEXT ( " Shadow.Virtual.DrawIndirectArgsBuffer " ) ) ;
2021-12-03 16:38:33 -05:00
GraphBuilder . QueueBufferUpload ( CullingResult . DrawIndirectArgsRDG , IndirectArgs . GetData ( ) , IndirectArgs . GetTypeSize ( ) * IndirectArgs . Num ( ) ) ;
FGlobalShaderMap * ShaderMap = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) ;
// Note: we redundantly clear the instance counts here as there is some issue with replays on certain consoles.
FInstanceCullingContext : : AddClearIndirectArgInstanceCountPass ( GraphBuilder , ShaderMap , CullingResult . DrawIndirectArgsRDG ) ;
// not using structured buffer as we have to get at it as a vertex buffer
CullingResult . InstanceIdOffsetBufferRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateBufferDesc ( sizeof ( uint32 ) , InstanceIdOffsets . Num ( ) ) , TEXT ( " Shadow.Virtual.InstanceIdOffsetBuffer " ) ) ;
{
FCullPerPageDrawCommandsCs : : FParameters * PassParameters = GraphBuilder . AllocParameters < FCullPerPageDrawCommandsCs : : FParameters > ( ) ;
2022-01-18 13:05:54 -05:00
PassParameters - > VirtualShadowMap = VirtualShadowMapArray - > GetUniformBuffer ( GraphBuilder ) ;
2021-12-03 16:38:33 -05:00
2022-05-03 12:08:20 -04:00
const FGPUSceneResourceParameters GPUSceneParameters = GPUScene . GetShaderParameters ( ) ;
PassParameters - > GPUSceneInstanceSceneData = GPUSceneParameters . GPUSceneInstanceSceneData ;
PassParameters - > GPUSceneInstancePayloadData = GPUSceneParameters . GPUSceneInstancePayloadData ;
PassParameters - > GPUScenePrimitiveSceneData = GPUSceneParameters . GPUScenePrimitiveSceneData ;
PassParameters - > GPUSceneFrameNumber = GPUSceneParameters . GPUSceneFrameNumber ;
PassParameters - > InstanceSceneDataSOAStride = GPUSceneParameters . InstanceDataSOAStride ;
2021-12-03 16:38:33 -05:00
2022-01-18 08:51:35 -05:00
// Make sure there is enough space in the buffer for all the primitive IDs that might be used to index.
check ( PrimitiveRevealedMaskRdg - > Desc . NumElements * 32u > = uint32 ( PrimitiveRevealedNum ) ) ;
PassParameters - > PrimitiveRevealedMask = GraphBuilder . CreateSRV ( PrimitiveRevealedMaskRdg ) ;
PassParameters - > PrimitiveRevealedNum = uint32 ( PrimitiveRevealedNum ) ;
2021-12-03 16:38:33 -05:00
PassParameters - > DynamicInstanceIdOffset = BatchInfos [ 0 ] . DynamicInstanceIdOffset ;
PassParameters - > DynamicInstanceIdMax = BatchInfos [ 0 ] . DynamicInstanceIdMax ;
2022-01-20 04:59:27 -05:00
PassParameters - > MaxMaterialPositionInvalidationRange = CVarMaxMaterialPositionInvalidationRange . GetValueOnRenderThread ( ) ;
2021-12-03 16:38:33 -05:00
auto GPUData = LoadBalancer - > Upload ( GraphBuilder ) ;
GPUData . GetShaderParameters ( GraphBuilder , PassParameters - > LoadBalancerParameters ) ;
PassParameters - > FirstPrimaryView = VSMCullingBatchInfos [ 0 ] . FirstPrimaryView ;
PassParameters - > NumPrimaryViews = VSMCullingBatchInfos [ 0 ] . NumPrimaryViews ;
2022-01-20 04:59:27 -05:00
PassParameters - > CullingViewOriginOffset = VSMCullingBatchInfos [ 0 ] . CullingViewOriginOffset ;
PassParameters - > CullingViewOriginTile = VSMCullingBatchInfos [ 0 ] . CullingViewOriginTile ;
2021-12-03 16:38:33 -05:00
PassParameters - > TotalPrimaryViews = TotalPrimaryViews ;
PassParameters - > VisibleInstancesBufferNum = CullingResult . MaxNumInstancesPerPass ;
PassParameters - > InViews = GraphBuilder . CreateSRV ( VirtualShadowViewsRDG ) ;
PassParameters - > DrawCommandDescs = GraphBuilder . CreateSRV ( CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.DrawCommandDescs " ) , DrawCommandDescs ) ) ;
const bool bUseBatchMode = ! BatchInds . IsEmpty ( ) ;
if ( bUseBatchMode )
{
PassParameters - > BatchInfos = GraphBuilder . CreateSRV ( CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.BatchInfos " ) , BatchInfos ) ) ;
PassParameters - > VSMCullingBatchInfos = GraphBuilder . CreateSRV ( CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.VSMCullingBatchInfos " ) , VSMCullingBatchInfos ) ) ;
PassParameters - > BatchInds = GraphBuilder . CreateSRV ( CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.BatchInds " ) , BatchInds ) ) ;
}
PassParameters - > DrawIndirectArgsBufferOut = GraphBuilder . CreateUAV ( CullingResult . DrawIndirectArgsRDG , PF_R32_UINT ) ;
PassParameters - > VisibleInstancesOut = GraphBuilder . CreateUAV ( VisibleInstancesRdg ) ;
PassParameters - > VisibleInstanceCountBufferOut = GraphBuilder . CreateUAV ( VisibleInstanceWriteOffsetRDG ) ;
PassParameters - > OutInvalidatingInstances = GraphBuilder . CreateUAV ( VirtualShadowMapArray - > InvalidatingInstancesRDG ) ;
PassParameters - > NumInvalidatingInstanceSlots = VirtualShadowMapArray - > NumInvalidatingInstanceSlots ;
PassParameters - > HZBShaderParameters = HZBShaderParameters ;
bool bGenerateStats = VirtualShadowMapArray - > StatsBufferRDG ! = nullptr ;
if ( bGenerateStats )
{
PassParameters - > OutStatsBuffer = GraphBuilder . CreateUAV ( VirtualShadowMapArray - > StatsBufferRDG ) ;
}
FCullPerPageDrawCommandsCs : : FPermutationDomain PermutationVector ;
PermutationVector . Set < FCullPerPageDrawCommandsCs : : FNearClipDim > ( bUseNearClip ) ;
PermutationVector . Set < FCullPerPageDrawCommandsCs : : FBatchedDim > ( bUseBatchMode ) ;
PermutationVector . Set < FCullPerPageDrawCommandsCs : : FUseHzbDim > ( HZBShaderParameters . HZBTexture ! = nullptr ) ;
PermutationVector . Set < FCullPerPageDrawCommandsCs : : FGenerateStatsDim > ( bGenerateStats ) ;
auto ComputeShader = ShaderMap - > GetShader < FCullPerPageDrawCommandsCs > ( PermutationVector ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " CullPerPageDrawCommands " ) ,
ComputeShader ,
PassParameters ,
LoadBalancer - > GetWrappedCsGroupCount ( )
) ;
}
// 2.2.Allocate space for the final instance ID output and so on.
if ( true )
{
FAllocateCommandInstanceOutputSpaceCs : : FParameters * PassParameters = GraphBuilder . AllocParameters < FAllocateCommandInstanceOutputSpaceCs : : FParameters > ( ) ;
FRDGBufferRef InstanceIdOutOffsetBufferRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " InstanceCulling.OutputOffsetBufferOut " ) , sizeof ( uint32 ) , 1 , nullptr , 0 ) ;
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( InstanceIdOutOffsetBufferRDG ) , 0 ) ;
PassParameters - > NumIndirectArgs = NumIndirectArgs ;
PassParameters - > InstanceIdOffsetBufferOut = GraphBuilder . CreateUAV ( CullingResult . InstanceIdOffsetBufferRDG , PF_R32_UINT ) ; ;
PassParameters - > OutputOffsetBufferOut = GraphBuilder . CreateUAV ( InstanceIdOutOffsetBufferRDG ) ;
PassParameters - > TmpInstanceIdOffsetBufferOut = GraphBuilder . CreateUAV ( TmpInstanceIdOffsetBufferRDG ) ;
PassParameters - > DrawIndirectArgsBuffer = GraphBuilder . CreateSRV ( CullingResult . DrawIndirectArgsRDG , PF_R32_UINT ) ;
auto ComputeShader = ShaderMap - > GetShader < FAllocateCommandInstanceOutputSpaceCs > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " AllocateCommandInstanceOutputSpaceCs " ) ,
ComputeShader ,
PassParameters ,
FComputeShaderUtils : : GetGroupCount ( NumIndirectArgs , FAllocateCommandInstanceOutputSpaceCs : : NumThreadsPerGroup )
) ;
}
// 2.3. Perform final pass to re-shuffle the instance ID's to their final resting places
CullingResult . InstanceIdsBuffer = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , CullingResult . MaxNumInstancesPerPass ) , TEXT ( " Shadow.Virtual.InstanceIdsBuffer " ) ) ;
CullingResult . PageInfoBuffer = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( uint32 ) , CullingResult . MaxNumInstancesPerPass ) , TEXT ( " Shadow.Virtual.PageInfoBuffer " ) ) ;
FRDGBufferRef OutputPassIndirectArgs = FComputeShaderUtils : : AddIndirectArgsSetupCsPass1D ( GraphBuilder , VisibleInstanceWriteOffsetRDG , TEXT ( " Shadow.Virtual.IndirectArgs " ) , FOutputCommandInstanceListsCs : : NumThreadsPerGroup ) ;
if ( true )
{
FOutputCommandInstanceListsCs : : FParameters * PassParameters = GraphBuilder . AllocParameters < FOutputCommandInstanceListsCs : : FParameters > ( ) ;
PassParameters - > VisibleInstances = GraphBuilder . CreateSRV ( VisibleInstancesRdg ) ;
PassParameters - > PageInfoBufferOut = GraphBuilder . CreateUAV ( CullingResult . PageInfoBuffer ) ;
PassParameters - > InstanceIdsBufferOut = GraphBuilder . CreateUAV ( CullingResult . InstanceIdsBuffer ) ;
PassParameters - > TmpInstanceIdOffsetBufferOut = GraphBuilder . CreateUAV ( TmpInstanceIdOffsetBufferRDG ) ;
PassParameters - > VisibleInstanceCountBuffer = GraphBuilder . CreateSRV ( VisibleInstanceWriteOffsetRDG ) ;
PassParameters - > IndirectArgs = OutputPassIndirectArgs ;
auto ComputeShader = ShaderMap - > GetShader < FOutputCommandInstanceListsCs > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " OutputCommandInstanceListsCs " ) ,
ComputeShader ,
PassParameters ,
OutputPassIndirectArgs ,
0
) ;
}
return CullingResult ;
}
// Adds one RDG raster pass that dispatches the draw commands held by MeshCommandPass.
//
// Before adding the pass, this fills in the caller-allocated PassParameters:
//  - View / ShadowDepthPass / VirtualShadowMap uniform buffers and the InViews SRV (from VirtualShadowViewsRDG),
//  - InstanceCullingDrawParams, pointed at the indirect-args and instance-ID-offset buffers in CullingResult
//    plus the supplied InstanceCullingUniformBuffer.
//
// Lifetime note: the pass lambda captures MeshCommandPass by reference and PassParameters by pointer,
// so both must still be valid whenever the lambda handed to AddPass is invoked.
static void AddRasterPass (
FRDGBuilder & GraphBuilder ,
FRDGEventName & & PassName ,
const FViewInfo * ShadowDepthView ,
const TRDGUniformBufferRef < FShadowDepthPassUniformParameters > & ShadowDepthPassUniformBuffer ,
FVirtualShadowMapArray * VirtualShadowMapArray ,
FRDGBufferRef VirtualShadowViewsRDG ,
const FCullingResult & CullingResult ,
FParallelMeshDrawCommandPass & MeshCommandPass ,
FVirtualShadowDepthPassParameters * PassParameters ,
TRDGUniformBufferRef < FInstanceCullingGlobalUniforms > InstanceCullingUniformBuffer )
{
PassParameters - > View = ShadowDepthView - > ViewUniformBuffer ;
PassParameters - > ShadowDepthPass = ShadowDepthPassUniformBuffer ;
PassParameters - > VirtualShadowMap = VirtualShadowMapArray - > GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > InViews = GraphBuilder . CreateSRV ( VirtualShadowViewsRDG ) ;
// Draw parameters consumed by DispatchDraw below; the buffers come from the culling result.
PassParameters - > InstanceCullingDrawParams . DrawIndirectArgsBuffer = CullingResult . DrawIndirectArgsRDG ;
PassParameters - > InstanceCullingDrawParams . InstanceIdOffsetBuffer = CullingResult . InstanceIdOffsetBufferRDG ;
PassParameters - > InstanceCullingDrawParams . InstanceCulling = InstanceCullingUniformBuffer ;
// Only Max is assigned; Min keeps whatever FIntRect's default constructor provides (presumably zero - confirm).
FIntRect ViewRect ;
ViewRect . Max = FVirtualShadowMap : : VirtualMaxResolutionXY ;
// The pass is added with SkipRenderPass, and the lambda explicitly calls BeginRenderPass / EndRenderPass itself.
GraphBuilder . AddPass (
MoveTemp ( PassName ) ,
PassParameters ,
ERDGPassFlags : : Raster | ERDGPassFlags : : SkipRenderPass ,
[ & MeshCommandPass , PassParameters , ViewRect ] ( FRHICommandList & RHICmdList )
{
// No render targets are assigned to RPInfo in this function; only the resolve rect is set.
FRHIRenderPassInfo RPInfo ;
2022-05-18 12:29:26 -04:00
RPInfo . ResolveRect = FResolveRect ( ViewRect ) ;
2021-12-03 16:38:33 -05:00
RHICmdList . BeginRenderPass ( RPInfo , TEXT ( " RasterizeVirtualShadowMaps(Non-Nanite) " ) ) ;
// Viewport max is clamped to 32767 per axis. NOTE(review): presumably an RHI viewport size limit - confirm.
RHICmdList . SetViewport ( ViewRect . Min . X , ViewRect . Min . Y , 0.0f , FMath : : Min ( ViewRect . Max . X , 32767 ) , FMath : : Min ( ViewRect . Max . Y , 32767 ) , 1.0f ) ;
MeshCommandPass . DispatchDraw ( nullptr , RHICmdList , & PassParameters - > InstanceCullingDrawParams ) ;
RHICmdList . EndRenderPass ( ) ;
} ) ;
}
2022-01-20 04:59:27 -05:00
void FVirtualShadowMapArray : : RenderVirtualShadowMapsNonNanite ( FRDGBuilder & GraphBuilder , const TArray < FProjectedShadowInfo * , SceneRenderingAllocator > & VirtualSmMeshCommandPasses , FScene & Scene , TArrayView < FViewInfo > Views )
2021-01-20 08:46:15 -04:00
{
2021-02-17 15:32:52 -04:00
if ( VirtualSmMeshCommandPasses . Num ( ) = = 0 )
{
return ;
}
2021-06-15 12:47:24 -04:00
RDG_EVENT_SCOPE ( GraphBuilder , " RenderVirtualShadowMaps(Non-Nanite) " ) ;
2021-02-22 14:47:36 -04:00
2021-01-20 08:46:15 -04:00
FGPUScene & GPUScene = Scene . GPUScene ;
2021-12-02 18:25:13 -05:00
FRDGBufferSRVRef PrevPageTableRDGSRV = CacheManager - > IsValid ( ) ? GraphBuilder . CreateSRV ( GraphBuilder . RegisterExternalBuffer ( CacheManager - > PrevBuffers . PageTable , TEXT ( " Shadow.Virtual.PrevPageTable " ) ) ) : nullptr ;
2022-03-15 10:05:21 -04:00
FRDGBufferSRVRef PrevPageFlagsRDGSRV = CacheManager - > IsValid ( ) ? GraphBuilder . CreateSRV ( GraphBuilder . RegisterExternalBuffer ( CacheManager - > PrevBuffers . PageFlags , TEXT ( " Shadow.Virtual.PrevPageFlags " ) ) ) : nullptr ;
2022-03-03 06:41:20 -05:00
FRDGBufferSRVRef PrevPageRectBoundsRDGSRV = CacheManager - > IsValid ( ) ? GraphBuilder . CreateSRV ( GraphBuilder . RegisterExternalBuffer ( CacheManager - > PrevBuffers . PageRectBounds , TEXT ( " Shadow.Virtual.PrevPageRectBounds " ) ) ) : nullptr ;
2021-12-02 18:25:13 -05:00
int32 HZBMode = CVarNonNaniteVsmUseHzb . GetValueOnRenderThread ( ) ;
auto InitHZB = [ & ] ( ) - > FRDGTextureRef
{
if ( HZBMode = = 1 & & CacheManager - > IsValid ( ) )
{
return GraphBuilder . RegisterExternalTexture ( CacheManager - > PrevBuffers . HZBPhysical ) ;
}
if ( HZBMode = = 2 & & HZBPhysical ! = nullptr )
{
return HZBPhysical ;
}
return nullptr ;
} ;
const FRDGTextureRef HZBTexture = InitHZB ( ) ;
2021-12-03 16:38:33 -05:00
TArray < FVSMCullingBatchInfo , SceneRenderingAllocator > UnBatchedVSMCullingBatchInfo ;
TArray < FProjectedShadowInfo * , SceneRenderingAllocator > BatchedVirtualSmMeshCommandPasses ;
TArray < FProjectedShadowInfo * , SceneRenderingAllocator > UnBatchedVirtualSmMeshCommandPasses ;
UnBatchedVSMCullingBatchInfo . Reserve ( VirtualSmMeshCommandPasses . Num ( ) ) ;
BatchedVirtualSmMeshCommandPasses . Reserve ( VirtualSmMeshCommandPasses . Num ( ) ) ;
UnBatchedVirtualSmMeshCommandPasses . Reserve ( VirtualSmMeshCommandPasses . Num ( ) ) ;
TArray < Nanite : : FPackedView , SceneRenderingAllocator > VirtualShadowViews ;
2021-01-20 08:46:15 -04:00
2021-12-03 16:38:33 -05:00
TArray < FVSMCullingBatchInfo , SceneRenderingAllocator > VSMCullingBatchInfos ;
VSMCullingBatchInfos . Reserve ( VirtualSmMeshCommandPasses . Num ( ) ) ;
2021-07-28 17:00:37 -04:00
2021-12-03 16:38:33 -05:00
TArray < FVirtualShadowDepthPassParameters * , SceneRenderingAllocator > BatchedPassParameters ;
BatchedPassParameters . Reserve ( VirtualSmMeshCommandPasses . Num ( ) ) ;
2022-01-20 04:59:27 -05:00
/**
* Use the ' dependent view ' i . e . , the view used to set up a view dependent CSM / VSM ( clipmap ) OR select the view closest to the local light .
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that were passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
* This last is important to get some kind of reasonable behavior for split screen .
2022-01-20 04:59:27 -05:00
*/
auto GetCullingViewOrigin = [ & Views ] ( const FProjectedShadowInfo * ProjectedShadowInfo ) - > FLargeWorldRenderPosition
{
if ( ProjectedShadowInfo - > DependentView ! = nullptr )
{
2022-01-27 03:30:41 -05:00
return FLargeWorldRenderPosition ( ProjectedShadowInfo - > DependentView - > ShadowViewMatrices . GetViewOrigin ( ) ) ;
2022-01-20 04:59:27 -05:00
}
// VSM supports only whole scene shadows, so those without a "DependentView" are local lights
// For local lights the origin is the (inverse of) pre-shadow translation.
check ( ProjectedShadowInfo - > bWholeSceneShadow ) ;
FVector MinOrigin = Views [ 0 ] . ShadowViewMatrices . GetViewOrigin ( ) ;
double MinDistanceSq = ( MinOrigin + ProjectedShadowInfo - > PreShadowTranslation ) . SquaredLength ( ) ;
for ( int Index = 1 ; Index < Views . Num ( ) ; + + Index )
{
FVector TestOrigin = Views [ Index ] . ShadowViewMatrices . GetViewOrigin ( ) ;
double TestDistanceSq = ( TestOrigin + ProjectedShadowInfo - > PreShadowTranslation ) . SquaredLength ( ) ;
if ( TestDistanceSq < MinDistanceSq )
{
MinOrigin = TestOrigin ;
MinDistanceSq = TestDistanceSq ;
}
}
2022-01-27 03:30:41 -05:00
return FLargeWorldRenderPosition ( MinOrigin ) ;
2022-01-20 04:59:27 -05:00
} ;
2021-12-03 16:38:33 -05:00
FInstanceCullingMergedContext InstanceCullingMergedContext ( GMaxRHIFeatureLevel ) ;
// We don't use the registered culling views (this redundancy should probably be addressed at some point), set the number to disable index range checking
InstanceCullingMergedContext . NumCullingViews = - 1 ;
for ( int32 Index = 0 ; Index < VirtualSmMeshCommandPasses . Num ( ) ; + + Index )
2021-01-20 08:46:15 -04:00
{
FProjectedShadowInfo * ProjectedShadowInfo = VirtualSmMeshCommandPasses [ Index ] ;
2021-02-19 06:38:56 -04:00
ProjectedShadowInfo - > BeginRenderView ( GraphBuilder , & Scene ) ;
2021-12-03 16:38:33 -05:00
FVSMCullingBatchInfo VSMCullingBatchInfo ;
VSMCullingBatchInfo . FirstPrimaryView = uint32 ( VirtualShadowViews . Num ( ) ) ;
VSMCullingBatchInfo . NumPrimaryViews = 0U ;
2022-01-20 04:59:27 -05:00
{
const FLargeWorldRenderPosition CullingViewOrigin = GetCullingViewOrigin ( ProjectedShadowInfo ) ;
VSMCullingBatchInfo . CullingViewOriginOffset = CullingViewOrigin . GetOffset ( ) ;
VSMCullingBatchInfo . CullingViewOriginTile = CullingViewOrigin . GetTile ( ) ;
}
2021-12-03 16:38:33 -05:00
const TSharedPtr < FVirtualShadowMapClipmap > Clipmap = ProjectedShadowInfo - > VirtualShadowMapClipmap ;
if ( Clipmap )
2021-01-20 08:46:15 -04:00
{
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that were passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
VSMCullingBatchInfo . NumPrimaryViews = AddRenderViews ( Clipmap , 1.0f , HZBTexture ! = nullptr , false , ProjectedShadowInfo - > ShouldClampToNearPlane ( ) , VirtualShadowViews ) ;
2021-12-03 16:38:33 -05:00
UnBatchedVSMCullingBatchInfo . Add ( VSMCullingBatchInfo ) ;
UnBatchedVirtualSmMeshCommandPasses . Add ( ProjectedShadowInfo ) ;
2021-01-20 08:46:15 -04:00
}
2021-02-05 16:37:17 -04:00
else if ( ProjectedShadowInfo - > HasVirtualShadowMap ( ) )
{
2021-12-03 16:38:33 -05:00
FParallelMeshDrawCommandPass & MeshCommandPass = ProjectedShadowInfo - > GetShadowDepthPass ( ) ;
MeshCommandPass . WaitForSetupTask ( ) ;
2021-02-05 16:37:17 -04:00
2021-12-03 16:38:33 -05:00
FInstanceCullingContext * InstanceCullingContext = MeshCommandPass . GetInstanceCullingContext ( ) ;
if ( InstanceCullingContext - > HasCullingCommands ( ) )
2021-02-05 16:37:17 -04:00
{
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that were passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
VSMCullingBatchInfo . NumPrimaryViews = AddRenderViews ( ProjectedShadowInfo , 1.0f , HZBTexture ! = nullptr , false , ProjectedShadowInfo - > ShouldClampToNearPlane ( ) , VirtualShadowViews ) ;
2021-07-28 17:00:37 -04:00
2021-12-03 16:38:33 -05:00
if ( CVarDoNonNaniteBatching . GetValueOnRenderThread ( ) )
2021-12-02 18:25:13 -05:00
{
2021-12-03 16:38:33 -05:00
FViewInfo * ShadowDepthView = ProjectedShadowInfo - > ShadowDepthView ;
uint32 DynamicInstanceIdOffset = ShadowDepthView - > DynamicPrimitiveCollector . GetInstanceSceneDataOffset ( ) ;
uint32 DynamicInstanceIdMax = DynamicInstanceIdOffset + ShadowDepthView - > DynamicPrimitiveCollector . NumInstances ( ) ;
VSMCullingBatchInfos . Add ( VSMCullingBatchInfo ) ;
// Note: we have to allocate these up front as the context merging machinery writes the offsets directly to the &PassParameters->InstanceCullingDrawParams,
// this is a side-effect from sharing the code with the deferred culling. Should probably be refactored.
FVirtualShadowDepthPassParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualShadowDepthPassParameters > ( ) ;
InstanceCullingMergedContext . AddBatch ( GraphBuilder , InstanceCullingContext , DynamicInstanceIdOffset , ShadowDepthView - > DynamicPrimitiveCollector . NumInstances ( ) , & PassParameters - > InstanceCullingDrawParams ) ;
BatchedVirtualSmMeshCommandPasses . Add ( ProjectedShadowInfo ) ;
BatchedPassParameters . Add ( PassParameters ) ;
2021-12-02 18:25:13 -05:00
}
2021-12-03 16:38:33 -05:00
else
2021-07-28 17:00:37 -04:00
{
2021-12-03 16:38:33 -05:00
UnBatchedVSMCullingBatchInfo . Add ( VSMCullingBatchInfo ) ;
UnBatchedVirtualSmMeshCommandPasses . Add ( ProjectedShadowInfo ) ;
2021-07-28 17:00:37 -04:00
}
2021-02-05 16:37:17 -04:00
}
}
2021-12-03 16:38:33 -05:00
}
uint32 TotalPrimaryViews = uint32 ( VirtualShadowViews . Num ( ) ) ;
CreateMipViews ( VirtualShadowViews ) ;
FRDGBufferRef VirtualShadowViewsRDG = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.VirtualShadowViews " ) , VirtualShadowViews ) ;
2021-02-05 16:37:17 -04:00
2021-12-03 16:38:33 -05:00
// Helper function to create raster pass UB - only really need two of these ever
UE5_MAIN: Multi-view-family scene renderer refactor, part 2. Move FSceneTextures singleton out of RDG blackboard and FSceneTexturesConfig global variable singleton, into FViewFamilyInfo. This is necessary to allow multiple view families to render in a single render graph and a single scene renderer call.
* Existing calls to CreateSceneTextureShaderParameters and similar functions use "GetSceneTexturesChecked", which allows for the possibility that they are reached in a code path where scene textures haven't been initialized, and nullptr is returned instead of asserting. The shader parameter setup functions then fill in dummy defaults for that case. The goal was to precisely match the original behavior, which queried the RDG blackboard, and gracefully handled null if scene textures weren't there. This definitely appears to occur in FNiagaraGpuComputeDispatch::ProcessPendingTicksFlush, which can be called with a dummy scene with no scene textures. In the future, I may change this so dummy defaults are filled in for FSceneTextures at construction time, so the structure is never in an uninitialized state, but I would like to set up a test case for the Niagara code path before doing that, and the checks aren't harmful in the meantime.
* I marked as deprecated global functions which query values from FSceneTexturesConfig, but they'll still work with the caveat that if you use multi-view-family rendering, the results will be indeterminate (whatever view family rendered last). There was only one case outside the scene renderer that accessed the globals (depth clear value), which I removed, noting that there is nowhere in the code where we modify the depth clear value from its global default. I would like to permanently deprecate or remove these at some point. Display Cluster is the only code that's currently using the multi-view-family code path, and as a new (still incomplete) feature, third party code can't be using it, and won't be affected.
#jira NONE
#rb chris.kulla zach.bethel mihnea.balta
#preflight 6261aca76119a1a496bd2644
[CL 19873983 by jason hoerner in ue5-main branch]
2022-04-22 17:33:02 -04:00
const FSceneTextures * SceneTextures = & GetViewFamily ( Views ) . GetSceneTextures ( ) ;
auto CreateShadowDepthPassUniformBuffer = [ this , & VirtualShadowViewsRDG , & GraphBuilder , SceneTextures ] ( bool bClampToNearPlane )
2021-12-03 16:38:33 -05:00
{
FShadowDepthPassUniformParameters * ShadowDepthPassParameters = GraphBuilder . AllocParameters < FShadowDepthPassUniformParameters > ( ) ;
check ( PhysicalPagePoolRDG ! = nullptr ) ;
// TODO: These are not used for this case anyway
2022-01-27 07:20:20 -05:00
ShadowDepthPassParameters - > ProjectionMatrix = FMatrix44f : : Identity ;
ShadowDepthPassParameters - > ViewMatrix = FMatrix44f : : Identity ;
2021-12-03 16:38:33 -05:00
ShadowDepthPassParameters - > ShadowParams = FVector4f ( 0.0f , 0.0f , 0.0f , 1.0f ) ;
ShadowDepthPassParameters - > bRenderToVirtualShadowMap = true ;
ShadowDepthPassParameters - > VirtualSmPageTable = GraphBuilder . CreateSRV ( PageTableRDG ) ;
ShadowDepthPassParameters - > PackedNaniteViews = GraphBuilder . CreateSRV ( VirtualShadowViewsRDG ) ;
ShadowDepthPassParameters - > PageRectBounds = GraphBuilder . CreateSRV ( PageRectBoundsRDG ) ;
2022-04-07 18:36:13 -04:00
ShadowDepthPassParameters - > OutDepthBufferArray = GraphBuilder . CreateUAV ( PhysicalPagePoolRDG , ERDGUnorderedAccessViewFlags : : SkipBarrier ) ;
UE5_MAIN: Multi-view-family scene renderer refactor, part 2. Move FSceneTextures singleton out of RDG blackboard and FSceneTexturesConfig global variable singleton, into FViewFamilyInfo. This is necessary to allow multiple view families to render in a single render graph and a single scene renderer call.
* Existing calls to CreateSceneTextureShaderParameters and similar functions use "GetSceneTexturesChecked", which allows for the possibility that they are reached in a code path where scene textures haven't been initialized, and nullptr is returned instead of asserting. The shader parameter setup functions then fill in dummy defaults for that case. The goal was to precisely match the original behavior, which queried the RDG blackboard, and gracefully handled null if scene textures weren't there. This definitely appears to occur in FNiagaraGpuComputeDispatch::ProcessPendingTicksFlush, which can be called with a dummy scene with no scene textures. In the future, I may change this so dummy defaults are filled in for FSceneTextures at construction time, so the structure is never in an uninitialized state, but I would like to set up a test case for the Niagara code path before doing that, and the checks aren't harmful in the meantime.
* I marked as deprecated global functions which query values from FSceneTexturesConfig, but they'll still work with the caveat that if you use multi-view-family rendering, the results will be indeterminate (whatever view family rendered last). There was only one case outside the scene renderer that accessed the globals (depth clear value), which I removed, noting that there is nowhere in the code where we modify the depth clear value from its global default. I would like to permanently deprecate or remove these at some point. Display Cluster is the only code that's currently using the multi-view-family code path, and as a new (still incomplete) feature, third party code can't be using it, and won't be affected.
#jira NONE
#rb chris.kulla zach.bethel mihnea.balta
#preflight 6261aca76119a1a496bd2644
[CL 19873983 by jason hoerner in ue5-main branch]
2022-04-22 17:33:02 -04:00
SetupSceneTextureUniformParameters ( GraphBuilder , SceneTextures , GMaxRHIFeatureLevel , ESceneTextureSetupMode : : None , ShadowDepthPassParameters - > SceneTextures ) ;
2021-12-03 16:38:33 -05:00
ShadowDepthPassParameters - > bClampToNearPlane = bClampToNearPlane ;
return GraphBuilder . CreateUniformBuffer ( ShadowDepthPassParameters ) ;
} ;
FCullPerPageDrawCommandsCs : : FHZBShaderParameters HZBShaderParameters ;
if ( HZBTexture )
{
// Mode 2 uses the current frame HZB & page table.
2022-03-03 06:41:20 -05:00
HZBShaderParameters . HZBPageTable = HZBMode = = 2 ? GraphBuilder . CreateSRV ( PageTableRDG ) : PrevPageTableRDGSRV ;
2022-03-15 10:05:21 -04:00
HZBShaderParameters . HZBPageFlags = HZBMode = = 2 ? GraphBuilder . CreateSRV ( PageFlagsRDG ) : PrevPageFlagsRDGSRV ;
2022-03-03 06:41:20 -05:00
HZBShaderParameters . HZBPageRectBounds = HZBMode = = 2 ? GraphBuilder . CreateSRV ( PageRectBoundsRDG ) : PrevPageRectBoundsRDGSRV ;
2021-12-03 16:38:33 -05:00
HZBShaderParameters . HZBTexture = HZBTexture ;
HZBShaderParameters . HZBSize = HZBTexture - > Desc . Extent ;
HZBShaderParameters . HZBSampler = TStaticSamplerState < SF_Point , AM_Clamp , AM_Clamp , AM_Clamp > : : GetRHI ( ) ;
HZBShaderParameters . HZBMode = HZBMode ;
}
// Process batched passes
if ( ! InstanceCullingMergedContext . Batches . IsEmpty ( ) )
{
RDG_EVENT_SCOPE ( GraphBuilder , " Batched " ) ;
InstanceCullingMergedContext . MergeBatches ( ) ;
GraphBuilder . BeginEventScope ( RDG_EVENT_NAME ( " CullingPasses " ) ) ;
FCullingResult CullingResult = AddCullingPasses (
GraphBuilder ,
InstanceCullingMergedContext . IndirectArgs ,
InstanceCullingMergedContext . DrawCommandDescs ,
InstanceCullingMergedContext . InstanceIdOffsets ,
& InstanceCullingMergedContext . LoadBalancers [ uint32 ( EBatchProcessingMode : : Generic ) ] ,
InstanceCullingMergedContext . BatchInfos ,
VSMCullingBatchInfos ,
InstanceCullingMergedContext . BatchInds [ uint32 ( EBatchProcessingMode : : Generic ) ] ,
true ,
InstanceCullingMergedContext . TotalInstances ,
TotalPrimaryViews ,
VirtualShadowViewsRDG ,
HZBShaderParameters ,
this ,
2022-01-18 08:51:35 -05:00
GPUScene ,
GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , 4 ) ,
0
2021-12-03 16:38:33 -05:00
) ;
GraphBuilder . EndEventScope ( ) ;
TRDGUniformBufferRef < FShadowDepthPassUniformParameters > ShadowDepthPassUniformBuffer = CreateShadowDepthPassUniformBuffer ( false ) ;
FInstanceCullingGlobalUniforms * InstanceCullingGlobalUniforms = GraphBuilder . AllocParameters < FInstanceCullingGlobalUniforms > ( ) ;
InstanceCullingGlobalUniforms - > InstanceIdsBuffer = GraphBuilder . CreateSRV ( CullingResult . InstanceIdsBuffer ) ;
InstanceCullingGlobalUniforms - > PageInfoBuffer = GraphBuilder . CreateSRV ( CullingResult . PageInfoBuffer ) ;
InstanceCullingGlobalUniforms - > BufferCapacity = CullingResult . MaxNumInstancesPerPass ;
TRDGUniformBufferRef < FInstanceCullingGlobalUniforms > InstanceCullingUniformBuffer = GraphBuilder . CreateUniformBuffer ( InstanceCullingGlobalUniforms ) ;
{
RDG_EVENT_SCOPE ( GraphBuilder , " RasterPasses " ) ;
for ( int Index = 0 ; Index < BatchedVirtualSmMeshCommandPasses . Num ( ) ; + + Index )
{
FProjectedShadowInfo * ProjectedShadowInfo = BatchedVirtualSmMeshCommandPasses [ Index ] ;
FParallelMeshDrawCommandPass & MeshCommandPass = ProjectedShadowInfo - > GetShadowDepthPass ( ) ;
FViewInfo * ShadowDepthView = ProjectedShadowInfo - > ShadowDepthView ;
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
// Local lights are assumed to not use the clamp to near-plane (this is used for some per-object SMs but these should never be used for VSM).
2021-12-03 16:38:33 -05:00
check ( ! ProjectedShadowInfo - > ShouldClampToNearPlane ( ) ) ;
FString LightNameWithLevel ;
FSceneRenderer : : GetLightNameForDrawEvent ( ProjectedShadowInfo - > GetLightSceneInfo ( ) . Proxy , LightNameWithLevel ) ;
AddRasterPass ( GraphBuilder , RDG_EVENT_NAME ( " Rasterize[%s] " , * LightNameWithLevel ) , ShadowDepthView , ShadowDepthPassUniformBuffer , this , VirtualShadowViewsRDG , CullingResult , MeshCommandPass , BatchedPassParameters [ Index ] , InstanceCullingUniformBuffer ) ;
}
}
}
// Loop over the un batched mesh command passes needed, these are all the clipmaps (but we may change the criteria)
for ( int Index = 0 ; Index < UnBatchedVirtualSmMeshCommandPasses . Num ( ) ; + + Index )
{
const auto VSMCullingBatchInfo = UnBatchedVSMCullingBatchInfo [ Index ] ;
FProjectedShadowInfo * ProjectedShadowInfo = UnBatchedVirtualSmMeshCommandPasses [ Index ] ;
FInstanceCullingMergedContext : : FContextBatchInfo CullingBatchInfo = FInstanceCullingMergedContext : : FContextBatchInfo { 0 } ;
FParallelMeshDrawCommandPass & MeshCommandPass = ProjectedShadowInfo - > GetShadowDepthPass ( ) ;
const TSharedPtr < FVirtualShadowMapClipmap > Clipmap = ProjectedShadowInfo - > VirtualShadowMapClipmap ;
FViewInfo * ShadowDepthView = ProjectedShadowInfo - > ShadowDepthView ;
2021-02-05 16:37:17 -04:00
2021-06-21 16:52:03 -04:00
MeshCommandPass . WaitForSetupTask ( ) ;
2021-01-20 08:46:15 -04:00
2021-06-21 16:52:03 -04:00
FInstanceCullingContext * InstanceCullingContext = MeshCommandPass . GetInstanceCullingContext ( ) ;
if ( InstanceCullingContext - > HasCullingCommands ( ) )
2021-01-20 08:46:15 -04:00
{
2021-04-13 08:27:58 -04:00
FString LightNameWithLevel ;
FSceneRenderer : : GetLightNameForDrawEvent ( ProjectedShadowInfo - > GetLightSceneInfo ( ) . Proxy , LightNameWithLevel ) ;
RDG_EVENT_SCOPE ( GraphBuilder , " %s " , * LightNameWithLevel ) ;
2021-12-03 16:38:33 -05:00
CullingBatchInfo . DynamicInstanceIdOffset = ShadowDepthView - > DynamicPrimitiveCollector . GetInstanceSceneDataOffset ( ) ;
CullingBatchInfo . DynamicInstanceIdMax = CullingBatchInfo . DynamicInstanceIdOffset + ShadowDepthView - > DynamicPrimitiveCollector . NumInstances ( ) ;
2021-06-21 16:52:03 -04:00
2022-01-18 08:51:35 -05:00
FRDGBufferRef PrimitiveRevealedMaskRdg = GSystemTextures . GetDefaultStructuredBuffer ( GraphBuilder , 4 ) ;
int32 PrimitiveRevealedNum = 0 ;
if ( ! Clipmap - > GetRevealedPrimitivesMask ( ) . IsEmpty ( ) )
{
PrimitiveRevealedMaskRdg = CreateStructuredBuffer ( GraphBuilder , TEXT ( " Shadow.Virtual.RevealedPrimitivesMask " ) , Clipmap - > GetRevealedPrimitivesMask ( ) ) ;
PrimitiveRevealedNum = Clipmap - > GetNumRevealedPrimitives ( ) ;
}
2021-12-03 16:38:33 -05:00
FCullingResult CullingResult = AddCullingPasses (
GraphBuilder ,
InstanceCullingContext - > IndirectArgs ,
InstanceCullingContext - > DrawCommandDescs ,
InstanceCullingContext - > InstanceIdOffsets ,
InstanceCullingContext - > LoadBalancers [ uint32 ( EBatchProcessingMode : : Generic ) ] ,
MakeArrayView ( & CullingBatchInfo , 1 ) ,
MakeArrayView ( & VSMCullingBatchInfo , 1 ) ,
MakeArrayView < const uint32 > ( nullptr , 0 ) ,
! Clipmap . IsValid ( ) ,
InstanceCullingContext - > TotalInstances ,
TotalPrimaryViews ,
VirtualShadowViewsRDG ,
HZBShaderParameters ,
this ,
2022-01-18 08:51:35 -05:00
GPUScene ,
PrimitiveRevealedMaskRdg ,
PrimitiveRevealedNum
2021-12-03 16:38:33 -05:00
) ;
2021-06-21 16:52:03 -04:00
2021-12-03 16:38:33 -05:00
TRDGUniformBufferRef < FShadowDepthPassUniformParameters > ShadowDepthPassUniformBuffer = CreateShadowDepthPassUniformBuffer ( ProjectedShadowInfo - > ShouldClampToNearPlane ( ) ) ;
2021-06-03 02:19:28 -04:00
FInstanceCullingGlobalUniforms * InstanceCullingGlobalUniforms = GraphBuilder . AllocParameters < FInstanceCullingGlobalUniforms > ( ) ;
2021-12-03 16:38:33 -05:00
InstanceCullingGlobalUniforms - > InstanceIdsBuffer = GraphBuilder . CreateSRV ( CullingResult . InstanceIdsBuffer ) ;
InstanceCullingGlobalUniforms - > PageInfoBuffer = GraphBuilder . CreateSRV ( CullingResult . PageInfoBuffer ) ;
InstanceCullingGlobalUniforms - > BufferCapacity = CullingResult . MaxNumInstancesPerPass ;
TRDGUniformBufferRef < FInstanceCullingGlobalUniforms > InstanceCullingUniformBuffer = GraphBuilder . CreateUniformBuffer ( InstanceCullingGlobalUniforms ) ;
2021-06-03 02:19:28 -04:00
2021-12-03 16:38:33 -05:00
FVirtualShadowDepthPassParameters * DepthPassParams = GraphBuilder . AllocParameters < FVirtualShadowDepthPassParameters > ( ) ;
DepthPassParams - > InstanceCullingDrawParams . IndirectArgsByteOffset = 0 ;
DepthPassParams - > InstanceCullingDrawParams . InstanceDataByteOffset = 0 ;
AddRasterPass ( GraphBuilder , RDG_EVENT_NAME ( " Rasterize " ) , ShadowDepthView , ShadowDepthPassUniformBuffer , this , VirtualShadowViewsRDG , CullingResult , MeshCommandPass , DepthPassParams , InstanceCullingUniformBuffer ) ;
2021-01-20 08:46:15 -04:00
}
//
if ( Index = = CVarShowClipmapStats . GetValueOnRenderThread ( ) )
{
2021-12-03 16:38:33 -05:00
// The 'main' view the shadow was created with respect to
const FViewInfo * ViewUsedToCreateShadow = ProjectedShadowInfo - > DependentView ;
const FViewInfo & View = * ViewUsedToCreateShadow ;
2021-01-20 08:46:15 -04:00
FVirtualSmPrintClipmapStatsCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualSmPrintClipmapStatsCS : : FParameters > ( ) ;
ShaderPrint : : SetParameters ( GraphBuilder , View , PassParameters - > ShaderPrintStruct ) ;
2021-03-08 23:14:54 -04:00
//PassParameters->VirtualShadowMap = GetUniformBuffer(GraphBuilder);
2021-01-20 08:46:15 -04:00
PassParameters - > ShadowMapIdRangeStart = Clipmap - > GetVirtualShadowMap ( 0 ) - > ID ;
// Note: assumes range!
PassParameters - > ShadowMapIdRangeEnd = Clipmap - > GetVirtualShadowMap ( 0 ) - > ID + Clipmap - > GetLevelCount ( ) ;
PassParameters - > PageRectBounds = GraphBuilder . CreateSRV ( PageRectBoundsRDG ) ;
PassParameters - > AllocatedPageRectBounds = GraphBuilder . CreateSRV ( AllocatedPageRectBoundsRDG ) ;
auto ComputeShader = View . ShaderMap - > GetShader < FVirtualSmPrintClipmapStatsCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " PrintClipmapStats " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( 1 , 1 , 1 )
) ;
}
}
2021-02-17 15:32:52 -04:00
}
2021-12-03 01:31:24 -05:00
// Global compute shader declaration bound to the "SelectPagesForHZBCS" entry point
// (see the IMPLEMENT_GLOBAL_SHADER line below). FVirtualShadowMapArray::UpdateHZB
// dispatches it to fill the indirect-args buffer and the physical page list that the
// two BuildHZBPerPage* passes consume.
class FSelectPagesForHZBCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FSelectPagesForHZBCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FSelectPagesForHZBCS , FVirtualPageManagementShader )
2022-03-23 15:54:41 -04:00
// Single boolean permutation, exposed to the shader as VSM_GENERATE_STATS.
class FGenerateStatsDim : SHADER_PERMUTATION_BOOL ( " VSM_GENERATE_STATS " ) ;
using FPermutationDomain = TShaderPermutationDomain < FGenerateStatsDim > ;
2021-12-03 01:31:24 -05:00
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
// Input: per physical page meta data (read-only).
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < FPhysicalPageMetaData > , PhysicalPageMetaData )
// Output: indirect dispatch arguments; UpdateHZB allocates room for two sets of args.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWBuffer < uint > , OutPagesForHZBIndirectArgsBuffer )
// Output: list of physical pages selected for HZB building.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutPhysicalPagesForHZB )
2022-03-23 15:54:41 -04:00
// Input: dirty page flags; UpdateHZB clears the underlying buffer after this pass is added.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , DirtyPageFlags )
// Set by UpdateHZB to !bHZBBuiltThisFrame.
SHADER_PARAMETER ( uint32 , bFirstBuildThisFrame )
2022-04-14 19:28:00 -04:00
// Set by UpdateHZB from CVarShadowsVirtualForceFullHZBUpdate.
SHADER_PARAMETER ( uint32 , bForceFullHZBUpdate )
2022-03-23 15:54:41 -04:00
// NOTE(review): presumably bound by SetStatsArgsAndPermutation together with the
// FGenerateStatsDim permutation - confirm against that helper.
SHADER_PARAMETER_RDG_BUFFER_UAV ( RWStructuredBuffer < uint > , OutStatsBuffer )
2021-12-03 01:31:24 -05:00
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FSelectPagesForHZBCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " SelectPagesForHZBCS " , SF_Compute ) ;
2021-12-03 01:31:24 -05:00
// Global compute shader declaration bound to the "BuildHZBPerPageCS" entry point.
// FVirtualShadowMapArray::UpdateHZB dispatches it indirectly (args offset 0) and binds
// the first HZBLevelsBase mips of the HZB texture as its outputs.
class FVirtualSmBuildHZBPerPageCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FVirtualSmBuildHZBPerPageCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FVirtualSmBuildHZBPerPageCS , FVirtualPageManagementShader )
2022-03-23 15:54:41 -04:00
static constexpr uint32 TotalHZBLevels = FVirtualShadowMap : : NumHZBLevels ;
2021-12-03 01:31:24 -05:00
// Number of mips written by this pass: all levels except the last two, which are
// produced by FVirtualSmBBuildHZBPerPageTopCS (HZBLevelsTop == 2).
static constexpr uint32 HZBLevelsBase = TotalHZBLevels - 2U ;
static_assert ( HZBLevelsBase = = 5U , " The shader is expecting 5 levels, if the page size is changed, this needs to be massaged " ) ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
// Indirect dispatch arguments, declared for RDG access tracking.
RDG_BUFFER_ACCESS ( IndirectArgs , ERHIAccess : : IndirectArgs )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
// Input: page list written by FSelectPagesForHZBCS.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPagesForHZB )
SHADER_PARAMETER_SAMPLER ( SamplerState , PhysicalPagePoolSampler )
2022-04-07 18:36:13 -04:00
// Input: the physical page pool, bound as a uint texture array.
SHADER_PARAMETER_RDG_TEXTURE ( Texture2DArray < uint > , PhysicalPagePool )
2021-12-03 01:31:24 -05:00
// Output: one UAV per destination mip, HZBLevelsBase entries.
SHADER_PARAMETER_RDG_TEXTURE_UAV_ARRAY ( RWTexture2D < float > , FurthestHZBOutput , [ HZBLevelsBase ] )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FVirtualSmBuildHZBPerPageCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " BuildHZBPerPageCS " , SF_Compute ) ;
2021-12-03 01:31:24 -05:00
// Global compute shader declaration bound to the "BuildHZBPerPageTopCS" entry point.
// FVirtualShadowMapArray::UpdateHZB dispatches it after the base pass, using the second
// set of indirect args, to write the remaining HZBLevelsTop mips. It reads the last mip
// produced by FVirtualSmBuildHZBPerPageCS through ParentTextureMip.
class FVirtualSmBBuildHZBPerPageTopCS : public FVirtualPageManagementShader
{
DECLARE_GLOBAL_SHADER ( FVirtualSmBBuildHZBPerPageTopCS ) ;
SHADER_USE_PARAMETER_STRUCT ( FVirtualSmBBuildHZBPerPageTopCS , FVirtualPageManagementShader )
// We need one level less as HZB starts at half-size (not really sure if we really need 1x1 and 2x2 sized levels).
static constexpr uint32 HZBLevelsTop = 2 ;
BEGIN_SHADER_PARAMETER_STRUCT ( FParameters , )
// Indirect dispatch arguments, declared for RDG access tracking.
RDG_BUFFER_ACCESS ( IndirectArgs , ERHIAccess : : IndirectArgs )
SHADER_PARAMETER_RDG_UNIFORM_BUFFER ( FVirtualShadowMapUniformParameters , VirtualShadowMap )
// Input: page list written by FSelectPagesForHZBCS.
SHADER_PARAMETER_RDG_BUFFER_SRV ( StructuredBuffer < uint > , PhysicalPagesForHZB )
SHADER_PARAMETER_SAMPLER ( SamplerState , ParentTextureMipSampler )
// Input: single-mip SRV of the HZB mip directly above the first mip written here.
SHADER_PARAMETER_RDG_TEXTURE_SRV ( Texture2D , ParentTextureMip )
2021-12-03 13:29:07 -05:00
// Reciprocal of the size UpdateHZB computes for the ParentTextureMip level.
SHADER_PARAMETER ( FVector2f , InvHzbInputSize )
2021-12-03 01:31:24 -05:00
// Output: one UAV per destination mip, HZBLevelsTop entries.
SHADER_PARAMETER_RDG_TEXTURE_UAV_ARRAY ( RWTexture2D < float > , FurthestHZBOutput , [ HZBLevelsTop ] )
END_SHADER_PARAMETER_STRUCT ( )
} ;
2022-02-14 05:44:50 -05:00
IMPLEMENT_GLOBAL_SHADER ( FVirtualSmBBuildHZBPerPageTopCS , " /Engine/Private/VirtualShadowMaps/VirtualShadowMapPageManagement.usf " , " BuildHZBPerPageTopCS " , SF_Compute ) ;
2021-12-03 01:31:24 -05:00
2022-03-23 15:54:41 -04:00
// Adds the render graph passes that rebuild the HZB for the physical page pool:
//   - SelectPagesForHZB: fills the page list and two sets of indirect dispatch args,
//   - a clear of the dirty page flags,
//   - BuildHZBPerPage: writes the first FVirtualSmBuildHZBPerPageCS::HZBLevelsBase mips,
//   - BuildHZBPerPageTop: writes the remaining FVirtualSmBBuildHZBPerPageTopCS::HZBLevelsTop mips.
// Also sets bHZBBuiltThisFrame to true.
//
// @param GraphBuilder  Render graph builder that all buffers, views and passes are added to.
void FVirtualShadowMapArray : : UpdateHZB ( FRDGBuilder & GraphBuilder )
2021-12-03 01:31:24 -05:00
{
// NOTE(review): ViewRect is not referenced anywhere else in this function.
const FIntRect ViewRect ( 0 , 0 , GetPhysicalPoolSize ( ) . X , GetPhysicalPoolSize ( ) . Y ) ;
2022-04-07 18:36:13 -04:00
// 1. Gather up all physical pages that are allocated
FRDGBufferRef PagesForHZBIndirectArgsRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateIndirectDesc ( 2U * 4U ) , TEXT ( " Shadow.Virtual.PagesForHZBIndirectArgs " ) ) ;
// NOTE: Total allocated pages since the shader outputs separate entries for static/dynamic pages
FRDGBufferRef PhysicalPagesForHZBRDG = GraphBuilder . CreateBuffer ( FRDGBufferDesc : : CreateStructuredDesc ( sizeof ( int32 ) , GetTotalAllocatedPhysicalPages ( ) + 1 ) , TEXT ( " Shadow.Virtual.PhysicalPagesForHZB " ) ) ;
// 1. Clear the indirect args buffer (note 2x args)
AddClearIndirectDispatchArgs1DPass ( GraphBuilder , PagesForHZBIndirectArgsRDG , 2U ) ;
// 2. Filter the relevant physical pages and set up the indirect args
2021-12-03 01:31:24 -05:00
{
2022-04-07 18:36:13 -04:00
FSelectPagesForHZBCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FSelectPagesForHZBCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
PassParameters - > PhysicalPageMetaData = GraphBuilder . CreateSRV ( PhysicalPageMetaDataRDG ) ;
PassParameters - > OutPagesForHZBIndirectArgsBuffer = GraphBuilder . CreateUAV ( PagesForHZBIndirectArgsRDG ) ;
PassParameters - > OutPhysicalPagesForHZB = GraphBuilder . CreateUAV ( PhysicalPagesForHZBRDG ) ;
PassParameters - > DirtyPageFlags = GraphBuilder . CreateSRV ( DirtyPageFlagsRDG ) ;
// The flag is read before bHZBBuiltThisFrame is set to true further down.
PassParameters - > bFirstBuildThisFrame = ! bHZBBuiltThisFrame ;
2022-04-14 19:28:00 -04:00
PassParameters - > bForceFullHZBUpdate = CVarShadowsVirtualForceFullHZBUpdate . GetValueOnRenderThread ( ) ;
2022-04-07 18:36:13 -04:00
FSelectPagesForHZBCS : : FPermutationDomain PermutationVector ;
SetStatsArgsAndPermutation < FSelectPagesForHZBCS > ( GraphBuilder , StatsBufferRDG , PassParameters , PermutationVector ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FSelectPagesForHZBCS > ( PermutationVector ) ;
2021-12-03 01:31:24 -05:00
2022-04-07 18:36:13 -04:00
// Direct dispatch: enough groups of DefaultCSGroupX to cover MaxPhysicalPages.
FComputeShaderUtils : : AddPass (
2021-12-03 01:31:24 -05:00
GraphBuilder ,
2022-04-07 18:36:13 -04:00
RDG_EVENT_NAME ( " SelectPagesForHZB " ) ,
ComputeShader ,
PassParameters ,
FIntVector ( FMath : : DivideAndRoundUp ( UniformParameters . MaxPhysicalPages , FSelectPagesForHZBCS : : DefaultCSGroupX ) , 1 , 1 )
) ;
}
// Clear the dirty flags (for subsequent render passes).
AddClearUAVPass ( GraphBuilder , GraphBuilder . CreateUAV ( DirtyPageFlagsRDG ) , 0 ) ;
bHZBBuiltThisFrame = true ;
// 3. Build the first HZBLevelsBase mips from the physical page pool.
{
FVirtualSmBuildHZBPerPageCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualSmBuildHZBPerPageCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
for ( int32 DestMip = 0 ; DestMip < FVirtualSmBuildHZBPerPageCS : : HZBLevelsBase ; DestMip + + )
{
PassParameters - > FurthestHZBOutput [ DestMip ] = GraphBuilder . CreateUAV ( FRDGTextureUAVDesc ( HZBPhysical , DestMip ) ) ;
}
PassParameters - > PhysicalPagePool = PhysicalPagePoolRDG ;
PassParameters - > PhysicalPagePoolSampler = TStaticSamplerState < SF_Point > : : GetRHI ( ) ;
PassParameters - > IndirectArgs = PagesForHZBIndirectArgsRDG ;
PassParameters - > PhysicalPagesForHZB = GraphBuilder . CreateSRV ( PhysicalPagesForHZBRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FVirtualSmBuildHZBPerPageCS > ( ) ;
// Indirect dispatch using the first set of args (byte offset 0).
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " BuildHZBPerPage " ) ,
ComputeShader ,
PassParameters ,
PassParameters - > IndirectArgs ,
0
) ;
}
// 4. Build the remaining HZBLevelsTop mips, reading the last mip written by the pass above.
{
FVirtualSmBBuildHZBPerPageTopCS : : FParameters * PassParameters = GraphBuilder . AllocParameters < FVirtualSmBBuildHZBPerPageTopCS : : FParameters > ( ) ;
PassParameters - > VirtualShadowMap = GetUniformBuffer ( GraphBuilder ) ;
// First mip written by this pass; mip StartDestMip - 1 is the input.
uint32 StartDestMip = FVirtualSmBuildHZBPerPageCS : : HZBLevelsBase ;
for ( int32 DestMip = 0 ; DestMip < FVirtualSmBBuildHZBPerPageTopCS : : HZBLevelsTop ; DestMip + + )
{
PassParameters - > FurthestHZBOutput [ DestMip ] = GraphBuilder . CreateUAV ( FRDGTextureUAVDesc ( HZBPhysical , StartDestMip + DestMip ) ) ;
}
// Size of the input mip: the HZB texture size divided (rounding up) by 2^(StartDestMip - 1).
FIntPoint SrcSize = FIntPoint : : DivideAndRoundUp ( FIntPoint ( HZBPhysical - > Desc . GetSize ( ) . X , HZBPhysical - > Desc . GetSize ( ) . Y ) , 1 < < int32 ( StartDestMip - 1 ) ) ;
// NOTE(review): the trailing second semicolon is a stray empty statement.
PassParameters - > InvHzbInputSize = FVector2f ( 1.0f / SrcSize . X , 1.0f / SrcSize . Y ) ; ;
PassParameters - > ParentTextureMip = GraphBuilder . CreateSRV ( FRDGTextureSRVDesc : : CreateForMipLevel ( HZBPhysical , StartDestMip - 1 ) ) ;
PassParameters - > ParentTextureMipSampler = TStaticSamplerState < SF_Point > : : GetRHI ( ) ;
PassParameters - > IndirectArgs = PagesForHZBIndirectArgsRDG ;
PassParameters - > PhysicalPagesForHZB = GraphBuilder . CreateSRV ( PhysicalPagesForHZBRDG ) ;
auto ComputeShader = GetGlobalShaderMap ( GMaxRHIFeatureLevel ) - > GetShader < FVirtualSmBBuildHZBPerPageTopCS > ( ) ;
FComputeShaderUtils : : AddPass (
GraphBuilder ,
RDG_EVENT_NAME ( " BuildHZBPerPageTop " ) ,
ComputeShader ,
PassParameters ,
PassParameters - > IndirectArgs ,
// NOTE: offset 4 to get second set of args in the buffer.
4U * sizeof ( uint32 )
) ;
2021-12-03 01:31:24 -05:00
}
}
2021-12-03 16:38:33 -05:00
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
// Appends one packed Nanite view per clipmap level to OutVirtualShadowViews.
//
// @param Clipmap                Clipmap to emit views for; dereferenced unconditionally, so it must be valid.
// @param LODScaleFactor         Copied into every view's LODScaleFactor.
// @param bSetHZBParams          When true, CacheManager->SetHZBViewParams is called for each level.
// @param bUpdateHZBMetaData     When true, the HZBMetadata entry for each level's HZB key is created or updated.
// @param bClampToNearPlane      When false, NANITE_VIEW_FLAG_NEAR_CLIP is set on the views; when true, no flags are set.
// @param OutVirtualShadowViews  Array the packed views are added to.
// @return The number of views added, i.e. Clipmap->GetLevelCount().
uint32 FVirtualShadowMapArray : : AddRenderViews ( const TSharedPtr < FVirtualShadowMapClipmap > & Clipmap , float LODScaleFactor , bool bSetHZBParams , bool bUpdateHZBMetaData , bool bClampToNearPlane , TArray < Nanite : : FPackedView , SceneRenderingAllocator > & OutVirtualShadowViews )
2021-12-03 16:38:33 -05:00
{
// TODO: Decide if this sort of logic belongs here or in Nanite (as with the mip level view expansion logic)
// We're eventually going to want to snap/quantize these rectangles/positions somewhat so probably don't want it
// entirely within Nanite, but likely makes sense to have some sort of "multi-viewport" notion in Nanite that can
// handle both this and mips.
// NOTE: There's still the additional VSM view logic that runs on top of this in Nanite too (see CullRasterize variant)
// Parameters shared by every clipmap level; per-level values are filled in inside the loop.
Nanite : : FPackedViewParams BaseParams ;
BaseParams . ViewRect = FIntRect ( 0 , 0 , FVirtualShadowMap : : VirtualMaxResolutionXY , FVirtualShadowMap : : VirtualMaxResolutionXY ) ;
BaseParams . HZBTestViewRect = BaseParams . ViewRect ;
BaseParams . RasterContextSize = GetPhysicalPoolSize ( ) ;
BaseParams . LODScaleFactor = LODScaleFactor ;
BaseParams . PrevTargetLayerIndex = INDEX_NONE ;
BaseParams . TargetMipLevel = 0 ;
BaseParams . TargetMipCount = 1 ; // No mips for clipmaps
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
// Near clipping is requested only when the caller does not clamp to the near plane.
BaseParams . Flags = bClampToNearPlane ? 0u : NANITE_VIEW_FLAG_NEAR_CLIP ;
2021-12-03 16:38:33 -05:00
for ( int32 ClipmapLevelIndex = 0 ; ClipmapLevelIndex < Clipmap - > GetLevelCount ( ) ; + + ClipmapLevelIndex )
{
FVirtualShadowMap * VirtualShadowMap = Clipmap - > GetVirtualShadowMap ( ClipmapLevelIndex ) ;
Nanite : : FPackedViewParams Params = BaseParams ;
// Each level targets the layer identified by its virtual shadow map ID.
Params . TargetLayerIndex = VirtualShadowMap - > ID ;
Params . ViewMatrices = Clipmap - > GetViewMatrices ( ClipmapLevelIndex ) ;
// Defaults: no previous layer, previous matrices equal to the current ones.
Params . PrevTargetLayerIndex = INDEX_NONE ;
Params . PrevViewMatrices = Params . ViewMatrices ;
// TODO: Clean this up - could be stored in a single structure for the whole clipmap
int32 HZBKey = Clipmap - > GetHZBKey ( ClipmapLevelIndex ) ;
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
// NOTE(review): SetHZBViewParams takes Params by name here and may modify it (for
// example the Prev* fields set above) - confirm against the cache manager.
if ( bSetHZBParams )
2021-12-03 16:38:33 -05:00
{
CacheManager - > SetHZBViewParams ( HZBKey , Params ) ;
}
// If we're going to generate a new HZB this frame, save the associated metadata
if ( bUpdateHZBMetaData )
{
FVirtualShadowMapHZBMetadata & HZBMeta = HZBMetadata . FindOrAdd ( HZBKey ) ;
HZBMeta . TargetLayerIndex = Params . TargetLayerIndex ;
HZBMeta . ViewMatrices = Params . ViewMatrices ;
HZBMeta . ViewRect = Params . ViewRect ;
}
Nanite : : FPackedView View = Nanite : : CreatePackedView ( Params ) ;
OutVirtualShadowViews . Add ( View ) ;
// Mark that we rendered to this VSM for caching purposes
if ( VirtualShadowMap - > VirtualShadowMapCacheEntry )
{
VirtualShadowMap - > VirtualShadowMapCacheEntry - > MarkRendered ( ) ;
}
}
return uint32 ( Clipmap - > GetLevelCount ( ) ) ;
}
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path.
Reductions *per material*:
SM5
--
FHWRasterizeVS: 832 -> 21
FHWRasterizePS: 104 -> 39
SM6
--
FHWRasterizeVS: 320 -> 9
FHWRasterizeMS: 640 -> 9
FHWRasterizePS: 120 -> 30
Vulkan
--
FHWRasterizeVS: 320 -> 9
FHWRasterizePS: 40 -> 15
Other platforms redacted =)
-- Details
* CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET
* HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load
* NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together.
* VISUALIZE permutation removed from VS/MS since it only loaded uniform values that were passed down per-vertex into the fragment stage as nointerpolation parameters. Pixel shader now constructs this uint2 directly under the VISUALIZE permutation
* NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders
* Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow)
Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed.
#rb rune.stubbe
#fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz
#preflight 622e684c7e2e35638c96a16a
#robomerge FNNC
[CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
/**
 * Appends one Nanite packed view per virtual shadow map owned by ProjectedShadowInfo:
 * six when bOnePassPointLightShadow is set, otherwise one.
 *
 * @param ProjectedShadowInfo    Shadow whose virtual shadow maps are turned into views.
 * @param LODScaleFactor         Copied into the LODScaleFactor of every view.
 * @param bSetHZBParams          When true, each view's parameters are handed to CacheManager->SetHZBViewParams.
 * @param bUpdateHZBMetaData     When true, the target layer index, view matrices and view rect of each view are recorded in HZBMetadata.
 * @param bClampToNearPlane      When false, every view is flagged with NANITE_VIEW_FLAG_NEAR_CLIP; when true, no flags are set.
 * @param OutVirtualShadowViews  Receives the packed views, in shadow-map index order.
 * @return Number of views that were appended.
 */
uint32 FVirtualShadowMapArray::AddRenderViews(const FProjectedShadowInfo* ProjectedShadowInfo, float LODScaleFactor, bool bSetHZBParams, bool bUpdateHZBMetaData, bool bClampToNearPlane, TArray<Nanite::FPackedView, SceneRenderingAllocator>& OutVirtualShadowViews)
{
	// Settings that are identical for every map of this shadow.
	Nanite::FPackedViewParams SharedParams;
	SharedParams.ViewRect = ProjectedShadowInfo->GetOuterViewRect();
	SharedParams.HZBTestViewRect = SharedParams.ViewRect;
	SharedParams.RasterContextSize = GetPhysicalPoolSize();
	SharedParams.LODScaleFactor = LODScaleFactor;
	SharedParams.PrevTargetLayerIndex = INDEX_NONE;
	SharedParams.TargetMipLevel = 0;
	SharedParams.TargetMipCount = FVirtualShadowMap::MaxMipLevels;

	// Near clipping is only requested when the caller did not ask for clamping to the near plane.
	SharedParams.Flags = 0u;
	if (!bClampToNearPlane)
	{
		SharedParams.Flags = NANITE_VIEW_FLAG_NEAR_CLIP;
	}

	const int32 MapCount = ProjectedShadowInfo->bOnePassPointLightShadow ? 6 : 1;
	for (int32 MapIndex = 0; MapIndex < MapCount; ++MapIndex)
	{
		FVirtualShadowMap* ShadowMap = ProjectedShadowInfo->VirtualShadowMaps[MapIndex];

		Nanite::FPackedViewParams MapParams = SharedParams;
		MapParams.TargetLayerIndex = ShadowMap->ID;
		MapParams.ViewMatrices = ProjectedShadowInfo->GetShadowDepthRenderingViewMatrices(MapIndex, true);

		// HZB entries are keyed by the light id with the map index folded into the upper bits.
		const int32 MapHZBKey = ProjectedShadowInfo->GetLightSceneInfo().Id + (MapIndex << 24);

		if (bSetHZBParams)
		{
			CacheManager->SetHZBViewParams(MapHZBKey, MapParams);
		}

		// A new HZB will be generated this frame, so remember the metadata that goes with it.
		if (bUpdateHZBMetaData)
		{
			FVirtualShadowMapHZBMetadata& Metadata = HZBMetadata.FindOrAdd(MapHZBKey);
			Metadata.TargetLayerIndex = MapParams.TargetLayerIndex;
			Metadata.ViewMatrices = MapParams.ViewMatrices;
			Metadata.ViewRect = MapParams.ViewRect;
		}

		OutVirtualShadowViews.Add(Nanite::CreatePackedView(MapParams));

		// Let the cache entry, if there is one, know that this map was rendered to.
		if (ShadowMap->VirtualShadowMapCacheEntry)
		{
			ShadowMap->VirtualShadowMapCacheEntry->MarkRendered();
		}
	}

	return static_cast<uint32>(MapCount);
}
2022-01-27 15:49:31 -05:00
2022-04-12 23:44:41 -04:00
void FVirtualShadowMapArray : : AddVisualizePass ( FRDGBuilder & GraphBuilder , const FViewInfo & View , int32 ViewIndex , FScreenPassTexture Output )
2022-01-27 15:49:31 -05:00
{
# if !UE_BUILD_SHIPPING
2022-04-12 23:44:41 -04:00
if ( ! IsAllocated ( ) | | DebugVisualizationOutput . IsEmpty ( ) )
2022-01-27 15:49:31 -05:00
{
return ;
}
const FVirtualShadowMapVisualizationData & VisualizationData = GetVirtualShadowMapVisualizationData ( ) ;
if ( VisualizationData . IsActive ( ) & & VisualizeLight . IsValid ( ) )
{
FCopyRectPS : : FParameters * Parameters = GraphBuilder . AllocParameters < FCopyRectPS : : FParameters > ( ) ;
2022-04-12 23:44:41 -04:00
Parameters - > InputTexture = DebugVisualizationOutput [ ViewIndex ] ;
2022-01-27 15:49:31 -05:00
Parameters - > InputSampler = TStaticSamplerState < SF_Bilinear , AM_Clamp , AM_Clamp , AM_Clamp > : : GetRHI ( ) ;
Parameters - > RenderTargets [ 0 ] = FRenderTargetBinding ( Output . Texture , ERenderTargetLoadAction : : ENoAction ) ;
TShaderMapRef < FCopyRectPS > PixelShader ( View . ShaderMap ) ;
2022-04-12 23:44:41 -04:00
FScreenPassTextureViewport InputViewport ( DebugVisualizationOutput [ ViewIndex ] - > Desc . Extent ) ;
2022-01-27 15:49:31 -05:00
FScreenPassTextureViewport OutputViewport ( Output ) ;
// See CVarVisualizeLayout documentation
const int32 VisualizeLayout = CVarVisualizeLayout . GetValueOnRenderThread ( ) ;
if ( VisualizeLayout = = 1 ) // Thumbnail
{
const int32 TileWidth = View . UnscaledViewRect . Width ( ) / 3 ;
const int32 TileHeight = View . UnscaledViewRect . Height ( ) / 3 ;
OutputViewport . Rect . Max = OutputViewport . Rect . Min + FIntPoint ( TileWidth , TileHeight ) ;
}
else if ( VisualizeLayout = = 2 ) // Split screen
{
2022-01-27 23:31:28 -05:00
InputViewport . Rect . Max . X = InputViewport . Rect . Min . X + ( InputViewport . Rect . Width ( ) / 2 ) ;
OutputViewport . Rect . Max . X = OutputViewport . Rect . Min . X + ( OutputViewport . Rect . Width ( ) / 2 ) ;
2022-01-27 15:49:31 -05:00
}
// Use separate input and output viewports w/ bilinear sampling to properly support dynamic resolution scaling
AddDrawScreenPass ( GraphBuilder , RDG_EVENT_NAME ( " DrawTexture " ) , View , OutputViewport , InputViewport , PixelShader , Parameters , EScreenPassDrawFlags : : None ) ;
// Visualization light name
{
FScreenPassRenderTarget OutputTarget ( Output . Texture , View . UnscaledViewRect , ERenderTargetLoadAction : : ELoad ) ;
AddDrawCanvasPass ( GraphBuilder , RDG_EVENT_NAME ( " Labels " ) , View , OutputTarget ,
[ & VisualizeLight = VisualizeLight , & OutputViewport = OutputViewport ] ( FCanvas & Canvas )
{
const FLinearColor LabelColor ( 1 , 1 , 0 ) ;
Canvas . DrawShadowedString (
OutputViewport . Rect . Min . X + 8 ,
OutputViewport . Rect . Max . Y - 19 ,
* VisualizeLight . GetLightName ( ) ,
GetStatsFont ( ) ,
LabelColor ) ;
} ) ;
}
}
# endif
}