Files
UnrealEngineUWP/Engine/Source/Runtime/Renderer/Private/VirtualShadowMaps/VirtualShadowMapCacheManager.cpp

1025 lines
40 KiB
C++
Raw Normal View History

// Copyright Epic Games, Inc. All Rights Reserved.
/*=============================================================================
VirtualShadowMapCacheManager.cpp:
=============================================================================*/
#include "VirtualShadowMapCacheManager.h"
#include "VirtualShadowMapClipmap.h"
#include "RendererModule.h"
#include "RenderGraphUtils.h"
#include "RHIGPUReadback.h"
#include "ScenePrivate.h"
#include "HAL/FileManager.h"
#include "PrimitiveSceneInfo.h"
#include "ShaderPrint.h"
#include "RendererOnScreenNotification.h"
// ---------------------------------------------------------------------------
// Console variables controlling virtual shadow map (VSM) caching.
// All of them are flagged ECVF_RenderThreadSafe.
// ---------------------------------------------------------------------------

// r.Shadow.Virtual.AccumulateStats (default 0).
// NOTE(review): the help text only repeats the name; the exact meaning of the value is not visible in this part of the file.
static TAutoConsoleVariable<int32> CVarAccumulateStats(
TEXT("r.Shadow.Virtual.AccumulateStats"),
0,
TEXT("AccumulateStats"),
ECVF_RenderThreadSafe
);
// r.Shadow.Virtual.Cache (default 1): master switch for caching.
// When 0, FindCreateLightCacheEntry() returns nullptr and no per-light cache entries are created.
static TAutoConsoleVariable<int32> CVarCacheVirtualSMs(
TEXT("r.Shadow.Virtual.Cache"),
1,
TEXT("Turn on to enable caching"),
ECVF_RenderThreadSafe
);
// r.Shadow.Virtual.Cache.DrawInvalidatingBounds (default 0): debug visualization of the instance bounds
// that invalidate cached pages; see the help text for the meaning of values 1-3.
static TAutoConsoleVariable<int32> CVarDrawInvalidatingBounds(
TEXT("r.Shadow.Virtual.Cache.DrawInvalidatingBounds"),
0,
TEXT("Turn on debug render cache invalidating instance bounds, heat mapped by number of pages invalidated.\n")
TEXT(" 1 = Draw all bounds.\n")
TEXT(" 2 = Draw those invalidating static cached pages only\n")
TEXT(" 3 = Draw those invalidating dynamic cached pages only"),
ECVF_RenderThreadSafe
);
// r.Shadow.Virtual.Cache.InvalidateUseHZB (default 1): enables HZB testing for VSM invalidations.
static TAutoConsoleVariable<int32> CVarCacheVsmUseHzb(
TEXT("r.Shadow.Virtual.Cache.InvalidateUseHZB"),
1,
TEXT("Enables testing HZB for Virtual Shadow Map invalidations."),
ECVF_RenderThreadSafe);
// r.Shadow.Virtual.Cache.ClipmapPanning (default 1): backing global read by UpdateClipmap().
// When 0, any change of a clipmap level's page-space location invalidates that level.
// Not declared static, so it has external linkage.
int32 GClipmapPanning = 1;
FAutoConsoleVariableRef CVarEnableClipmapPanning(
TEXT("r.Shadow.Virtual.Cache.ClipmapPanning"),
GClipmapPanning,
TEXT("Enable support for panning cached clipmap pages for directional lights."),
ECVF_RenderThreadSafe
);
// r.Shadow.Virtual.Cache.DeformableMeshesInvalidate (default 1): file-local backing global.
// Per the help text, proxies with deformable meshes invalidate regardless of transform updates.
static int32 GVSMCacheDeformableMeshesInvalidate = 1;
FAutoConsoleVariableRef CVarCacheInvalidateOftenMoving(
TEXT("r.Shadow.Virtual.Cache.DeformableMeshesInvalidate"),
GVSMCacheDeformableMeshesInvalidate,
TEXT("If enabled, Primitive Proxies that are marked as having deformable meshes (HasDeformableMesh() == true) causes invalidations regardless of whether their transforms are updated."),
ECVF_RenderThreadSafe);
void FVirtualShadowMapCacheEntry::UpdateClipmap(
int32 VirtualShadowMapId,
const FMatrix &WorldToLight,
FIntPoint PageSpaceLocation,
double LevelRadius,
double ViewCenterZ,
// NOTE: ViewRadiusZ must be constant for a given clipmap level
double ViewRadiusZ)
{
bool bCacheValid = (CurrentVirtualShadowMapId != INDEX_NONE);
if (bCacheValid && WorldToLight != Clipmap.WorldToLight)
{
bCacheValid = false;
//UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap level (VSM %d) due to light movement"), VirtualShadowMapId);
}
if (bCacheValid && GClipmapPanning == 0)
{
if (PageSpaceLocation.X != PrevPageSpaceLocation.X ||
PageSpaceLocation.Y != PrevPageSpaceLocation.Y)
{
bCacheValid = false;
//UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap level (VSM %d) with page space location %d,%d (Prev %d, %d)"),
// VirtualShadowMapId, PageSpaceLocation.X, PageSpaceLocation.Y, PrevPageSpaceLocation.X, PrevPageSpaceLocation.Y);
}
}
// Invalidate if the new Z radius strayed too close/outside the guardband of the cached shadow map
if (bCacheValid)
{
double DeltaZ = FMath::Abs(ViewCenterZ - Clipmap.ViewCenterZ);
if ((DeltaZ + LevelRadius) > 0.9 * Clipmap.ViewRadiusZ)
{
bCacheValid = false;
//UE_LOG(LogRenderer, Display, TEXT("Invalidated clipmap level (VSM %d) due to depth range movement"), VirtualShadowMapId);
}
}
bool bRadiusMatches = (ViewRadiusZ == Clipmap.ViewRadiusZ);
if (bCacheValid && bRadiusMatches)
{
PrevVirtualShadowMapId = CurrentVirtualShadowMapId;
}
else
{
if (bCacheValid && !bRadiusMatches)
{
// These really should be exact by construction currently
UE_LOG(LogRenderer, Warning, TEXT("Invalidated clipmap level (VSM %d) due to Z radius mismatch"), VirtualShadowMapId);
}
// New cached level
PrevVirtualShadowMapId = INDEX_NONE;
Clipmap.WorldToLight = WorldToLight;
Clipmap.ViewCenterZ = ViewCenterZ;
Clipmap.ViewRadiusZ = ViewRadiusZ;
}
PrevPageSpaceLocation = CurrentPageSpaceLocation;
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
bPrevRendered = bCurrentRendered;
CurrentVirtualShadowMapId = VirtualShadowMapId;
CurrentPageSpaceLocation = PageSpaceLocation;
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
bCurrentRendered = false;
}
void FVirtualShadowMapCacheEntry::UpdateLocal(int32 VirtualShadowMapId, const FWholeSceneProjectedShadowInitializer &InCacheValidKey)
{
// Swap previous frame data over.
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
PrevPageSpaceLocation = FIntPoint(0, 0); // Not used for local lights
PrevVirtualShadowMapId = CurrentVirtualShadowMapId;
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
bPrevRendered = bCurrentRendered;
// Check cache validity based of shadow setup
if (!LocalCacheValidKey.IsCachedShadowValid(InCacheValidKey))
{
PrevVirtualShadowMapId = INDEX_NONE;
//UE_LOG(LogRenderer, Display, TEXT("Invalidated!"));
}
LocalCacheValidKey = InCacheValidKey;
CurrentVirtualShadowMapId = VirtualShadowMapId;
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
CurrentPageSpaceLocation = FIntPoint(0, 0); // Not used for local lights
bCurrentRendered = false;
}
/**
 * Packs the per-item invalidation payload.
 *
 * Bit layout of the result:
 *   bit 0     - set when a single clipmap level is targeted (ClipmapVirtualShadowMapId != INDEX_NONE)
 *   bit 1     - set when bInvalidateStaticPage is true
 *   bits 2..  - the targeted virtual shadow map ID (only when bit 0 is set)
 *
 * @param bInvalidateStaticPage       Whether bit 1 is set in the payload.
 * @param ClipmapVirtualShadowMapId   ID of the single level to process, or INDEX_NONE for none.
 * @return The packed payload.
 */
static inline uint32 EncodeInstanceInvalidationPayload(bool bInvalidateStaticPage, int32 ClipmapVirtualShadowMapId = INDEX_NONE)
{
	const uint32 StaticPageBits = bInvalidateStaticPage ? 0x2u : 0x0u;

	if (ClipmapVirtualShadowMapId == INDEX_NONE)
	{
		return StaticPageBits;
	}

	// Do a single clipmap level
	const uint32 SingleLevelBit = 0x1u;
	const uint32 ShiftedId = static_cast<uint32>(ClipmapVirtualShadowMapId) << 2;
	return StaticPageBits | SingleLevelBit | ShiftedId;
}
/**
 * Builds a collector for the given cache manager and immediately drains the invalidations that
 * were queued before this collector existed:
 *  - instance ranges queued on the GPU scene (DynamicPrimitiveInstancesToInvalidate), and
 *  - instance ranges queued on each previous-frame per-light cache entry
 *    (PrimitiveInstancesToInvalidate), emitted once per valid shadow map entry of that light.
 * Both queues are reset after being consumed.
 *
 * @param InVirtualShadowMapArrayCacheManager  Owning cache manager; dereferenced unconditionally.
 */
FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::FInvalidatingPrimitiveCollector(FVirtualShadowMapArrayCacheManager* InVirtualShadowMapArrayCacheManager)
	: AlreadyAddedPrimitives(false, InVirtualShadowMapArrayCacheManager->Scene->Primitives.Num())
	, Scene(*InVirtualShadowMapArrayCacheManager->Scene)
	, GPUScene(InVirtualShadowMapArrayCacheManager->Scene->GPUScene)
	, Manager(*InVirtualShadowMapArrayCacheManager)
{
	const bool bPossiblyCachedAsStatic = false; // TODO

	// Add and clear pending invalidations enqueued on the GPU Scene from dynamic primitives added since last invalidation
	for (const FGPUScene::FInstanceRange& PendingRange : GPUScene.DynamicPrimitiveInstancesToInvalidate)
	{
		const uint32 Payload = EncodeInstanceInvalidationPayload(bPossiblyCachedAsStatic);
		LoadBalancer.Add(PendingRange.InstanceSceneDataOffset, PendingRange.NumInstanceSceneDataEntries, Payload);
#if VSM_LOG_INVALIDATIONS
		RangesStr.Appendf(TEXT("[%6d, %6d), "), PendingRange.InstanceSceneDataOffset, PendingRange.InstanceSceneDataOffset + PendingRange.NumInstanceSceneDataEntries);
#endif
		TotalInstanceCount += PendingRange.NumInstanceSceneDataEntries;
	}
	GPUScene.DynamicPrimitiveInstancesToInvalidate.Reset();

	// Drain the ranges queued on each per-light cache entry of the previous frame
	for (auto& LightEntryPair : Manager.PrevCacheEntries)
	{
		const auto& PerLightEntry = LightEntryPair.Value;

		for (const FVirtualShadowMapPerLightCacheEntry::FInstanceRange& PendingRange : PerLightEntry->PrimitiveInstancesToInvalidate)
		{
			// Add item for each shadow map explicitly, inflates host data but improves load balancing,
			// TODO: maybe add permutation so we can strip the loop completely.
			for (const auto& ShadowMapEntry : PerLightEntry->ShadowMapEntries)
			{
				if (!ShadowMapEntry.IsValid())
				{
					continue;
				}

				// Passing the ID makes the payload target this single shadow map (see EncodeInstanceInvalidationPayload)
				const uint32 Payload = EncodeInstanceInvalidationPayload(bPossiblyCachedAsStatic, ShadowMapEntry->CurrentVirtualShadowMapId);
				LoadBalancer.Add(PendingRange.InstanceSceneDataOffset, PendingRange.NumInstanceSceneDataEntries, Payload);
			}
#if VSM_LOG_INVALIDATIONS
			RangesStr.Appendf(TEXT("[%6d, %6d), "), PendingRange.InstanceSceneDataOffset, PendingRange.InstanceSceneDataOffset + PendingRange.NumInstanceSceneDataEntries);
#endif
			// Counted once per range, not once per shadow map
			TotalInstanceCount += PendingRange.NumInstanceSceneDataEntries;
		}
		PerLightEntry->PrimitiveInstancesToInvalidate.Reset();
	}
}
void FVirtualShadowMapArrayCacheManager::FInvalidatingPrimitiveCollector::Add(const FPrimitiveSceneInfo * PrimitiveSceneInfo)
{
int32 PrimitiveID = PrimitiveSceneInfo->GetIndex();
if (PrimitiveID >= 0
&& !AlreadyAddedPrimitives[PrimitiveID]
&& PrimitiveSceneInfo->GetInstanceSceneDataOffset() != INDEX_NONE
// Don't process primitives that are still in the 'added' state because this means that they
// have not been uploaded to the GPU yet and may be pending from a previous call to update primitive scene infos.
&& !EnumHasAnyFlags(GPUScene.GetPrimitiveDirtyState(PrimitiveID), EPrimitiveDirtyState::Added))
{
AlreadyAddedPrimitives[PrimitiveID] = true;
int32 PersistentPrimitiveIndex = PrimitiveSceneInfo->GetPersistentIndex().Index;
// Nanite meshes need special handling because they don't get culled on CPU, thus always process invalidations for those
const bool bIsNaniteMesh = Scene.PrimitiveFlagsCompact[PrimitiveID].bIsNaniteMesh;
const bool bPossiblyCachedAsStatic = !PrimitiveSceneInfo->Proxy->IsMovable();
const int32 NumInstanceSceneDataEntries = PrimitiveSceneInfo->GetNumInstanceSceneDataEntries();
// Add for non-directional lights, mark for skipping clipmaps as these are handled individually below
LoadBalancer.Add(PrimitiveSceneInfo->GetInstanceSceneDataOffset(), NumInstanceSceneDataEntries, EncodeInstanceInvalidationPayload(bPossiblyCachedAsStatic));
// Process directional lights, where we explicitly filter out primitives that were not rendered (and mark this fact)
for (auto& CacheEntry : Manager.PrevCacheEntries)
{
TBitArray<>& CachedPrimitives = CacheEntry.Value->CachedPrimitives;
if (bIsNaniteMesh || (PersistentPrimitiveIndex < CachedPrimitives.Num() && CachedPrimitives[PersistentPrimitiveIndex]))
{
if (!bIsNaniteMesh)
{
// Clear the record as we're wiping it out.
CachedPrimitives[PersistentPrimitiveIndex] = false;
}
// Add item for each shadow map explicitly, inflates host data but improves load balancing,
// TODO: maybe add permutation so we can strip the loop completely.
for (const auto& SmCacheEntry : CacheEntry.Value->ShadowMapEntries)
{
if (SmCacheEntry.IsValid())
{
checkSlow(SmCacheEntry->CurrentVirtualShadowMapId != INDEX_NONE);
LoadBalancer.Add(PrimitiveSceneInfo->GetInstanceSceneDataOffset(), NumInstanceSceneDataEntries,
EncodeInstanceInvalidationPayload(bPossiblyCachedAsStatic, SmCacheEntry->CurrentVirtualShadowMapId));
}
}
}
}
#if VSM_LOG_INVALIDATIONS
RangesStr.Appendf(TEXT("[%6d, %6d), "), PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetInstanceSceneDataOffset() + NumInstanceSceneDataEntries);
#endif
TotalInstanceCount += NumInstanceSceneDataEntries;
}
}
/**
 * Creates the cache manager for a scene and hooks up two callbacks:
 *  - a GPU message handler ("Shadow.Virtual.StatusFeedback") that detects physical page pool
 *    overflow (a negative free-page count) and, in non-shipping builds, logs a warning once per
 *    overflow episode and records the frame of the last overflow;
 *  - in non-shipping builds, an on-screen notification that reports a recent overflow.
 *
 * @param InScene  Scene this manager belongs to; stored as-is.
 */
FVirtualShadowMapArrayCacheManager::FVirtualShadowMapArrayCacheManager(FScene* InScene)
	: Scene(InScene)
{
	// Handle message with status sent back from GPU
	StatusFeedbackSocket = GPUMessage::RegisterHandler(TEXT("Shadow.Virtual.StatusFeedback"), [this](GPUMessage::FReader Message)
	{
		// Only process status messages that came from this specific cache manager
		if (!(Message.MessageId == this->StatusFeedbackSocket.GetMessageId()))
		{
			return;
		}

		// Get the frame that the message was sent.
		// NOTE(review): the value is not used afterwards; presumably the read is still needed so the next Read returns the following field - confirm against GPUMessage::FReader.
		uint32 FrameNumber = Message.Read<uint32>(0);
		// Goes negative on underflow
		int32 NumPagesFree = Message.Read<int32>(0);

		if (NumPagesFree >= 0)
		{
#if !UE_BUILD_SHIPPING
			// No overflow this time: re-arm the one-shot log warning
			bLoggedPageOverflow = false;
#endif
			return;
		}

		// Overflow: gather the settings that the warning refers to
		static const auto* LodBiasLocalCVar = IConsoleManager::Get().FindTConsoleVariableDataFloat(TEXT("r.Shadow.Virtual.ResolutionLodBiasLocal"));
		const float LodBiasLocal = LodBiasLocalCVar->GetValueOnRenderThread();

		static const auto* LodBiasDirectionalCVar = IConsoleManager::Get().FindTConsoleVariableDataFloat(TEXT("r.Shadow.Virtual.ResolutionLodBiasDirectional"));
		const float LodBiasDirectional = LodBiasDirectionalCVar->GetValueOnRenderThread();

		static const auto* MaxPhysicalPagesCVar = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.Shadow.Virtual.MaxPhysicalPages"));
		const int32 MaxPhysicalPages = MaxPhysicalPagesCVar->GetValueOnRenderThread();

#if !UE_BUILD_SHIPPING
		if (!bLoggedPageOverflow)
		{
			UE_LOG(LogRenderer, Warning, TEXT("Virtual Shadow Map Page Pool overflow (%d page allocations were not served), this will produce visual artifacts (missing shadow), increase the page pool limit or reduce resolution bias to avoid.\n")
				TEXT(" See r.Shadow.Virtual.MaxPhysicalPages (%d), r.Shadow.Virtual.ResolutionLodBiasLocal (%.2f), and r.Shadow.Virtual.ResolutionLodBiasDirectional (%.2f)"),
				-NumPagesFree,
				MaxPhysicalPages,
				LodBiasLocal,
				LodBiasDirectional);
			bLoggedPageOverflow = true;
		}
		LastOverflowFrame = Scene->GetFrameNumber();
#endif
	});

#if !UE_BUILD_SHIPPING
	ScreenMessageDelegate = FRendererOnScreenNotification::Get().AddLambda([this](TMultiMap<FCoreDelegates::EOnScreenMessageSeverity, FText >& OutMessages)
	{
		// Nothing to report until an overflow has been recorded
		if (!(LastOverflowFrame >= 0))
		{
			return;
		}

		// Show for ~5s after last overflow
		const int32 CurrentFrameNumber = Scene->GetFrameNumber();
		const auto FramesSinceOverflow = CurrentFrameNumber - LastOverflowFrame;
		if (FramesSinceOverflow < 30 * 5)
		{
			const FString MessageString = FString::Printf(TEXT("Virtual Shadow Map Page Pool overflow detected (%d frames ago)"), FramesSinceOverflow);
			OutMessages.Add(FCoreDelegates::EOnScreenMessageSeverity::Warning, FText::FromString(MessageString));
		}
	});
#endif
}
// Destructor: in non-shipping builds, removes the on-screen notification delegate that the
// constructor registered, whose lambda captures `this`.
FVirtualShadowMapArrayCacheManager::~FVirtualShadowMapArrayCacheManager()
{
#if !UE_BUILD_SHIPPING
FRendererOnScreenNotification::Get().Remove(ScreenMessageDelegate);
#endif
}
TRefCountPtr<IPooledRenderTarget> FVirtualShadowMapArrayCacheManager::SetPhysicalPoolSize(FRDGBuilder& GraphBuilder, FIntPoint RequestedSize, int RequestedArraySize)
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
{
if (!PhysicalPagePool || PhysicalPagePool->GetDesc().Extent != RequestedSize || PhysicalPagePool->GetDesc().ArraySize != RequestedArraySize)
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
{
FPooledRenderTargetDesc Desc2D = FPooledRenderTargetDesc::Create2DArrayDesc(
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
RequestedSize,
PF_R32_UINT,
FClearValueBinding::None,
TexCreate_None,
TexCreate_ShaderResource | TexCreate_UAV,
false,
RequestedArraySize
);
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
GRenderTargetPool.FindFreeElement(GraphBuilder.RHICmdList, Desc2D, PhysicalPagePool, TEXT("Shadow.Virtual.PhysicalPagePool"));
Invalidate();
//UE_LOG(LogRenderer, Display, TEXT("Recreating Shadow.Virtual.PhysicalPagePool. This will also drop any cached pages."));
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
}
return PhysicalPagePool;
}
/** Releases this manager's reference to the physical page pool, if any, and invalidates the cache. */
void FVirtualShadowMapArrayCacheManager::FreePhysicalPool()
{
	if (!PhysicalPagePool)
	{
		// Nothing allocated, nothing to drop
		return;
	}

	PhysicalPagePool = nullptr;
	Invalidate();
}
/**
 * Returns the persistent HZB physical page pool, (re)allocating it when it does not exist yet or
 * when its extent or pixel format differs from the request. Unlike SetPhysicalPoolSize, this does
 * not invalidate the cache on reallocation.
 *
 * @param GraphBuilder      Supplies the RHI command list used for the allocation.
 * @param RequestedHZBSize  Requested 2D extent of the HZB pool texture.
 * @param Format            Requested pixel format.
 * @return The (possibly newly allocated) HZB pool render target.
 */
TRefCountPtr<IPooledRenderTarget> FVirtualShadowMapArrayCacheManager::SetHZBPhysicalPoolSize(FRDGBuilder& GraphBuilder, FIntPoint RequestedHZBSize, const EPixelFormat Format)
{
	bool bMustRecreate = !HZBPhysicalPagePool;
	if (!bMustRecreate)
	{
		const FPooledRenderTargetDesc& ExistingDesc = HZBPhysicalPagePool->GetDesc();
		bMustRecreate = ExistingDesc.Extent != RequestedHZBSize || ExistingDesc.Format != Format;
	}

	if (bMustRecreate)
	{
		// TODO: This may need to be an array as well
		const FPooledRenderTargetDesc HZBDesc = FPooledRenderTargetDesc::Create2DDesc(
			RequestedHZBSize,
			Format,
			FClearValueBinding::None,
			GFastVRamConfig.HZB,
			TexCreate_ShaderResource | TexCreate_UAV,
			false,
			FVirtualShadowMap::NumHZBLevels);
		GRenderTargetPool.FindFreeElement(GraphBuilder.RHICmdList, HZBDesc, HZBPhysicalPagePool, TEXT("Shadow.Virtual.HZBPhysicalPagePool"));
	}

	return HZBPhysicalPagePool;
}
/** Releases this manager's reference to the HZB physical page pool, if any, and invalidates the cache. */
void FVirtualShadowMapArrayCacheManager::FreeHZBPhysicalPool()
{
	if (!HZBPhysicalPagePool)
	{
		// Nothing allocated, nothing to drop
		return;
	}

	HZBPhysicalPagePool = nullptr;
	Invalidate();
}
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
// Drops all cached state: both the previous-frame and the current-frame per-light entry maps are
// cleared. Called in this file whenever a physical pool is (re)created or freed.
void FVirtualShadowMapArrayCacheManager::Invalidate()
{
// Clear the cache
PrevCacheEntries.Empty();
CacheEntries.Reset();
}
/**
 * Returns the shadow map cache entry stored at Index for this light, creating it on first use.
 *
 * @param Index	Non-negative slot index; the entry array is grown as needed to contain it.
 * @return		Shared pointer to the (possibly newly created) entry.
 */
TSharedPtr<FVirtualShadowMapCacheEntry> FVirtualShadowMapPerLightCacheEntry::FindCreateShadowMapEntry(int32 Index)
{
	check(Index >= 0);

	// Only ever grow the array; existing slots beyond Index must be preserved.
	if (Index >= ShadowMapEntries.Num())
	{
		ShadowMapEntries.SetNum(Index + 1);
	}

	TSharedPtr<FVirtualShadowMapCacheEntry>& Slot = ShadowMapEntries[Index];
	if (!Slot.IsValid())
	{
		Slot = MakeShared<FVirtualShadowMapCacheEntry>();
	}
	return Slot;
}
/**
 * Looks up the per-light cache entry for LightSceneId in the current frame's set, registering it if missing.
 * A newly registered entry reuses the previous frame's entry for the same light when one exists,
 * otherwise a fresh entry is allocated.
 *
 * @param LightSceneId	Key identifying the light.
 * @return				The entry, or nullptr when r.Shadow.Virtual.Cache is 0.
 */
TSharedPtr<FVirtualShadowMapPerLightCacheEntry> FVirtualShadowMapArrayCacheManager::FindCreateLightCacheEntry(int32 LightSceneId)
{
	const bool bCachingEnabled = CVarCacheVirtualSMs.GetValueOnRenderThread() != 0;
	if (!bCachingEnabled)
	{
		return nullptr;
	}

	// Already part of the current frame / active set?
	if (TSharedPtr<FVirtualShadowMapPerLightCacheEntry>* ActiveEntry = CacheEntries.Find(LightSceneId))
	{
		return *ActiveEntry;
	}

	// Carry the previous frame's entry forward if there is one, else start from scratch.
	TSharedPtr<FVirtualShadowMapPerLightCacheEntry> ResolvedEntry;
	if (TSharedPtr<FVirtualShadowMapPerLightCacheEntry>* CarriedOverEntry = PrevCacheEntries.Find(LightSceneId))
	{
		ResolvedEntry = *CarriedOverEntry;
	}
	else
	{
		ResolvedEntry = MakeShared<FVirtualShadowMapPerLightCacheEntry>(Scene->GetMaxPersistentPrimitiveIndex());
	}

	// Register with the current frame's set.
	CacheEntries.Add(LightSceneId, ResolvedEntry);
	return ResolvedEntry;
}
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
/**
 * Convenience wrapper: finds or creates the per-light entry for LightSceneId and then the
 * shadow map entry at Index inside it.
 *
 * @param LightSceneId	Key identifying the light.
 * @param Index			Slot of the shadow map within the light's entry.
 * @return				The shadow map cache entry, or nullptr when r.Shadow.Virtual.Cache is 0.
 */
TSharedPtr<FVirtualShadowMapCacheEntry> FVirtualShadowMapArrayCacheManager::FindCreateCacheEntry(int32 LightSceneId, int32 Index)
{
	const bool bCachingEnabled = CVarCacheVirtualSMs.GetValueOnRenderThread() != 0;
	if (!bCachingEnabled)
	{
		return nullptr;
	}

	TSharedPtr<FVirtualShadowMapPerLightCacheEntry> LightEntry = FindCreateLightCacheEntry(LightSceneId);
	return LightEntry->FindCreateShadowMapEntry(Index);
}
void FVirtualShadowMapPerLightCacheEntry::OnPrimitiveRendered(const FPrimitiveSceneInfo* PrimitiveSceneInfo)
{
// Mark as (potentially present in a cached page somehwere, so we'd need to invalidate if it is removed/moved)
CachedPrimitives[PrimitiveSceneInfo->GetPersistentIndex().Index] = true;
if (GVSMCacheDeformableMeshesInvalidate != 0)
{
// Deformable mesh primitives need to trigger invalidation (even if they did not move) or we get artifacts, for example skinned meshes that are animating but not currently moving.
if (PrimitiveSceneInfo->Proxy->HasDeformableMesh())
{
PrimitiveInstancesToInvalidate.Add(FInstanceRange{ PrimitiveSceneInfo->GetInstanceSceneDataOffset(), PrimitiveSceneInfo->GetNumInstanceSceneDataEntries() });
}
}
}
/**
 * Global compute shader wrapper for "CopyStatsCS" in VirtualShadowMapCopyStats.usf.
 * It is dispatched as a single thread group from ExtractStats() with the current frame's stats
 * buffer as input and the persistent accumulated stats buffer as output.
 * NOTE(review): the exact accumulation scheme lives in the .usf and is not visible from this file.
 */
class FVirtualSmCopyStatsCS : public FGlobalShader
{
DECLARE_GLOBAL_SHADER(FVirtualSmCopyStatsCS);
SHADER_USE_PARAMETER_STRUCT(FVirtualSmCopyStatsCS, FGlobalShader)
public:
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Stats produced by the current frame (read-only).
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< uint >, InStatsBuffer)
// Persistent buffer that collects stats over multiple frames (written by the shader).
SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer< uint >, AccumulatedStatsBufferOut)
// Number of stat values per frame; set from FVirtualShadowMapArray::NumStats by the caller.
SHADER_PARAMETER(uint32, NumStats)
END_SHADER_PARAMETER_STRUCT()
// Only compiled for platforms that support SM5 and Nanite.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) &&
DoesPlatformSupportNanite(Parameters.Platform);
}
// Exposes the C++ side frame capacity to the shader as MAX_STAT_FRAMES.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
OutEnvironment.SetDefine(TEXT("MAX_STAT_FRAMES"), FVirtualShadowMapArrayCacheManager::MaxStatFrames);
}
};
IMPLEMENT_GLOBAL_SHADER(FVirtualSmCopyStatsCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCopyStats.usf", "CopyStatsCS", SF_Compute);
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
void FVirtualShadowMapArrayCacheManager::ExtractFrameData(
FRDGBuilder& GraphBuilder,
FVirtualShadowMapArray &VirtualShadowMapArray,
const FSceneRenderer& SceneRenderer,
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
bool bEnableCaching)
{
const bool bNewShadowData = VirtualShadowMapArray.IsAllocated();
const bool bDropAll = !bEnableCaching;
const bool bDropPrevBuffers = bDropAll || bNewShadowData;
if (bDropPrevBuffers)
{
PrevBuffers = FVirtualShadowMapArrayFrameData();
PrevUniformParameters.NumShadowMaps = 0;
}
if (bDropAll)
{
// We drop the physical page pool here as well to ensure that it disappears in the case where
// thumbnail rendering or similar creates multiple FSceneRenderers that never get deleted.
// Caching is disabled on these contexts intentionally to avoid these issues.
FreePhysicalPool();
}
else if (bNewShadowData)
{
bool bExtractHzbData = false;
// HZB and associated page table are needed by next frame even when VSM physical page caching is disabled
if (VirtualShadowMapArray.HZBPhysical)
{
bExtractHzbData = true;
GraphBuilder.QueueTextureExtraction(VirtualShadowMapArray.HZBPhysical, &PrevBuffers.HZBPhysical);
PrevBuffers.HZBMetadata = VirtualShadowMapArray.HZBMetadata;
}
if (CVarCacheVirtualSMs.GetValueOnRenderThread() != 0)
{
bExtractHzbData = true;
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.PhysicalPageMetaDataRDG, &PrevBuffers.PhysicalPageMetaData);
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.DynamicCasterPageFlagsRDG, &PrevBuffers.DynamicCasterPageFlags);
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.ProjectionDataRDG, &PrevBuffers.ProjectionData);
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.InvalidatingInstancesRDG, &PrevBuffers.InvalidatingInstancesBuffer);
PrevBuffers.NumInvalidatingInstanceSlots = VirtualShadowMapArray.NumInvalidatingInstanceSlots;
// Move cache entries to previous frame, this implicitly removes any that were not used
PrevCacheEntries = CacheEntries;
// Store but drop any temp references embedded in the uniform parameters this frame.
// We'll reestablish them when we reimport the extracted resources next frame
PrevUniformParameters = VirtualShadowMapArray.UniformParameters;
PrevUniformParameters.ProjectionData = nullptr;
PrevUniformParameters.PageTable = nullptr;
PrevUniformParameters.PhysicalPagePool = nullptr;
}
if (bExtractHzbData)
{
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.PageTableRDG, &PrevBuffers.PageTable);
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.PageRectBoundsRDG, &PrevBuffers.PageRectBounds);
GraphBuilder.QueueBufferExtraction(VirtualShadowMapArray.PageFlagsRDG, &PrevBuffers.PageFlags);
}
// propagate current-frame primitive state to cache entry
for (const auto& LightInfo : SceneRenderer.ActiveViewFamily->VisibleLightInfos)
{
for (const TSharedPtr<FVirtualShadowMapClipmap> &Clipmap : LightInfo.VirtualShadowMapClipmaps)
{
// Push data to cache entry
Clipmap->UpdateCachedFrameData();
}
}
CacheEntries.Reset();
ExtractStats(GraphBuilder, VirtualShadowMapArray);
}
else
{
// Do nothing; maintain the data that we had
// This allows us to work around some cases where the renderer gets called multiple times in a given frame
// - such as scene captures - but does no shadow-related work in all but one of them. We do not want to drop
// all the cached data in this case otherwise we effectively get no caching at all.
// Ideally in the long run we want the cache itself to be more robust against rendering multiple views. but
// for now this at least provides a work-around for some common cases where only one view is rendering VSMs.
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
}
}
/**
 * Drives the optional stats accumulation feature (r.Shadow.Virtual.AccumulateStats).
 *
 * While accumulation is enabled, the current frame's stats buffer is copied into a persistent
 * accumulation buffer every call. On the first call after accumulation is switched off, a GPU
 * readback of that buffer is queued. Once the readback is ready, its contents are written to
 * "VirtualShadowMapCacheStats.csv" (one header line, then one line per recorded frame).
 *
 * @param GraphBuilder			Render graph on which the clear / copy / readback passes are added.
 * @param VirtualShadowMapArray	Source of this frame's StatsBufferRDG.
 */
void FVirtualShadowMapArrayCacheManager::ExtractStats(FRDGBuilder& GraphBuilder, FVirtualShadowMapArray &VirtualShadowMapArray)
{
	FRDGBufferRef AccumulatedStatsBufferRDG = nullptr;

	// Note: stats accumulation thing is here because it needs to persist over frames.
	if (AccumulatedStatsBuffer.IsValid())
	{
		AccumulatedStatsBufferRDG = GraphBuilder.RegisterExternalBuffer(AccumulatedStatsBuffer, TEXT("Shadow.Virtual.AccumulatedStatsBuffer"));
	}

	if (IsAccumulatingStats())
	{
		if (!AccumulatedStatsBuffer.IsValid())
		{
			// Layout: one leading uint (used below as the recorded frame count) followed by NumStats values per frame.
			FRDGBufferDesc Desc = FRDGBufferDesc::CreateBufferDesc(4, 1 + FVirtualShadowMapArray::NumStats * MaxStatFrames);
			Desc.Usage = EBufferUsageFlags(Desc.Usage | BUF_SourceCopy);

			AccumulatedStatsBufferRDG = GraphBuilder.CreateBuffer(Desc, TEXT("Shadow.Virtual.AccumulatedStatsBuffer"));	// TODO: Can't be a structured buffer as EnqueueCopy is only defined for vertex buffers
			AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT), 0);
			AccumulatedStatsBuffer = GraphBuilder.ConvertToExternalBuffer(AccumulatedStatsBufferRDG);
		}

		// Initialize/clear
		if (!bAccumulatingStats)
		{
			AddClearUAVPass(GraphBuilder, GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT), 0);
			bAccumulatingStats = true;
		}

		FVirtualSmCopyStatsCS::FParameters* PassParameters = GraphBuilder.AllocParameters<FVirtualSmCopyStatsCS::FParameters>();

		PassParameters->InStatsBuffer = GraphBuilder.CreateSRV(VirtualShadowMapArray.StatsBufferRDG, PF_R32_UINT);
		PassParameters->AccumulatedStatsBufferOut = GraphBuilder.CreateUAV(AccumulatedStatsBufferRDG, PF_R32_UINT);
		PassParameters->NumStats = FVirtualShadowMapArray::NumStats;

		auto ComputeShader = GetGlobalShaderMap(GMaxRHIFeatureLevel)->GetShader<FVirtualSmCopyStatsCS>();

		FComputeShaderUtils::AddPass(
			GraphBuilder,
			RDG_EVENT_NAME("Copy Stats"),
			ComputeShader,
			PassParameters,
			FIntVector(1, 1, 1)
		);
	}
	else if (bAccumulatingStats)
	{
		bAccumulatingStats = false;

		// If accumulation was toggled again before an earlier readback completed, that readback would
		// otherwise be orphaned when the pointer is overwritten below. Deleting nullptr is a no-op.
		delete GPUBufferReadback;
		GPUBufferReadback = new FRHIGPUBufferReadback(TEXT("Shadow.Virtual.AccumulatedStatsBufferReadback"));
		AddEnqueueCopyPass(GraphBuilder, GPUBufferReadback, AccumulatedStatsBufferRDG, 0u);
	}
	else if (AccumulatedStatsBuffer.IsValid())
	{
		AccumulatedStatsBuffer.SafeRelease();
	}

	if (GPUBufferReadback && GPUBufferReadback->IsReady())
	{
		TArray<uint32> Tmp;
		Tmp.AddDefaulted(1 + FVirtualShadowMapArray::NumStats * MaxStatFrames);

		{
			const uint32* BufferPtr = (const uint32*)GPUBufferReadback->Lock((1 + FVirtualShadowMapArray::NumStats * MaxStatFrames) * sizeof(uint32));
			FPlatformMemory::Memcpy(Tmp.GetData(), BufferPtr, Tmp.Num() * Tmp.GetTypeSize());
			GPUBufferReadback->Unlock();

			delete GPUBufferReadback;
			GPUBufferReadback = nullptr;
		}

		FString FileName = TEXT("VirtualShadowMapCacheStats.csv");// FString::Printf(TEXT("%s.csv"), *FileNameToUse);
		FArchive * FileToLogTo = IFileManager::Get().CreateFileWriter(*FileName, false);
		ensure(FileToLogTo);
		if (FileToLogTo)
		{
			static const FString StatNames[] =
			{
				TEXT("Allocated"),
				TEXT("StaticCached"),
				TEXT("StaticInvalidated"),
				TEXT("DynamicCached"),
				TEXT("DynamicInvalidated"),
				TEXT("NumSms"),
				TEXT("NonNaniteInstances"),
				TEXT("NonNaniteInstancesDrawn"),
				TEXT("NonNaniteInstancesHZBCulled"),
				TEXT("NonNaniteInstancesPageMaskCulled"),
				TEXT("NonNaniteInstancesEmptyRectCulled"),
				TEXT("NonNaniteInstancesFrustumCulled"),
			};

			// Print header
			FString StringToPrint;
			for (int32 Index = 0; Index < FVirtualShadowMapArray::NumStats; ++Index)
			{
				if (!StringToPrint.IsEmpty())
				{
					StringToPrint += TEXT(",");
				}
				if (Index < int32(UE_ARRAY_COUNT(StatNames)))
				{
					StringToPrint.Append(StatNames[Index]);
				}
				else
				{
					// Stats added without a matching name still get a column header.
					StringToPrint.Appendf(TEXT("Stat_%d"), Index);
				}
			}

			StringToPrint += TEXT("\n");
			FileToLogTo->Serialize(TCHAR_TO_ANSI(*StringToPrint), StringToPrint.Len());

			// The frame count is written by the GPU; clamp it to the buffer capacity so a bad value
			// can never index past the end of Tmp.
			const uint32 Num = FMath::Min<uint32>(Tmp[0], MaxStatFrames);
			for (uint32 Ind = 0; Ind < Num; ++Ind)
			{
				StringToPrint.Empty();

				for (uint32 StatInd = 0; StatInd < FVirtualShadowMapArray::NumStats; ++StatInd)
				{
					if (!StringToPrint.IsEmpty())
					{
						StringToPrint += TEXT(",");
					}

					// Values are unsigned counters; %u avoids printing large counts as negative numbers.
					StringToPrint += FString::Printf(TEXT("%u"), Tmp[1 + Ind * FVirtualShadowMapArray::NumStats + StatInd]);
				}

				StringToPrint += TEXT("\n");
				FileToLogTo->Serialize(TCHAR_TO_ANSI(*StringToPrint), StringToPrint.Len());
			}

			FileToLogTo->Close();
			// CreateFileWriter hands ownership of the archive to the caller; Close() alone does not free it.
			delete FileToLogTo;
		}
	}
}
Implement persistent shadow physical page pool - Eliminates the previous frame texture (~128MB in default config) and copy - Cached pages are kept in place, with a quick pass to find and allocate free pages following - Experimented with a persistent free list but for the small numbers of pages we are talking about (2k-4k ish standard), recomputing the free list each frame was cheap and robust - Main cost is synchronization/barriers between the new passes. Fairly low measured overhead compared to the much larger wins, but can revisit later if need be. - Removed a few things that added complexity but were not making a big difference to performance, especially with the changes: - No longer support "panning" the directional light cascade cached pages. With the changes to the snapping and depth range invalidations this was no longer making much of a difference anyways. - No longer do the hierarchical "eliminate parent if all 4 mip children are marked" for local lights. Tested a bit and it was not making a measurable difference in most cases with caching. Can be revived if useful later. - Moved ownership of the now persistent physical pool to CacheManager - A bunch of related cleanup and fixes #rb ola.olsson #preflight 61008a655938f90001f04260 #ROBOMERGE-OWNER: andrew.lauritzen #ROBOMERGE-AUTHOR: andrew.lauritzen #ROBOMERGE-SOURCE: CL 16985509 via CL 16987414 #ROBOMERGE-BOT: STARSHIP (Main -> Release-Engine-Test) (v838-16927207) [CL 16987415 by andrew lauritzen in ue5-release-engine-test branch]
2021-07-28 17:00:37 -04:00
/**
 * Reports whether usable cached data from a previous frame is available.
 *
 * @return true only if caching is enabled (r.Shadow.Virtual.Cache != 0) and the previous-frame
 *         page table, page flags, physical page metadata and dynamic caster page flags all exist.
 */
bool FVirtualShadowMapArrayCacheManager::IsValid()
{
	if (CVarCacheVirtualSMs.GetValueOnRenderThread() == 0)
	{
		return false;
	}

	const bool bHasPageData = PrevBuffers.PageTable.IsValid() && PrevBuffers.PageFlags.IsValid();
	const bool bHasPhysicalData = PrevBuffers.PhysicalPageMetaData.IsValid() && PrevBuffers.DynamicCasterPageFlags.IsValid();
	return bHasPageData && bHasPhysicalData;
}
bool FVirtualShadowMapArrayCacheManager::IsAccumulatingStats()
{
return CVarAccumulateStats.GetValueOnRenderThread() != 0;
}
/**
 * Invalidates cached pages touched by primitives that were removed or updated.
 * Runs the GPU-driven invalidation first, then the CPU-collected ranges (if any).
 * Does nothing when caching is disabled or no previous-frame dynamic caster flags exist.
 *
 * @param GraphBuilder						Render graph on which the invalidation passes are added.
 * @param GPUScene							GPU scene providing instance/primitive data to the passes.
 * @param InvalidatingPrimitiveCollector	CPU-side collection of instance ranges to invalidate.
 */
void FVirtualShadowMapArrayCacheManager::ProcessRemovedOrUpdatedPrimitives(FRDGBuilder& GraphBuilder, const FGPUScene& GPUScene, FInvalidatingPrimitiveCollector& InvalidatingPrimitiveCollector)
{
	const bool bCachingEnabled = CVarCacheVirtualSMs.GetValueOnRenderThread() != 0;
	if (!bCachingEnabled || !PrevBuffers.DynamicCasterPageFlags.IsValid())
	{
		return;
	}

	RDG_EVENT_SCOPE(GraphBuilder, "Shadow.Virtual.ProcessRemovedOrUpdatedPrimitives");

	ProcessGPUInstanceInvalidations(GraphBuilder, GPUScene);

	// Nothing collected on the CPU side this frame.
	if (InvalidatingPrimitiveCollector.IsEmpty())
	{
		return;
	}

#if VSM_LOG_INVALIDATIONS
	UE_LOG(LogTemp, Warning, TEXT("ProcessRemovedOrUpdatedPrimitives: \n%s"), *InvalidatingPrimitiveCollector.RangesStr);
#endif
	ProcessInvalidations(GraphBuilder, InvalidatingPrimitiveCollector.LoadBalancer, InvalidatingPrimitiveCollector.TotalInstanceCount, GPUScene);
}
/**
 * Resizes BitArray to exactly NewMax bits: surplus bits are trimmed, missing bits are appended as false.
 *
 * @param BitArray	Flag array to resize in place.
 * @param NewMax	Desired number of bits.
 */
static void ResizeFlagArray(TBitArray<>& BitArray, int32 NewMax)
{
	const int32 CurrentNum = BitArray.Num();

	if (CurrentNum < NewMax)
	{
		// Grow: pad with cleared flags.
		BitArray.Add(false, NewMax - CurrentNum);
	}
	else if (CurrentNum > NewMax)
	{
		// Shrink: trim off excess items.
		BitArray.SetNumUninitialized(NewMax);
	}
}
void FVirtualShadowMapArrayCacheManager::OnSceneChange()
{
if (CVarCacheVirtualSMs.GetValueOnRenderThread() != 0)
{
for (auto& CacheEntry : PrevCacheEntries)
{
ResizeFlagArray(CacheEntry.Value->CachedPrimitives, Scene->GetMaxPersistentPrimitiveIndex());
ResizeFlagArray(CacheEntry.Value->RenderedPrimitives, Scene->GetMaxPersistentPrimitiveIndex());
}
for (auto& CacheEntry : CacheEntries)
{
ResizeFlagArray(CacheEntry.Value->CachedPrimitives, Scene->GetMaxPersistentPrimitiveIndex());
ResizeFlagArray(CacheEntry.Value->RenderedPrimitives, Scene->GetMaxPersistentPrimitiveIndex());
}
}
}
/**
 * Compute shader to project and invalidate the rectangles of given instances.
 *
 * Wraps "VirtualSmInvalidateInstancePagesCS" in VirtualShadowMapCacheManagement.usf. It is dispatched
 * from two places in this file: ProcessInvalidations (CPU-collected instances via a load balancer)
 * and ProcessGPUInstanceInvalidations (GPU-written instance list, indirect dispatch).
 */
class FVirtualSmInvalidateInstancePagesCS : public FGlobalShader
{
DECLARE_GLOBAL_SHADER(FVirtualSmInvalidateInstancePagesCS);
SHADER_USE_PARAMETER_STRUCT(FVirtualSmInvalidateInstancePagesCS, FGlobalShader)
// Selects where the shader reads the instances to invalidate from (INPUT_KIND permutation).
enum EInputDataKind
{
// Instances listed in the InvalidatingInstances buffer.
EInputKind_GPUInstances,
// Instances supplied through LoadBalancerParameters.
EInputKind_LoadBalancer,
EInputKind_Num
};
// Permutation dimensions: debug drawing, input source, and HZB occlusion testing.
class FDebugDim : SHADER_PERMUTATION_BOOL("ENABLE_DEBUG_MODE");
class FInputKindDim : SHADER_PERMUTATION_INT("INPUT_KIND", EInputKind_Num);
class FUseHzbDim : SHADER_PERMUTATION_BOOL("USE_HZB_OCCLUSION");
using FPermutationDomain = TShaderPermutationDomain<FUseHzbDim, FDebugDim, FInputKindDim>;
public:
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
// Previous-frame virtual shadow map uniforms (see SetupCommonParameters).
SHADER_PARAMETER_RDG_UNIFORM_BUFFER(FVirtualShadowMapUniformParameters, VirtualShadowMap)
// Debug drawing support; only bound when the debug permutation is used.
SHADER_PARAMETER_STRUCT_INCLUDE(ShaderPrint::FShaderParameters, ShaderPrintUniformBuffer)
SHADER_PARAMETER(uint32, bDrawBounds)
// Output: previous-frame dynamic caster page flags, written by the shader.
SHADER_PARAMETER_RDG_BUFFER_UAV(RWStructuredBuffer< uint >, OutDynamicCasterPageFlags)
// GPU scene buffers and sizes.
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, GPUSceneInstanceSceneData)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, GPUSceneInstancePayloadData)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<float4>, GPUScenePrimitiveSceneData)
SHADER_PARAMETER(uint32, GPUSceneFrameNumber)
SHADER_PARAMETER(uint32, InstanceSceneDataSOAStride)
SHADER_PARAMETER(uint32, GPUSceneNumAllocatedInstances)
SHADER_PARAMETER(uint32, GPUSceneNumAllocatedPrimitives)
// HZB inputs; only filled in when a previous-frame HZB exists and HZB testing is enabled.
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< uint >, HZBPageTable)
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer< uint4 >, HZBPageRectBounds)
SHADER_PARAMETER_RDG_TEXTURE(Texture2D, HZBTexture)
SHADER_PARAMETER_SAMPLER(SamplerState, HZBSampler)
SHADER_PARAMETER( FVector2f, HZBSize )
// Inputs for the EInputKind_GPUInstances path.
SHADER_PARAMETER_RDG_BUFFER_SRV(StructuredBuffer<uint>, InvalidatingInstances)
SHADER_PARAMETER(uint32, NumInvalidatingInstanceSlots)
RDG_BUFFER_ACCESS(IndirectArgs, ERHIAccess::IndirectArgs)
// Inputs for the EInputKind_LoadBalancer path.
SHADER_PARAMETER_STRUCT_INCLUDE(FGPUScene::FInstanceGPULoadBalancer::FShaderParameters, LoadBalancerParameters)
END_SHADER_PARAMETER_STRUCT()
// Thread group size, shared with the load balancer and passed to the shader as CS_1D_GROUP_SIZE_X.
static constexpr int Cs1dGroupSizeX = FVirtualShadowMapArrayCacheManager::FInstanceGPULoadBalancer::ThreadGroupSize;
// Only compiled for platforms that support SM5 and Nanite.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
return IsFeatureLevelSupported(Parameters.Platform, ERHIFeatureLevel::SM5) &&
DoesPlatformSupportNanite(Parameters.Platform);
}
// Adds the VSM, GPU scene and load balancer defines plus the INPUT_KIND_* enum values.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
FVirtualShadowMapArray::SetShaderDefines(OutEnvironment);
OutEnvironment.SetDefine(TEXT("CS_1D_GROUP_SIZE_X"), Cs1dGroupSizeX);
OutEnvironment.SetDefine(TEXT("USE_GLOBAL_GPU_SCENE_DATA"), 1);
OutEnvironment.SetDefine(TEXT("VF_SUPPORTS_PRIMITIVE_SCENE_DATA"), 1);
OutEnvironment.SetDefine(TEXT("INPUT_KIND_GPU_INSTANCES"), EInputKind_GPUInstances);
OutEnvironment.SetDefine(TEXT("INPUT_KIND_LOAD_BALANCER"), EInputKind_LoadBalancer);
FGPUScene::FInstanceGPULoadBalancer::SetShaderDefines(OutEnvironment);
}
};
IMPLEMENT_GLOBAL_SHADER(FVirtualSmInvalidateInstancePagesCS, "/Engine/Private/VirtualShadowMaps/VirtualShadowMapCacheManagement.usf", "VirtualSmInvalidateInstancePagesCS", SF_Compute);
/**
 * Creates a render-graph uniform buffer from a snapshot of the previous frame's uniform parameters.
 *
 * @param GraphBuilder	Render graph that allocates the parameter copy and owns the resulting buffer.
 * @return				Uniform buffer reference holding a copy of PrevUniformParameters as of this call.
 */
TRDGUniformBufferRef<FVirtualShadowMapUniformParameters> FVirtualShadowMapArrayCacheManager::GetPreviousUniformBuffer(FRDGBuilder& GraphBuilder) const
{
	// The graph needs its own copy of the parameters; PrevUniformParameters may be modified later.
	FVirtualShadowMapUniformParameters* ParametersSnapshot = GraphBuilder.AllocParameters<FVirtualShadowMapUniformParameters>();
	*ParametersSnapshot = PrevUniformParameters;

	return GraphBuilder.CreateUniformBuffer(ParametersSnapshot);
}
void FVirtualShadowMapArrayCacheManager::SetHZBViewParams(int32 HZBKey, Nanite::FPackedViewParams& OutParams)
{
FVirtualShadowMapHZBMetadata* PrevHZBMeta = PrevBuffers.HZBMetadata.Find(HZBKey);
if (PrevHZBMeta)
{
OutParams.PrevTargetLayerIndex = PrevHZBMeta->TargetLayerIndex;
OutParams.PrevViewMatrices = PrevHZBMeta->ViewMatrices;
Removed a massive number of Nanite rasterizer shader permutations across all platforms/shaderdbs, significantly improving iteration times for the editor and cooker, especially when these numbers get multiplied by the number of materials that utilize programmable features in addition to the default material "fixed function" path. Reductions *per material*: SM5 -- FHWRasterizeVS: 832 -> 21 FHWRasterizePS: 104 -> 39 SM6 -- FHWRasterizeVS: 320 -> 9 FHWRasterizeMS: 640 -> 9 FHWRasterizePS: 120 -> 30 Vulkan -- FHWRasterizeVS: 320 -> 9 FHWRasterizePS: 40 -> 15 Other platforms redacted =) -- Details * CLUSTER_PER_PAGE has been fully removed (since we no longer ever run CLUSTER_PER_PAGE=0), which now makes it mutually inclusive with VIRTUAL_TEXTURE_TARGET * HAS_RASTER_BIN has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load * ADD_CLUSTER_OFFSET has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load * HAS_PREV_DRAW_DATA has been replaced with a dynamic branch, since this is just a per cluster index offset based on a simple uniform buffer load * NEAR_CLIP (only change to significantly affect codegen) has been turned into a dynamic branch based on FNaniteView - this lets us merge depth clip/clamp rasterizer calls in VSM together instead of relying on HAS_PREV_DRAW_DATA, and a future optimization can now be done to merge local and directional light full Nanite pipeline calls together. * VISUALIZE permutation removed from VS/MS since it only loaded unform values that passed down per-vertex into fragment stage as nointerpolation parameters. 
Pixel shader now constructs this uint2 directly under the VISUALIZE permutation * NANITE_MESH_SHADER_INTERP removed by default but still left in the code, since it is a work in progress potential optimization for DX12 mesh shaders * Removed explicit Lumen and VSM usage of NANITE_RENDER_FLAG_HAVE_PREV_DRAW_DATA (now the dynamic branch path is only taken if CullRasterizeMultiPass implicitly breaks the rasterization into multiple calls due to NANITE_MAX_VIEWS_PER_CULL_RASTERIZE_PASS overflow) Performance was tested on a 2080Ti in AncientGame, and the delta is effectively noise (tested cached and uncached VSM). Further testing on other platforms will occur, but important to get this change in for all the benefits and easy to tweak things later if needed. #rb rune.stubbe #fyi brian.karis, ola.olsson, andrew.lauritzen, jamie.hayes, daniel.wright, krzysztof.narkowicz #preflight 622e684c7e2e35638c96a16a #robomerge FNNC [CL 19370372 by graham wihlidal in ue5-main branch]
2022-03-13 23:18:25 -04:00
OutParams.Flags |= NANITE_VIEW_FLAG_HZBTEST;
}
}
/**
 * Fills in the pass parameters and permutation dimensions shared by both invalidation dispatches
 * (load-balancer driven and GPU-instance driven). The input-kind dimension and the input-specific
 * parameters are left for the caller to set.
 *
 * @param GraphBuilder			Render graph used to import the previous-frame resources.
 * @param CacheManager			Owner of the previous-frame buffers and uniform parameters (both are updated).
 * @param TotalInstanceCount	Number of instances to process; only used to size debug line storage.
 * @param GPUScene				Source of the GPU scene buffers and counts.
 * @param OutPassParameters		Shader parameters to populate.
 * @param OutPermutationVector	Receives the debug and HZB permutation choices.
 */
static void SetupCommonParameters(FRDGBuilder& GraphBuilder, FVirtualShadowMapArrayCacheManager* CacheManager, int32 TotalInstanceCount, const FGPUScene& GPUScene,
	FVirtualSmInvalidateInstancePagesCS::FParameters& OutPassParameters,
	FVirtualSmInvalidateInstancePagesCS::FPermutationDomain &OutPermutationVector)
{
	// Imports a pooled buffer into the graph under the given name and returns an SRV for it.
	auto ImportAsSRV = [&GraphBuilder](const TRefCountPtr<FRDGPooledBuffer>& PooledBuffer, const TCHAR* DebugName) -> FRDGBufferSRVRef
	{
		FRDGBufferRef ImportedBuffer = GraphBuilder.RegisterExternalBuffer(PooledBuffer, DebugName);
		return GraphBuilder.CreateSRV(ImportedBuffer);
	};

	const bool bWantBoundsDrawn = CVarDrawInvalidatingBounds.GetValueOnRenderThread() != 0;
	if (bWantBoundsDrawn)
	{
		ShaderPrint::SetEnabled(true);
		ShaderPrint::RequestSpaceForLines(TotalInstanceCount * 12);
	}

	// Note: this disables the whole debug permutation since the parameters must be bound.
	const bool bDebugPermutation = bWantBoundsDrawn && ShaderPrint::IsDefaultViewEnabled();

	FVirtualShadowMapArrayFrameData& PrevFrame = CacheManager->PrevBuffers;
	auto& PrevParams = CacheManager->PrevUniformParameters;

	// Update references in our last frame uniform buffer with reimported resources for this frame
	PrevParams.ProjectionData = ImportAsSRV(PrevFrame.ProjectionData, TEXT("Shadow.Virtual.PrevProjectionData"));
	PrevParams.PageTable = ImportAsSRV(PrevFrame.PageTable, TEXT("Shadow.Virtual.PrevPageTable"));
	PrevParams.PageFlags = ImportAsSRV(PrevFrame.PageFlags, TEXT("Shadow.Virtual.PrevPageFlags"));
	PrevParams.PageRectBounds = ImportAsSRV(PrevFrame.PageRectBounds, TEXT("Shadow.Virtual.PrevPageRectBounds"));
	// Unused in this path
	PrevParams.PhysicalPagePool = GSystemTextures.GetZeroUIntArrayDummy(GraphBuilder);
	OutPassParameters.VirtualShadowMap = CacheManager->GetPreviousUniformBuffer(GraphBuilder);

	// Output target: the previous frame's dynamic caster page flags.
	FRDGBufferRef PrevDynamicCasterFlags = GraphBuilder.RegisterExternalBuffer(PrevFrame.DynamicCasterPageFlags, TEXT("Shadow.Virtual.PrevDynamicCasterFlags"));
	OutPassParameters.OutDynamicCasterPageFlags = GraphBuilder.CreateUAV(PrevDynamicCasterFlags);

	// GPU scene inputs.
	OutPassParameters.GPUSceneInstanceSceneData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(GPUScene.InstanceSceneDataBuffer));
	OutPassParameters.GPUScenePrimitiveSceneData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(GPUScene.PrimitiveBuffer));
	OutPassParameters.GPUSceneInstancePayloadData = GraphBuilder.CreateSRV(GraphBuilder.RegisterExternalBuffer(GPUScene.InstancePayloadDataBuffer));
	OutPassParameters.GPUSceneFrameNumber = GPUScene.GetSceneFrameNumber();
	OutPassParameters.GPUSceneNumAllocatedInstances = GPUScene.GetNumInstances();
	OutPassParameters.GPUSceneNumAllocatedPrimitives = GPUScene.GetNumPrimitives();
	OutPassParameters.InstanceSceneDataSOAStride = GPUScene.InstanceSceneDataSOAStride;

	OutPassParameters.bDrawBounds = bWantBoundsDrawn;
	if (bDebugPermutation)
	{
		ShaderPrint::SetParameters(GraphBuilder, OutPassParameters.ShaderPrintUniformBuffer);
	}

	// HZB testing needs both the cvar and an HZB extracted from the previous frame.
	const bool bHZBRequested = (CVarCacheVsmUseHzb.GetValueOnRenderThread() != 0);
	const TRefCountPtr<IPooledRenderTarget> PrevHZBPhysical = bHZBRequested ? PrevFrame.HZBPhysical : nullptr;
	const bool bHaveHZB = (PrevHZBPhysical != nullptr);
	if (bHaveHZB)
	{
		// Same, since we are not producing a new frame just yet
		OutPassParameters.HZBPageTable = PrevParams.PageTable;
		OutPassParameters.HZBPageRectBounds = PrevParams.PageRectBounds;
		OutPassParameters.HZBTexture = GraphBuilder.RegisterExternalTexture(PrevHZBPhysical);
		OutPassParameters.HZBSize = PrevHZBPhysical->GetDesc().Extent;
		OutPassParameters.HZBSampler = TStaticSamplerState< SF_Point, AM_Clamp, AM_Clamp, AM_Clamp >::GetRHI();
	}

	OutPermutationVector.Set<FVirtualSmInvalidateInstancePagesCS::FDebugDim>(bDebugPermutation);
	OutPermutationVector.Set<FVirtualSmInvalidateInstancePagesCS::FUseHzbDim>(bHaveHZB);
}
/**
 * Dispatches the page invalidation compute shader for a CPU-collected set of instances.
 * Returns immediately when the load balancer holds no instances.
 *
 * @param GraphBuilder			Render graph on which the pass is added.
 * @param Instances				Load balancer holding the instance ranges; finalized and uploaded here.
 * @param TotalInstanceCount	Total number of instances (forwarded to SetupCommonParameters).
 * @param GPUScene				GPU scene providing instance/primitive data.
 */
void FVirtualShadowMapArrayCacheManager::ProcessInvalidations(FRDGBuilder& GraphBuilder, FInstanceGPULoadBalancer& Instances, int32 TotalInstanceCount, const FGPUScene& GPUScene)
{
	if (Instances.IsEmpty())
	{
		return;
	}

	Instances.FinalizeBatches();

	RDG_EVENT_SCOPE(GraphBuilder, "ProcessInvalidations [%d batches]", Instances.GetBatches().Num());

	using FInvalidateCS = FVirtualSmInvalidateInstancePagesCS;

	FInvalidateCS::FParameters* InvalidateParameters = GraphBuilder.AllocParameters<FInvalidateCS::FParameters>();
	FInvalidateCS::FPermutationDomain Permutation;
	SetupCommonParameters(GraphBuilder, this, TotalInstanceCount, GPUScene, *InvalidateParameters, Permutation);

	// Upload the batches and bind them as the shader's instance source.
	auto&& UploadedInstances = Instances.Upload(GraphBuilder);
	UploadedInstances.GetShaderParameters(GraphBuilder, InvalidateParameters->LoadBalancerParameters);
	Permutation.Set<FInvalidateCS::FInputKindDim>(FInvalidateCS::EInputKind_LoadBalancer);

	auto InvalidateShader = GetGlobalShaderMap(GMaxRHIFeatureLevel)->GetShader<FInvalidateCS>(Permutation);

	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME("VirtualSmInvalidateInstancePagesCS"),
		InvalidateShader,
		InvalidateParameters,
		Instances.GetWrappedCsGroupCount()
	);
}
/**
 * Dispatches FVirtualSmInvalidateInstancePagesCS indirectly (GPU-instances input permutation) to process
 * instances that were marked for update from the GPU side.
 *
 * Only does work while PrevBuffers.InvalidatingInstancesBuffer is valid; the buffer reference is released
 * at the end so a repeated call does not process the same invalidations again.
 *
 * @param GraphBuilder  Render graph the indirect-args setup pass and compute pass are added to.
 * @param GPUScene      Forwarded unchanged to SetupCommonParameters.
 */
void FVirtualShadowMapArrayCacheManager::ProcessGPUInstanceInvalidations(FRDGBuilder& GraphBuilder, const FGPUScene& GPUScene)
{
	using FInvalidateCS = FVirtualSmInvalidateInstancePagesCS;

	// Nothing to dispatch unless a buffer of invalidating instances is held in PrevBuffers.
	if (!PrevBuffers.InvalidatingInstancesBuffer.IsValid())
	{
		return;
	}

	RDG_EVENT_SCOPE(GraphBuilder, "ProcessGPUInstanceInvalidations [GPU-Instances]");

	FRDGBufferRef InstancesRDG = GraphBuilder.RegisterExternalBuffer(
		PrevBuffers.InvalidatingInstancesBuffer,
		TEXT("Shadow.Virtual.PrevInvalidatingInstancesBuffer"));

	// Build the 1D indirect dispatch arguments from the instances buffer.
	FRDGBufferRef DispatchArgs = FComputeShaderUtils::AddIndirectArgsSetupCsPass1D(
		GraphBuilder,
		InstancesRDG,
		TEXT("Shadow.Virtual.ProcessGPUInstanceInvalidationsIndirectArgs"),
		FInvalidateCS::Cs1dGroupSizeX);

	FInvalidateCS::FPermutationDomain Permutation;
	FInvalidateCS::FParameters* Params = GraphBuilder.AllocParameters<FInvalidateCS::FParameters>();

	// NOTE(review): 16*1024 is passed in the slot where ProcessInvalidations forwards its TotalInstanceCount;
	// presumably a fixed estimate because the real count is only known on the GPU — confirm against SetupCommonParameters.
	SetupCommonParameters(GraphBuilder, this, 16 * 1024, GPUScene, *Params, Permutation);

	Params->IndirectArgs = DispatchArgs;
	Params->InvalidatingInstances = GraphBuilder.CreateSRV(InstancesRDG);
	Params->NumInvalidatingInstanceSlots = PrevBuffers.NumInvalidatingInstanceSlots;

	Permutation.Set<FInvalidateCS::FInputKindDim>(FInvalidateCS::EInputKind_GPUInstances);
	auto InvalidateShader = GetGlobalShaderMap(GMaxRHIFeatureLevel)->GetShader<FInvalidateCS>(Permutation);

	FComputeShaderUtils::AddPass(
		GraphBuilder,
		RDG_EVENT_NAME("VirtualSmInvalidateInstancePagesCS"),
		InvalidateShader,
		Params,
		DispatchArgs,
		0
	);

	// Drop the InvalidatingInstancesBuffer to make sure we don't redundantly process the associated
	// invalidations if ProcessRemovedOrUpdatedPrimitives is called multiple times.
	PrevBuffers.InvalidatingInstancesBuffer.SafeRelease();
}