Files
UnrealEngineUWP/Engine/Source/Runtime/RenderCore/Private/RenderTargetPool.cpp
Luke Thatcher ee64a28f30 Copying //UE5/Dev-ParallelRendering to //UE5/Main
This change is the copy-up of work from //UE5/Dev-ParallelRendering to unify the various RHI texture types
 - The separate RHI texture types (FRHITexture2D, FRHITexture2DArray, FRHITexture3D, FRHITextureCube, FRHITextureCubeArray) have been replaced with a single type: FRHITexture.
 - Includes the required refactoring / fixes for the various platform RHIs.

The old texture types are still supported via typedefs, but will soon be marked deprecated. Future rendering code should make use of the FRHITexture type instead.

#jira UE-139559
#rb Mihnea.Balta
#preflight 622f3af819287beb2c3047f6
#robomerge[FN_ENGINEMERGE] DEV-NC

[CL 19371962 by Luke Thatcher in ue5-main branch]
2022-03-14 09:14:58 -04:00

590 lines
16 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "RenderTargetPool.h"
#include "RHIStaticStates.h"
#include "Misc/OutputDeviceRedirector.h"
#include "Hash/CityHash.h"
/** The global render targets pool. The free functions in this file (CreateRenderTarget, DumpRenderTargetPoolMemory) operate on this instance. */
TGlobalResource<FRenderTargetPool> GRenderTargetPool;
// Log category used by the r.RenderTargetPoolMin budget messages emitted from TickPoolElements().
DEFINE_LOG_CATEGORY_STATIC(LogRenderTargetPool, Warning, All);
// CSV profiler category that receives the UnusedMB / PeakUsedMB stats written in TickPoolElements().
// NOTE(review): the second argument presumably disables the category by default on dedicated servers - confirm against the CSV profiler macros.
CSV_DEFINE_CATEGORY(RenderTargetPool, !UE_SERVER);
/**
 * Wraps an existing RHI texture in a pooled render target that is not tracked by the pool.
 *
 * The texture is used as both the targetable and the shader resource texture, and the
 * descriptor is translated from the texture's own description with Name as its debug name.
 *
 * @param Texture	Texture to wrap; must not be null (checked).
 * @param Name		Debug name stored in the resulting descriptor.
 * @return A reference to the newly created, untracked pooled render target.
 */
TRefCountPtr<IPooledRenderTarget> CreateRenderTarget(FRHITexture* Texture, const TCHAR* Name)
{
	check(Texture);

	FSceneRenderTargetItem Item;
	Item.TargetableTexture = Texture;
	Item.ShaderResourceTexture = Texture;

	FPooledRenderTargetDesc Desc = Translate(Texture->GetDesc());
	Desc.DebugName = Name;

	TRefCountPtr<IPooledRenderTarget> PooledRenderTarget;
	GRenderTargetPool.CreateUntrackedElement(Desc, PooledRenderTarget, Item);

	// Return the local by name: wrapping it in MoveTemp() would block copy elision,
	// and a returned local is already treated as an rvalue.
	return PooledRenderTarget;
}
/**
 * Makes sure OutPooledRenderTarget wraps Texture, creating a new wrapper only when the
 * current one is missing or refers to a different RHI texture.
 *
 * @param Texture					Texture the wrapper should refer to.
 * @param Name						Debug name used if a new wrapper has to be created.
 * @param OutPooledRenderTarget	Wrapper to validate and, if necessary, replace.
 * @return true if a new wrapper was created, false if the existing one was kept.
 */
bool CacheRenderTarget(FRHITexture* Texture, const TCHAR* Name, TRefCountPtr<IPooledRenderTarget>& OutPooledRenderTarget)
{
	const bool bAlreadyWrapsTexture = OutPooledRenderTarget && OutPooledRenderTarget->GetRHI() == Texture;
	if (bAlreadyWrapsTexture)
	{
		return false;
	}

	OutPooledRenderTarget = CreateRenderTarget(Texture, Name);
	return true;
}
/**
 * Writes the global render target pool's memory usage report to OutputDevice.
 * Forwards to FRenderTargetPool::DumpMemoryUsage on GRenderTargetPool.
 */
RENDERCORE_API void DumpRenderTargetPoolMemory(FOutputDevice& OutputDevice)
{
GRenderTargetPool.DumpMemoryUsage(OutputDevice);
}
// Console command "r.DumpRenderTargetPoolMemory": bound to DumpRenderTargetPoolMemory(), which
// reports the pool's allocations to the output device supplied with the command.
static FAutoConsoleCommandWithOutputDevice GDumpRenderTargetPoolMemoryCmd(
TEXT("r.DumpRenderTargetPoolMemory"),
TEXT("Dump allocation information for the render target pool."),
FConsoleCommandWithOutputDeviceDelegate::CreateStatic(DumpRenderTargetPoolMemory)
);
/**
 * Returns the memory size of a pooled render target in kilobytes, rounded up.
 *
 * @param Element	Pooled render target whose ComputeMemorySize() is converted.
 * @return Size in KB; any partial kilobyte counts as a whole one.
 */
static uint32 ComputeSizeInKB(FPooledRenderTarget& Element)
{
	const uint32 SizeInBytes = Element.ComputeMemorySize();

	// Divide first and add the remainder's carry afterwards: "(Size + 1023) / 1024" wraps
	// around in 32 bits when Size is within 1023 bytes of the maximum uint32 value.
	return SizeInBytes / 1024 + ((SizeInBytes % 1024) != 0 ? 1 : 0);
}
/**
 * Returns a pooled render target matching Desc, reusing a free pooled element when one
 * exists and creating a new texture (plus UAV when requested) otherwise.
 *
 * @param Desc					Requested texture description. Taken by value because the flags are
 *								adjusted locally: FastVRAM flags are removed and ShaderResource is added.
 * @param Name					Debug name assigned to the returned element and, outside of
 *								shipping/test builds, bound to its RHI texture.
 * @param bResetStateToUnknown	When true, the element's PooledTexture reference is reset before returning.
 * @return A reference to the matching pooled render target.
 */
TRefCountPtr<IPooledRenderTarget> FRenderTargetPool::FindFreeElementInternal(FRHITextureCreateInfo Desc, const TCHAR* Name, bool bResetStateToUnknown)
{
	FPooledRenderTarget* Found = nullptr;

	// FastVRAM is no longer supported by the render target pool.
	EnumRemoveFlags(Desc.Flags, ETextureCreateFlags::FastVRAM | ETextureCreateFlags::FastVRAMPartialAlloc);

	// We always want SRV access
	Desc.Flags |= TexCreate_ShaderResource;

	const uint32 DescHash = GetTypeHash(Desc);

	// The hash array is indexed in lockstep with PooledRenderTargets; compare hashes first
	// so the full description is only touched for likely matches.
	for (uint32 Index = 0, Num = (uint32)PooledRenderTargets.Num(); Index < Num; ++Index)
	{
		if (PooledRenderTargetHashes[Index] != DescHash)
		{
			continue;
		}

		FPooledRenderTarget* Element = PooledRenderTargets[Index];
		if (!Element)
		{
			// FreeElementAtIndex() marks a released slot with a null element and a hash of 0.
			// Such a slot can only legitimately match when the requested hash is itself 0;
			// skip it instead of dereferencing null. Any other match means the hash was left behind.
			checkf(DescHash == 0, TEXT("Hash was not cleared from the list."));
			continue;
		}

#if DO_CHECK
		{
			const FRHITextureCreateInfo ElementDesc = Translate(Element->GetDesc());
			checkf(ElementDesc == Desc, TEXT("Invalid hash or collision when attempting to allocate %s"), Element->GetDesc().DebugName);
		}
#endif

		if (Element->IsFree())
		{
			Found = Element;
			break;
		}
	}

	if (!Found)
	{
		TRACE_CPUPROFILER_EVENT_SCOPE(FRenderTargetPool::CreateTexture);

		const ERHIAccess AccessInitial = ERHIAccess::SRVMask;
		FRHITextureCreateDesc CreateDesc(Desc, AccessInitial, Name);

		Found = new FPooledRenderTarget(
			RHICreateTexture(CreateDesc),
			AccessInitial,
			Translate(CreateDesc),
			this);

		// Keep the element and hash arrays in lockstep.
		PooledRenderTargets.Add(Found);
		PooledRenderTargetHashes.Add(DescHash);

		if (EnumHasAnyFlags(Desc.Flags, TexCreate_UAV))
		{
			// The render target desc is invalid if a UAV is requested with an RHI that doesn't support the high-end feature level.
			check(GMaxRHIFeatureLevel >= ERHIFeatureLevel::SM5 || GMaxRHIFeatureLevel == ERHIFeatureLevel::ES3_1);

			if (GRHISupportsUAVFormatAliasing)
			{
				// Use the explicit UAV format when one is given, otherwise the texture's own format.
				EPixelFormat AliasFormat = Desc.UAVFormat != PF_Unknown
					? Desc.UAVFormat
					: Desc.Format;

				Found->RenderTargetItem.UAV = RHICreateUnorderedAccessView(Found->GetRHI(), 0, (uint8)AliasFormat, 0, 0);
			}
			else
			{
				checkf(Desc.UAVFormat == PF_Unknown || Desc.UAVFormat == Desc.Format, TEXT("UAV aliasing is not supported by the current RHI."));
				Found->RenderTargetItem.UAV = RHICreateUnorderedAccessView(Found->GetRHI(), 0);
			}
		}

		AllocationLevelInKB += ComputeSizeInKB(*Found);
	}

	// Applies to both reused and freshly created elements.
	Found->Desc.DebugName = Name;
	Found->UnusedForNFrames = 0;

	if (bResetStateToUnknown)
	{
		Found->PooledTexture.Reset();
	}

#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
	RHIBindDebugLabelName(Found->GetRHI(), Name);
#endif

	return TRefCountPtr<IPooledRenderTarget>(Found);
}
bool FRenderTargetPool::FindFreeElement(const FRHITextureCreateInfo& Desc, TRefCountPtr<IPooledRenderTarget>& Out, const TCHAR* Name)
{
check(IsInRenderingThread());
if (!Desc.IsValid())
{
// no need to do anything
return true;
}
// Querying a render target that have no mip levels makes no sens.
check(Desc.NumMips > 0);
// if we can keep the current one, do that
if (Out)
{
FPooledRenderTarget* Current = (FPooledRenderTarget*)Out.GetReference();
if (Translate(Out->GetDesc()) == Desc)
{
// we can reuse the same, but the debug name might have changed
Current->Desc.DebugName = Name;
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
if (Current->GetRHI())
{
RHIBindDebugLabelName(Current->GetRHI(), Name);
}
#endif
check(!Out->IsFree());
return true;
}
else
{
// release old reference, it might free a RT we can use
Out = 0;
if (Current->IsFree())
{
AllocationLevelInKB -= ComputeSizeInKB(*Current);
int32 Index = FindIndex(Current);
check(Index >= 0);
FreeElementAtIndex(Index);
}
}
}
const bool bResetStateToUnknown = true;
Out = FindFreeElementInternal(Desc, Name, bResetStateToUnknown);
return false;
}
/**
 * Returns a pooled render target matching Desc.
 * Forwards to FindFreeElementInternal with the state reset enabled.
 */
TRefCountPtr<IPooledRenderTarget> FRenderTargetPool::FindFreeElement(const FRHITextureCreateInfo& Desc, const TCHAR* Name)
{
	return FindFreeElementInternal(Desc, Name, /*bResetStateToUnknown*/ true);
}
void FRenderTargetPool::CreateUntrackedElement(const FPooledRenderTargetDesc& Desc, TRefCountPtr<IPooledRenderTarget>& Out, const FSceneRenderTargetItem& Item)
{
FPooledRenderTarget* Result = new FPooledRenderTarget(Item.GetRHI(), ERHIAccess::Unknown, Desc, nullptr);
Result->RenderTargetItem = Item;
Out = Result;
}
/**
 * Recomputes the pool statistics by walking every slot.
 *
 * @param OutWholeCount		Number of slots in the pool, including cleared (null) ones.
 * @param OutWholePoolInKB	Summed size in KB of all non-null elements.
 * @param OutUsedInKB		Summed size in KB of the non-null elements that are not free.
 */
void FRenderTargetPool::GetStats(uint32& OutWholeCount, uint32& OutWholePoolInKB, uint32& OutUsedInKB) const
{
	OutWholeCount = (uint32)PooledRenderTargets.Num();
	OutUsedInKB = 0;
	OutWholePoolInKB = 0;

	for (FPooledRenderTarget* Element : PooledRenderTargets)
	{
		// Slots cleared by FreeElementAtIndex() stay null until the pool is compacted.
		if (!Element)
		{
			continue;
		}

		const uint32 ElementSizeInKB = ComputeSizeInKB(*Element);
		OutWholePoolInKB += ElementSizeInKB;

		if (!Element->IsFree())
		{
			OutUsedInKB += ElementSizeInKB;
		}
	}

	// if this triggers uncomment the code in VerifyAllocationLevel() and debug the issue, we might leak memory or not release when we could
	ensure(AllocationLevelInKB == OutWholePoolInKB);
}
void FRenderTargetPool::TickPoolElements()
{
uint32 DeferredAllocationLevelInKB = 0;
for (FPooledRenderTarget* Element : DeferredDeleteArray)
{
DeferredAllocationLevelInKB += ComputeSizeInKB(*Element);
}
check(IsInRenderingThread());
DeferredDeleteArray.Reset();
uint32 MinimumPoolSizeInKB;
{
static const auto CVar = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.RenderTargetPoolMin"));
MinimumPoolSizeInKB = FMath::Clamp(CVar->GetValueOnRenderThread(), 0, 2000) * 1024;
}
CompactPool();
uint32 UnusedAllocationLevelInKB = 0;
for (uint32 i = 0; i < (uint32)PooledRenderTargets.Num(); ++i)
{
FPooledRenderTarget* Element = PooledRenderTargets[i];
if (Element)
{
Element->OnFrameStart();
if (Element->UnusedForNFrames > 2)
{
UnusedAllocationLevelInKB += ComputeSizeInKB(*Element);
}
}
}
uint32 TotalFrameUsageInKb = AllocationLevelInKB + DeferredAllocationLevelInKB ;
CSV_CUSTOM_STAT(RenderTargetPool, UnusedMB, UnusedAllocationLevelInKB / 1024.0f, ECsvCustomStatOp::Set);
CSV_CUSTOM_STAT(RenderTargetPool, PeakUsedMB, (TotalFrameUsageInKb - UnusedAllocationLevelInKB) / 1024.f, ECsvCustomStatOp::Set);
// we need to release something, take the oldest ones first
while (AllocationLevelInKB > MinimumPoolSizeInKB)
{
// -1: not set
int32 OldestElementIndex = -1;
// find oldest element we can remove
for (uint32 i = 0, Num = (uint32)PooledRenderTargets.Num(); i < Num; ++i)
{
FPooledRenderTarget* Element = PooledRenderTargets[i];
if (Element && Element->UnusedForNFrames > 2)
{
if (OldestElementIndex != -1)
{
if (PooledRenderTargets[OldestElementIndex]->UnusedForNFrames < Element->UnusedForNFrames)
{
OldestElementIndex = i;
}
}
else
{
OldestElementIndex = i;
}
}
}
if (OldestElementIndex != -1)
{
AllocationLevelInKB -= ComputeSizeInKB(*PooledRenderTargets[OldestElementIndex]);
// we assume because of reference counting the resource gets released when not needed any more
// we don't use Remove() to not shuffle around the elements for better transparency on RenderTargetPoolEvents
FreeElementAtIndex(OldestElementIndex);
}
else
{
// There is no element we can remove but we are over budget, better we log that.
// Options:
// * Increase the pool
// * Reduce rendering features or resolution
// * Investigate allocations, order or reusing other render targets can help
// * Ignore (editor case, might start using slow memory which can be ok)
if (!bCurrentlyOverBudget)
{
UE_CLOG(IsRunningClientOnly() && MinimumPoolSizeInKB != 0, LogRenderTargetPool, Warning, TEXT("r.RenderTargetPoolMin exceeded %d/%d MB (ok in editor, bad on fixed memory platform)"), (AllocationLevelInKB + 1023) / 1024, MinimumPoolSizeInKB / 1024);
bCurrentlyOverBudget = true;
}
// at this point we need to give up
break;
}
}
if (AllocationLevelInKB <= MinimumPoolSizeInKB)
{
if (bCurrentlyOverBudget)
{
UE_CLOG(MinimumPoolSizeInKB != 0, LogRenderTargetPool, Display, TEXT("r.RenderTargetPoolMin resolved %d/%d MB"), (AllocationLevelInKB + 1023) / 1024, MinimumPoolSizeInKB / 1024);
bCurrentlyOverBudget = false;
}
}
#if STATS
uint32 Count, SizeKB, UsedKB;
GetStats(Count, SizeKB, UsedKB);
CSV_CUSTOM_STAT_GLOBAL(RenderTargetPoolSize, float(SizeKB) / 1024.0f, ECsvCustomStatOp::Set);
CSV_CUSTOM_STAT_GLOBAL(RenderTargetPoolUsed, float(UsedKB) / 1024.0f, ECsvCustomStatOp::Set);
CSV_CUSTOM_STAT_GLOBAL(RenderTargetPoolCount, int32(Count), ECsvCustomStatOp::Set);
SET_MEMORY_STAT(STAT_RenderTargetPoolSize, int64(SizeKB) * 1024ll);
SET_MEMORY_STAT(STAT_RenderTargetPoolUsed, int64(UsedKB) * 1024ll);
SET_DWORD_STAT(STAT_RenderTargetPoolCount, Count);
#endif // STATS
}
/**
 * Looks up the slot index of a pooled render target by pointer identity.
 *
 * @param In	Element to search for; a null pointer is never matched against cleared slots.
 * @return The index of In within PooledRenderTargets, or -1 if In is null or not in the pool.
 */
int32 FRenderTargetPool::FindIndex(IPooledRenderTarget* In) const
{
	check(IsInRenderingThread());

	if (In)
	{
		const int32 SlotCount = PooledRenderTargets.Num();
		for (int32 Slot = 0; Slot < SlotCount; ++Slot)
		{
			const FPooledRenderTarget* Candidate = PooledRenderTargets[Slot];
			if (Candidate == In)
			{
				return Slot;
			}
		}
	}

	// not found
	return -1;
}
/**
 * Clears the slot at Index: the pool's reference to the element is dropped and the
 * slot's hash is zeroed. The slot itself stays in place.
 */
void FRenderTargetPool::FreeElementAtIndex(int32 Index)
{
	// Slots are cleared rather than removed (no Remove()) so the remaining elements keep
	// their positions, for better transparency on RenderTargetPoolEvents.
	PooledRenderTargets[Index] = nullptr;
	PooledRenderTargetHashes[Index] = 0;
}
/**
 * Releases the caller's reference to a tracked element and, if nothing else references it
 * afterwards, queues it for deferred deletion and clears its slot.
 *
 * @param In	Reference to release. Left untouched when it is null or not tracked by the pool.
 */
void FRenderTargetPool::FreeUnusedResource(TRefCountPtr<IPooledRenderTarget>& In)
{
	check(IsInRenderingThread());

	const int32 PoolIndex = FindIndex(In);
	if (PoolIndex == -1)
	{
		return;
	}

	FPooledRenderTarget* Tracked = PooledRenderTargets[PoolIndex];

	// Ref count will always be at least 2
	ensure(Tracked->GetRefCount() >= 2);

	In = nullptr;

	if (!Tracked->IsFree())
	{
		return;
	}

	AllocationLevelInKB -= ComputeSizeInKB(*Tracked);

	// we assume because of reference counting the resource gets released when not needed any more
	DeferredDeleteArray.Add(PooledRenderTargets[PoolIndex]);
	FreeElementAtIndex(PoolIndex);
}
void FRenderTargetPool::FreeUnusedResources()
{
check(IsInRenderingThread());
for (uint32 i = 0, Num = (uint32)PooledRenderTargets.Num(); i < Num; ++i)
{
FPooledRenderTarget* Element = PooledRenderTargets[i];
if (Element && Element->IsFree())
{
AllocationLevelInKB -= ComputeSizeInKB(*Element);
// we assume because of reference counting the resource gets released when not needed any more
// we don't use Remove() to not shuffle around the elements for better transparency on RenderTargetPoolEvents
DeferredDeleteArray.Add(PooledRenderTargets[i]);
FreeElementAtIndex(i);
}
}
}
/**
 * Logs one line per pooled element and per element awaiting deferred deletion,
 * followed by totals for each group.
 *
 * @param OutputDevice	Destination for the report.
 */
void FRenderTargetPool::DumpMemoryUsage(FOutputDevice& OutputDevice)
{
	uint32 UnusedAllocationInKB = 0;

	OutputDevice.Logf(TEXT("Pooled Render Targets:"));
	for (FPooledRenderTarget* Element : PooledRenderTargets)
	{
		if (!Element)
		{
			continue;
		}

		const auto& ElementDesc = Element->Desc;
		const uint32 ElementAllocationInKB = ComputeSizeInKB(*Element);

		// Same "unused" threshold as TickPoolElements().
		if (Element->UnusedForNFrames > 2)
		{
			UnusedAllocationInKB += ElementAllocationInKB;
		}

		// Depth suffix for volume textures, "cube" for cubemaps, padding otherwise.
		const FString DepthLabel = ElementDesc.Depth > 1
			? FString::Printf(TEXT("x%3d"), ElementDesc.Depth)
			: FString(ElementDesc.IsCubemap() ? TEXT("cube") : TEXT(" "));
		const FString ArrayLabel = ElementDesc.bIsArray
			? FString::Printf(TEXT("[%3d]"), ElementDesc.ArraySize)
			: FString(TEXT(" "));

		OutputDevice.Logf(
			TEXT(" %6.3fMB %4dx%4d%s%s %2dmip(s) %s (%s) Unused frames: %d"),
			ElementAllocationInKB / 1024.0f,
			ElementDesc.Extent.X,
			ElementDesc.Extent.Y,
			*DepthLabel,
			*ArrayLabel,
			ElementDesc.NumMips,
			ElementDesc.DebugName,
			GPixelFormats[ElementDesc.Format].Name,
			Element->UnusedForNFrames
			);
	}

	uint32 NumTargets = 0;
	uint32 UsedKB = 0;
	uint32 PoolKB = 0;
	GetStats(NumTargets, PoolKB, UsedKB);
	OutputDevice.Logf(TEXT("%.3fMB total, %.3fMB used, %.3fMB unused, %d render targets"), PoolKB / 1024.f, UsedKB / 1024.f, UnusedAllocationInKB / 1024.f, NumTargets);

	uint32 DeferredTotal = 0;

	OutputDevice.Logf(TEXT("Deferred Render Targets:"));
	for (FPooledRenderTarget* Element : DeferredDeleteArray)
	{
		if (!Element)
		{
			continue;
		}

		const auto& ElementDesc = Element->Desc;
		const uint32 SizeInKB = ComputeSizeInKB(*Element);

		const FString DepthLabel = ElementDesc.Depth > 1
			? FString::Printf(TEXT("x%3d"), ElementDesc.Depth)
			: FString(ElementDesc.IsCubemap() ? TEXT("cube") : TEXT(" "));
		const FString ArrayLabel = ElementDesc.bIsArray
			? FString::Printf(TEXT("[%3d]"), ElementDesc.ArraySize)
			: FString(TEXT(" "));

		OutputDevice.Logf(
			TEXT(" %6.3fMB %4dx%4d%s%s %2dmip(s) %s (%s)"),
			SizeInKB / 1024.0f,
			ElementDesc.Extent.X,
			ElementDesc.Extent.Y,
			*DepthLabel,
			*ArrayLabel,
			ElementDesc.NumMips,
			ElementDesc.DebugName,
			GPixelFormats[ElementDesc.Format].Name
			);

		DeferredTotal += SizeInKB;
	}

	OutputDevice.Logf(TEXT("%.3fMB Deferred total"), DeferredTotal / 1024.f);
}
void FRenderTargetPool::ReleaseDynamicRHI()
{
check(IsInRenderingThread());
DeferredDeleteArray.Empty();
PooledRenderTargets.Empty();
}
/**
 * Debugging helper: returns the element stored in slot Id.
 *
 * @param Id	Slot index into PooledRenderTargets.
 * @return The element in that slot (null for a cleared slot), or null when Id is out of range.
 */
FPooledRenderTarget* FRenderTargetPool::GetElementById(uint32 Id) const
{
	// is used in game and render thread
	const bool bIdInRange = Id < (uint32)PooledRenderTargets.Num();
	if (bIdInRange)
	{
		return PooledRenderTargets[Id];
	}

	return nullptr;
}
void FRenderTargetPool::CompactPool()
{
for (uint32 i = 0, Num = (uint32)PooledRenderTargets.Num(); i < Num; )
{
FPooledRenderTarget* Element = PooledRenderTargets[i];
if (!Element)
{
PooledRenderTargets.RemoveAtSwap(i);
PooledRenderTargetHashes.RemoveAtSwap(i);
--Num;
}
else
{
++i;
}
}
}
/**
 * Ages the element by one frame when it is free.
 *
 * @return true once the element has been free for more than 10 consecutive calls,
 *         false otherwise (including whenever the element is still referenced).
 */
bool FPooledRenderTarget::OnFrameStart()
{
	check(IsInRenderingThread());

	if (IsFree())
	{
		++UnusedForNFrames;

		// this logic can be improved
		// Past the threshold the element is reported as ready for release.
		return UnusedForNFrames > 10;
	}

	// If there are any references to the pooled render target other than the pool itself, then it may not be freed.
	check(!UnusedForNFrames);
	return false;
}
/**
 * Returns the memory size of the textures held by this element.
 *
 * The targetable texture is always counted; the shader resource texture is added only
 * when it is a different texture, so a shared texture is not counted twice.
 *
 * @return Combined size as reported by RHIComputeMemorySize.
 */
uint32 FPooledRenderTarget::ComputeMemorySize() const
{
	// The former 2D / 3D / other branches performed exactly the same computation, so a
	// single path covers every texture dimension.
	uint32 Size = RHIComputeMemorySize(RenderTargetItem.TargetableTexture);

	if (RenderTargetItem.ShaderResourceTexture != RenderTargetItem.TargetableTexture)
	{
		Size += RHIComputeMemorySize(RenderTargetItem.ShaderResourceTexture);
	}

	return Size;
}
bool FPooledRenderTarget::IsFree() const
{
uint32 RefCount = GetRefCount();
check(RefCount >= 1);
// If the only reference to the pooled render target is from the pool, then it's unused.
return RefCount == 1;
}