Files
UnrealEngineUWP/Engine/Source/Runtime/RenderCore/Private/RenderGraphBuilder.cpp
zach bethel 003552b730 Deprecated non-command list RHI methods.
- RHICreate{Vertex, Index, Structured}Buffer
- RHICreate{ShaderResource, UnorderedAccess}View
- RHIUpdateUniformBuffer
- Various initialization / locking methods for helper buffer types in RHIUtilities.h

The goal is to continue to force resource creation through command lists to avoid surprises with moving things off the render thread.

#rb christopher.waters

[CL 26183746 by zach bethel in 5.3 branch]
2023-06-22 11:25:09 -04:00

4149 lines
128 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "RenderGraphBuilder.h"
#include "RenderGraphPrivate.h"
#include "RenderGraphTrace.h"
#include "RenderGraphUtils.h"
#include "RenderTargetPool.h"
#include "RenderGraphResourcePool.h"
#include "VisualizeTexture.h"
#include "ProfilingDebugging/CsvProfiler.h"
#include "Async/ParallelFor.h"
#if ENABLE_RHI_VALIDATION
/**
 * Collects the RHI UAVs referenced by a pass for RHI-validation UAV-overlap tracking.
 * Only one UAV per unique parent resource is emitted, because validation of
 * Begin/EndUAVOverlap happens on the underlying resource: passing two UAVs of the
 * same resource would trigger double-Begin / double-End validation errors.
 *
 * @param Pass    Pass whose parameters are enumerated for UAVs.
 * @param OutUAVs Receives at most one RHI UAV per unique parent resource.
 */
inline void GatherPassUAVsForOverlapValidation(const FRDGPass* Pass, TArray<FRHIUnorderedAccessView*, TInlineAllocator<MaxSimultaneousUAVs, FRDGArrayAllocator>>& OutUAVs)
{
	// Parent resources already seen; kept small and inline-allocated, so the
	// linear scan performed by Contains() is the right complexity here.
	TArray<FRDGViewableResource*, TInlineAllocator<MaxSimultaneousUAVs, FRDGArrayAllocator>> UniqueParents;

	Pass->GetParameters().Enumerate([&](FRDGParameter Parameter)
	{
		if (Parameter.IsUAV())
		{
			if (FRDGUnorderedAccessViewRef UAV = Parameter.GetAsUAV())
			{
				FRDGViewableResource* Parent = UAV->GetParent();

				// TArray::Contains replaces the previous hand-rolled bFound loop.
				if (!UniqueParents.Contains(Parent))
				{
					UniqueParents.Add(Parent);
					OutUAVs.Add(UAV->GetRHI());
				}
			}
		}
	});
}
#endif
// A contiguous run of passes recorded into its own queued command list so that
// multiple sets can be recorded concurrently and then submitted to the immediate
// command list in order.
struct FParallelPassSet : public FRHICommandListImmediate::FQueuedCommandList
{
FParallelPassSet() = default;
// Passes belonging to this set, in submission order.
TArray<FRDGPass*, FRDGArrayAllocator> Passes;
// Breadcrumb snapshots at the set boundaries (compiled in only when breadcrumb events are enabled).
IF_RHI_WANT_BREADCRUMB_EVENTS(FRDGBreadcrumbState* BreadcrumbStateBegin{});
IF_RHI_WANT_BREADCRUMB_EVENTS(FRDGBreadcrumbState* BreadcrumbStateEnd{});
// NOTE(review): declared int8 rather than bool — the reason is not visible in this chunk; confirm before changing the type.
int8 bInitialized = 0;
bool bDispatchAfterExecute = false;
bool bParallelTranslate = false;
};
/** Opens an RHI-validation UAV overlap scope covering the pass's UAVs (no-op unless validation is compiled in). */
inline void BeginUAVOverlap(const FRDGPass* Pass, FRHIComputeCommandList& RHICmdList)
{
#if ENABLE_RHI_VALIDATION
	// One UAV per unique parent resource, to avoid double-Begin validation errors.
	TArray<FRHIUnorderedAccessView*, TInlineAllocator<MaxSimultaneousUAVs, FRDGArrayAllocator>> OverlapUAVs;
	GatherPassUAVsForOverlapValidation(Pass, OverlapUAVs);

	if (!OverlapUAVs.IsEmpty())
	{
		RHICmdList.BeginUAVOverlap(OverlapUAVs);
	}
#endif
}
/** Closes the RHI-validation UAV overlap scope opened by BeginUAVOverlap (no-op unless validation is compiled in). */
inline void EndUAVOverlap(const FRDGPass* Pass, FRHIComputeCommandList& RHICmdList)
{
#if ENABLE_RHI_VALIDATION
	// Must mirror BeginUAVOverlap: same per-unique-parent UAV gathering.
	TArray<FRHIUnorderedAccessView*, TInlineAllocator<MaxSimultaneousUAVs, FRDGArrayAllocator>> OverlapUAVs;
	GatherPassUAVsForOverlapValidation(Pass, OverlapUAVs);

	if (!OverlapUAVs.IsEmpty())
	{
		RHICmdList.EndUAVOverlap(OverlapUAVs);
	}
#endif
}
/**
 * Combines an old and new access state into a single valid RHI access state.
 * If the union is mergeable, the union is returned (with read-only-exclusive bits
 * stripped when it is writable). Otherwise the non-mergeable one of the two wins,
 * preferring the old state.
 */
inline ERHIAccess MakeValidAccess(ERHIAccess AccessOld, ERHIAccess AccessNew)
{
	const ERHIAccess CombinedAccess = AccessOld | AccessNew;
	const ERHIAccess NonMergeableMask = ~GRHIMergeableAccessMask;

	if (!EnumHasAnyFlags(CombinedAccess, NonMergeableMask))
	{
		// Mergeable: a writable merged state must not keep read-only-exclusive bits.
		if (IsWritableAccess(CombinedAccess))
		{
			return CombinedAccess & ~ERHIAccess::ReadOnlyExclusiveMask;
		}
		return CombinedAccess;
	}

	// Not mergeable: keep the old state if it is the non-mergeable one, otherwise take the new.
	return EnumHasAnyFlags(AccessOld, NonMergeableMask) ? AccessOld : AccessNew;
}
/**
 * Derives the SRV and UAV access states implied by a pass's flags.
 * Raster contributes graphics bits, (async) compute contributes compute bits,
 * and copy passes add CopySrc to the SRV access.
 */
inline void GetPassAccess(ERDGPassFlags PassFlags, ERHIAccess& SRVAccess, ERHIAccess& UAVAccess)
{
	ERHIAccess ResolvedSRV = ERHIAccess::Unknown;
	ERHIAccess ResolvedUAV = ERHIAccess::Unknown;

	if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::Raster))
	{
		ResolvedSRV |= ERHIAccess::SRVGraphics;
		ResolvedUAV |= ERHIAccess::UAVGraphics;
	}

	if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::AsyncCompute | ERDGPassFlags::Compute))
	{
		ResolvedSRV |= ERHIAccess::SRVCompute;
		ResolvedUAV |= ERHIAccess::UAVCompute;
	}

	if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::Copy))
	{
		ResolvedSRV |= ERHIAccess::CopySrc;
	}

	SRVAccess = ResolvedSRV;
	UAVAccess = ResolvedUAV;
}
// Qualifies how a texture access participates in a pass when enumerating accesses below.
enum class ERDGTextureAccessFlags
{
None = 0,
// Access is within the fixed-function render pass.
RenderTarget = 1 << 0
};
ENUM_CLASS_FLAGS(ERDGTextureAccessFlags);
/** Enumerates all texture accesses and provides the access and subresource range info. This results in
 * multiple invocations of the same resource, but with different access / subresource range.
 *
 * AccessFunction is invoked as (View-or-null, Texture, Access, ERDGTextureAccessFlags, SubresourceRange).
 * The view argument is null for accesses that are not made through an RDG view
 * (plain texture parameters, explicit access parameters, and render target bindings).
 */
template <typename TAccessFunction>
void EnumerateTextureAccess(FRDGParameterStruct PassParameters, ERDGPassFlags PassFlags, TAccessFunction AccessFunction)
{
const ERDGTextureAccessFlags NoneFlags = ERDGTextureAccessFlags::None;
// SRV / UAV access states implied by the pass flags (graphics vs compute vs copy).
ERHIAccess SRVAccess, UAVAccess;
GetPassAccess(PassFlags, SRVAccess, UAVAccess);
PassParameters.EnumerateTextures([&](FRDGParameter Parameter)
{
switch (Parameter.GetType())
{
// Plain texture parameter: whole SRV subresource range with the pass's SRV access.
case UBMT_RDG_TEXTURE:
if (FRDGTextureRef Texture = Parameter.GetAsTexture())
{
AccessFunction(nullptr, Texture, SRVAccess, NoneFlags, Texture->GetSubresourceRangeSRV());
}
break;
// Explicit access parameter: the caller supplied the exact access state.
case UBMT_RDG_TEXTURE_ACCESS:
{
if (FRDGTextureAccess TextureAccess = Parameter.GetAsTextureAccess())
{
AccessFunction(nullptr, TextureAccess.GetTexture(), TextureAccess.GetAccess(), NoneFlags, TextureAccess->GetSubresourceRange());
}
}
break;
case UBMT_RDG_TEXTURE_ACCESS_ARRAY:
{
const FRDGTextureAccessArray& TextureAccessArray = Parameter.GetAsTextureAccessArray();
for (FRDGTextureAccess TextureAccess : TextureAccessArray)
{
AccessFunction(nullptr, TextureAccess.GetTexture(), TextureAccess.GetAccess(), NoneFlags, TextureAccess->GetSubresourceRange());
}
}
break;
// SRV / UAV parameters report the view's own subresource range.
case UBMT_RDG_TEXTURE_SRV:
if (FRDGTextureSRVRef SRV = Parameter.GetAsTextureSRV())
{
AccessFunction(SRV, SRV->GetParent(), SRVAccess, NoneFlags, SRV->GetSubresourceRange());
}
break;
case UBMT_RDG_TEXTURE_UAV:
if (FRDGTextureUAVRef UAV = Parameter.GetAsTextureUAV())
{
AccessFunction(UAV, UAV->GetParent(), UAVAccess, NoneFlags, UAV->GetSubresourceRange());
}
break;
// Render target bindings: color targets, resolve targets, depth/stencil, and shading rate.
case UBMT_RENDER_TARGET_BINDING_SLOTS:
{
const ERDGTextureAccessFlags RenderTargetAccess = ERDGTextureAccessFlags::RenderTarget;
const ERHIAccess RTVAccess = ERHIAccess::RTV;
const FRenderTargetBindingSlots& RenderTargets = Parameter.GetAsRenderTargetBindingSlots();
RenderTargets.Enumerate([&](FRenderTargetBinding RenderTarget)
{
FRDGTextureRef Texture = RenderTarget.GetTexture();
FRDGTextureRef ResolveTexture = RenderTarget.GetResolveTexture();
// Narrow the range to the bound mip (and array slice, when one is specified).
FRDGTextureSubresourceRange Range(Texture->GetSubresourceRange());
Range.MipIndex = RenderTarget.GetMipIndex();
Range.NumMips = 1;
if (RenderTarget.GetArraySlice() != -1)
{
Range.ArraySlice = RenderTarget.GetArraySlice();
Range.NumArraySlices = 1;
}
AccessFunction(nullptr, Texture, RTVAccess, RenderTargetAccess, Range);
if (ResolveTexture && ResolveTexture != Texture)
{
// Resolve targets must use the RTV|ResolveDst flag combination when the resolve is performed through the render
// pass. The ResolveDst flag must be used alone only when the resolve is performed using RHICopyToResolveTarget.
AccessFunction(nullptr, ResolveTexture, ERHIAccess::RTV | ERHIAccess::ResolveDst, RenderTargetAccess, Range);
}
});
const FDepthStencilBinding& DepthStencil = RenderTargets.DepthStencil;
if (FRDGTextureRef Texture = DepthStencil.GetTexture())
{
// Depth and stencil planes may carry different access states; enumerate each.
DepthStencil.GetDepthStencilAccess().EnumerateSubresources([&](ERHIAccess NewAccess, uint32 PlaneSlice)
{
FRDGTextureSubresourceRange Range = Texture->GetSubresourceRange();
// Adjust the range to use a single plane slice if not using of them all.
if (PlaneSlice != FRHITransitionInfo::kAllSubresources)
{
Range.PlaneSlice = PlaneSlice;
Range.NumPlaneSlices = 1;
}
AccessFunction(nullptr, Texture, NewAccess, RenderTargetAccess, Range);
});
}
if (FRDGTextureRef Texture = RenderTargets.ShadingRateTexture)
{
AccessFunction(nullptr, Texture, ERHIAccess::ShadingRateSource, RenderTargetAccess, Texture->GetSubresourceRangeSRV());
}
}
break;
}
});
}
/** Enumerates all buffer accesses and provides the access info.
 *
 * AccessFunction is invoked as (View-or-null, Buffer, Access). The view argument
 * is null for plain access parameters.
 */
template <typename TAccessFunction>
void EnumerateBufferAccess(FRDGParameterStruct PassParameters, ERDGPassFlags PassFlags, TAccessFunction AccessFunction)
{
// SRV / UAV access states implied by the pass flags.
ERHIAccess SRVAccess, UAVAccess;
GetPassAccess(PassFlags, SRVAccess, UAVAccess);
PassParameters.EnumerateBuffers([&](FRDGParameter Parameter)
{
switch (Parameter.GetType())
{
// Explicit access parameter: the caller supplied the exact access state.
case UBMT_RDG_BUFFER_ACCESS:
if (FRDGBufferAccess BufferAccess = Parameter.GetAsBufferAccess())
{
AccessFunction(nullptr, BufferAccess.GetBuffer(), BufferAccess.GetAccess());
}
break;
case UBMT_RDG_BUFFER_ACCESS_ARRAY:
{
const FRDGBufferAccessArray& BufferAccessArray = Parameter.GetAsBufferAccessArray();
for (FRDGBufferAccess BufferAccess : BufferAccessArray)
{
AccessFunction(nullptr, BufferAccess.GetBuffer(), BufferAccess.GetAccess());
}
}
break;
case UBMT_RDG_BUFFER_SRV:
if (FRDGBufferSRVRef SRV = Parameter.GetAsBufferSRV())
{
FRDGBufferRef Buffer = SRV->GetParent();
ERHIAccess BufferAccess = SRVAccess;
// Acceleration structures are read through BVHRead in addition to the SRV bits.
if (EnumHasAnyFlags(Buffer->Desc.Usage, BUF_AccelerationStructure))
{
BufferAccess = ERHIAccess::BVHRead | ERHIAccess::SRVMask;
}
AccessFunction(SRV, Buffer, BufferAccess);
}
break;
case UBMT_RDG_BUFFER_UAV:
if (FRDGBufferUAVRef UAV = Parameter.GetAsBufferUAV())
{
AccessFunction(UAV, UAV->GetParent(), UAVAccess);
}
break;
}
});
}
/**
 * Returns the view handle when the view is a UAV flagged to skip UAV barriers;
 * otherwise returns the null handle.
 */
inline FRDGViewHandle GetHandleIfNoUAVBarrier(FRDGViewRef Resource)
{
	if (!Resource)
	{
		return FRDGViewHandle::Null;
	}

	// Only UAV views can opt out of barriers; the cast below is only reached for UAV types.
	const bool bIsUAVType = Resource->Type == ERDGViewType::BufferUAV || Resource->Type == ERDGViewType::TextureUAV;

	if (bIsUAVType && EnumHasAnyFlags(static_cast<FRDGUnorderedAccessViewRef>(Resource)->Flags, ERDGUnorderedAccessViewFlags::SkipBarrier))
	{
		return Resource->GetHandle();
	}

	return FRDGViewHandle::Null;
}
/**
 * Chooses transition flags for a texture (or one of its views).
 * Meta-data SRV/UAV views request MaintainCompression; when no view is involved,
 * the texture's own MaintainCompression flag decides.
 */
inline EResourceTransitionFlags GetTextureViewTransitionFlags(FRDGViewRef Resource, FRDGTextureRef Texture)
{
	if (!Resource)
	{
		// Whole-resource transition: honor the texture-level compression hint.
		return EnumHasAnyFlags(Texture->Flags, ERDGTextureFlags::MaintainCompression)
			? EResourceTransitionFlags::MaintainCompression
			: EResourceTransitionFlags::None;
	}

	// View-level transition: only texture UAV/SRV meta-data views maintain compression.
	if (Resource->Type == ERDGViewType::TextureUAV)
	{
		if (static_cast<FRDGTextureUAVRef>(Resource)->Desc.MetaData != ERDGTextureMetaDataAccess::None)
		{
			return EResourceTransitionFlags::MaintainCompression;
		}
	}
	else if (Resource->Type == ERDGViewType::TextureSRV)
	{
		if (static_cast<FRDGTextureSRVRef>(Resource)->Desc.MetaData != ERDGTextureMetaDataAccess::None)
		{
			return EResourceTransitionFlags::MaintainCompression;
		}
	}

	return EResourceTransitionFlags::None;
}
/**
 * Requests an RHI resource flush around graph execution. May only be called once
 * per builder. In immediate mode the flush is performed right away since there is
 * no deferred execute phase.
 */
void FRDGBuilder::SetFlushResourcesRHI()
{
	if (GRHINeedsExtraDeletionLatency || !GRHICommandList.Bypass())
	{
		// Fixed assert message: it previously referenced a non-existent function name
		// ("SetFlushRHIResources") and read "has been already been called".
		checkf(!bFlushResourcesRHI, TEXT("SetFlushResourcesRHI has already been called. It may only be called once."));
		bFlushResourcesRHI = true;

		if (IsImmediateMode())
		{
			BeginFlushResourcesRHI();
			EndFlushResourcesRHI();
		}
	}
}
/** First half of the RHI resource flush requested via SetFlushResourcesRHI: dispatch pending work to the RHI thread. */
void FRDGBuilder::BeginFlushResourcesRHI()
{
	if (bFlushResourcesRHI)
	{
		CSV_SCOPED_TIMING_STAT_EXCLUSIVE(STAT_RDG_FlushResourcesRHI);
		SCOPED_NAMED_EVENT(BeginFlushResourcesRHI, FColor::Emerald);
		RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
	}
}
/** Second half of the RHI resource flush: wait on the RHI thread and release resources. */
void FRDGBuilder::EndFlushResourcesRHI()
{
	if (bFlushResourcesRHI)
	{
		CSV_SCOPED_TIMING_STAT_EXCLUSIVE(STAT_RDG_FlushResourcesRHI);
		SCOPED_NAMED_EVENT(EndFlushResourcesRHI, FColor::Emerald);
		RHICmdList.ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
	}
}
// Per-frame tick: ages out pooled RDG buffers, decays the transition-log countdown,
// and publishes / resets the accumulated RDG stat counters.
void FRDGBuilder::TickPoolElements()
{
GRenderGraphResourcePool.TickPoolElements();
#if RDG_ENABLE_DEBUG
// GRDGTransitionLog counts down the number of frames left to log transitions.
if (GRDGTransitionLog > 0)
{
--GRDGTransitionLog;
}
#endif
#if RDG_STATS
// Publish the frame's accumulated counters to CSV, trace, and stats systems.
CSV_CUSTOM_STAT(RDGCount, Passes, GRDGStatPassCount, ECsvCustomStatOp::Set);
CSV_CUSTOM_STAT(RDGCount, Buffers, GRDGStatBufferCount, ECsvCustomStatOp::Set);
CSV_CUSTOM_STAT(RDGCount, Textures, GRDGStatTextureCount, ECsvCustomStatOp::Set);
TRACE_COUNTER_SET(COUNTER_RDG_PassCount, GRDGStatPassCount);
TRACE_COUNTER_SET(COUNTER_RDG_PassCullCount, GRDGStatPassCullCount);
TRACE_COUNTER_SET(COUNTER_RDG_RenderPassMergeCount, GRDGStatRenderPassMergeCount);
TRACE_COUNTER_SET(COUNTER_RDG_PassDependencyCount, GRDGStatPassDependencyCount);
TRACE_COUNTER_SET(COUNTER_RDG_TextureCount, GRDGStatTextureCount);
TRACE_COUNTER_SET(COUNTER_RDG_TextureReferenceCount, GRDGStatTextureReferenceCount);
TRACE_COUNTER_SET(COUNTER_RDG_TextureReferenceAverage, (float)(GRDGStatTextureReferenceCount / FMath::Max((float)GRDGStatTextureCount, 1.0f)));
TRACE_COUNTER_SET(COUNTER_RDG_BufferCount, GRDGStatBufferCount);
TRACE_COUNTER_SET(COUNTER_RDG_BufferReferenceCount, GRDGStatBufferReferenceCount);
TRACE_COUNTER_SET(COUNTER_RDG_BufferReferenceAverage, (float)(GRDGStatBufferReferenceCount / FMath::Max((float)GRDGStatBufferCount, 1.0f)));
TRACE_COUNTER_SET(COUNTER_RDG_ViewCount, GRDGStatViewCount);
TRACE_COUNTER_SET(COUNTER_RDG_TransientTextureCount, GRDGStatTransientTextureCount);
TRACE_COUNTER_SET(COUNTER_RDG_TransientBufferCount, GRDGStatTransientBufferCount);
TRACE_COUNTER_SET(COUNTER_RDG_TransitionCount, GRDGStatTransitionCount);
TRACE_COUNTER_SET(COUNTER_RDG_AliasingCount, GRDGStatAliasingCount);
TRACE_COUNTER_SET(COUNTER_RDG_TransitionBatchCount, GRDGStatTransitionBatchCount);
TRACE_COUNTER_SET(COUNTER_RDG_MemoryWatermark, int64(GRDGStatMemoryWatermark));
SET_DWORD_STAT(STAT_RDG_PassCount, GRDGStatPassCount);
SET_DWORD_STAT(STAT_RDG_PassCullCount, GRDGStatPassCullCount);
SET_DWORD_STAT(STAT_RDG_RenderPassMergeCount, GRDGStatRenderPassMergeCount);
SET_DWORD_STAT(STAT_RDG_PassDependencyCount, GRDGStatPassDependencyCount);
SET_DWORD_STAT(STAT_RDG_TextureCount, GRDGStatTextureCount);
SET_DWORD_STAT(STAT_RDG_TextureReferenceCount, GRDGStatTextureReferenceCount);
SET_FLOAT_STAT(STAT_RDG_TextureReferenceAverage, (float)(GRDGStatTextureReferenceCount / FMath::Max((float)GRDGStatTextureCount, 1.0f)));
SET_DWORD_STAT(STAT_RDG_BufferCount, GRDGStatBufferCount);
SET_DWORD_STAT(STAT_RDG_BufferReferenceCount, GRDGStatBufferReferenceCount);
SET_FLOAT_STAT(STAT_RDG_BufferReferenceAverage, (float)(GRDGStatBufferReferenceCount / FMath::Max((float)GRDGStatBufferCount, 1.0f)));
SET_DWORD_STAT(STAT_RDG_ViewCount, GRDGStatViewCount);
SET_DWORD_STAT(STAT_RDG_TransientTextureCount, GRDGStatTransientTextureCount);
SET_DWORD_STAT(STAT_RDG_TransientBufferCount, GRDGStatTransientBufferCount);
SET_DWORD_STAT(STAT_RDG_TransitionCount, GRDGStatTransitionCount);
SET_DWORD_STAT(STAT_RDG_AliasingCount, GRDGStatAliasingCount);
SET_DWORD_STAT(STAT_RDG_TransitionBatchCount, GRDGStatTransitionBatchCount);
SET_MEMORY_STAT(STAT_RDG_MemoryWatermark, int64(GRDGStatMemoryWatermark));
// Reset the accumulators for the next frame.
GRDGStatPassCount = 0;
GRDGStatPassCullCount = 0;
GRDGStatRenderPassMergeCount = 0;
GRDGStatPassDependencyCount = 0;
GRDGStatTextureCount = 0;
GRDGStatTextureReferenceCount = 0;
GRDGStatBufferCount = 0;
GRDGStatBufferReferenceCount = 0;
GRDGStatViewCount = 0;
GRDGStatTransientTextureCount = 0;
GRDGStatTransientBufferCount = 0;
GRDGStatTransitionCount = 0;
GRDGStatAliasingCount = 0;
GRDGStatTransitionBatchCount = 0;
GRDGStatMemoryWatermark = 0;
#endif
}
// Thin wrapper over the file-global ::IsImmediateMode() helper.
bool FRDGBuilder::IsImmediateMode()
{
return ::IsImmediateMode();
}
/**
 * Adjusts a pass's flags to the platform's async-compute capabilities:
 * force-promotes Compute to AsyncCompute when the CVar demands it, and demotes
 * AsyncCompute back to Compute when async compute is unsupported.
 *
 * @param PassName  Retained for interface compatibility. A previously computed
 *                  'bDebugAllowedForPass' local derived from it was never read,
 *                  so that dead computation has been removed.
 * @param PassFlags Flags requested by the caller.
 * @return The possibly-adjusted flags.
 */
ERDGPassFlags FRDGBuilder::OverridePassFlags(const TCHAR* PassName, ERDGPassFlags PassFlags)
{
	(void)PassName;

	if (IsAsyncComputeSupported())
	{
		// Promote compute passes to async compute when force-enabled by CVar.
		if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::Compute) && GRDGAsyncCompute == RDG_ASYNC_COMPUTE_FORCE_ENABLED)
		{
			PassFlags &= ~ERDGPassFlags::Compute;
			PassFlags |= ERDGPassFlags::AsyncCompute;
		}
	}
	else
	{
		// Demote async compute passes to the graphics pipe when unsupported.
		if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::AsyncCompute))
		{
			PassFlags &= ~ERDGPassFlags::AsyncCompute;
			PassFlags |= ERDGPassFlags::Compute;
		}
	}
	return PassFlags;
}
/** Whether the buffer qualifies for transient (aliased) allocation. */
bool FRDGBuilder::IsTransient(FRDGBufferRef Buffer) const
{
	const auto Usage = Buffer->Desc.Usage;

	if (!IsTransientInternal(Buffer, EnumHasAnyFlags(Usage, BUF_FastVRAM)))
	{
		return false;
	}

	// Indirect-arg buffers may be excluded from transient allocation via CVar.
	if (EnumHasAnyFlags(Usage, BUF_DrawIndirect) && !GRDGTransientIndirectArgBuffers)
	{
		return false;
	}

	return EnumHasAnyFlags(Usage, BUF_UnorderedAccess);
}
/** Whether the texture qualifies for transient (aliased) allocation. Shared textures never do. */
bool FRDGBuilder::IsTransient(FRDGTextureRef Texture) const
{
	return !EnumHasAnyFlags(Texture->Desc.Flags, ETextureCreateFlags::Shared)
		&& IsTransientInternal(Texture, EnumHasAnyFlags(Texture->Desc.Flags, ETextureCreateFlags::FastVRAM));
}
/**
 * Shared transient-eligibility filter for buffers and textures.
 * FastVRAM resources bypass most of the filtering (they are performance critical);
 * everything else is checked against the allocator mode, the per-resource
 * non-transient flag, and the extraction CVars.
 */
bool FRDGBuilder::IsTransientInternal(FRDGViewableResource* Resource, bool bFastVRAM) const
{
	// Immediate mode can't use the transient allocator because we don't know if the user will extract the resource.
	if (!GRDGTransientAllocator || IsImmediateMode())
	{
		return false;
	}

	const bool bQualifiesAsFastVRAM = bFastVRAM && FPlatformMemory::SupportsFastVRAMMemory();
	if (!bQualifiesAsFastVRAM)
	{
		// Mode 2 restricts transient allocation to FastVRAM resources only.
		if (GRDGTransientAllocator == 2)
		{
			return false;
		}
		if (Resource->bForceNonTransient)
		{
			return false;
		}
		if (Resource->bExtracted)
		{
			// Extracted resources are filtered by CVar: 0 disables them entirely;
			// 1 allows them unless the resource explicitly opted out.
			if (GRDGTransientExtractedResources == 0)
			{
				return false;
			}
			if (GRDGTransientExtractedResources == 1 && Resource->TransientExtractionHint == FRDGViewableResource::ETransientExtractionHint::Disable)
			{
				return false;
			}
		}
	}
#if RDG_ENABLE_DEBUG
	// Debug filter to force specific resources out of the transient allocator.
	if (GRDGDebugDisableTransientResources != 0 && IsDebugAllowedForResource(Resource->Name))
	{
		return false;
	}
#endif
	return true;
}
// Builds an empty graph bound to the immediate command list. Parallel execute/setup
// are enabled only when both the global CVars and the caller-provided flags allow it.
FRDGBuilder::FRDGBuilder(FRHICommandListImmediate& InRHICmdList, FRDGEventName InName, ERDGBuilderFlags InFlags)
: RHICmdList(InRHICmdList)
, Blackboard(Allocator)
, BuilderName(InName)
, CompilePipe(TEXT("RDG_CompilePipe"))
#if RDG_CPU_SCOPES
, CPUScopeStacks(Allocator)
#endif
, GPUScopeStacks(Allocator)
, bParallelExecuteEnabled(IsParallelExecuteEnabled() && EnumHasAnyFlags(InFlags, ERDGBuilderFlags::AllowParallelExecute))
, bParallelSetupEnabled(IsParallelSetupEnabled() && EnumHasAnyFlags(InFlags, ERDGBuilderFlags::AllowParallelExecute))
#if RDG_ENABLE_DEBUG
, UserValidation(Allocator, bParallelExecuteEnabled)
, BarrierValidation(&Passes, BuilderName)
#endif
, TransientResourceAllocator(GRDGTransientResourceAllocator.Get())
, ExtendResourceLifetimeScope(RHICmdList)
{
AddProloguePass();
#if RDG_EVENTS != RDG_EVENTS_NONE
// This is polled once as a workaround for a race condition since the underlying global is not always changed on the render thread.
GRDGEmitDrawEvents_RenderThread = GetEmitDrawEvents();
#endif
#if RHI_WANT_BREADCRUMB_EVENTS
// Breadcrumb state is only needed when passes are recorded/executed in parallel.
if (bParallelExecuteEnabled)
{
BreadcrumbState = FRDGBreadcrumbState::Create(Allocator);
}
#endif
}
// When parallel destruction is enabled, ships the heavyweight containers off to the
// async deleter (invoked by the base class destructor) so teardown cost leaves the
// render thread; the lambda body is intentionally empty — destruction of the
// captured moves is the work.
FRDGBuilder::~FRDGBuilder()
{
if (bParallelExecuteEnabled && GRDGParallelDestruction > 0)
{
// Move expensive operations into the async deleter, which will be called in the base class destructor.
BeginAsyncDelete([
Passes = MoveTemp(Passes),
Textures = MoveTemp(Textures),
Buffers = MoveTemp(Buffers),
Views = MoveTemp(Views),
UniformBuffers = MoveTemp(UniformBuffers),
Blackboard = MoveTemp(Blackboard),
ActivePooledTextures = MoveTemp(ActivePooledTextures),
ActivePooledBuffers = MoveTemp(ActivePooledBuffers),
UploadedBuffers = MoveTemp(UploadedBuffers)
] () mutable {});
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Pins an RDG buffer as external: forces it non-transient, realizes its RHI
 * resource at the prologue, and registers it in the external buffer map.
 * Safe to call repeatedly; subsequent calls just return the pooled buffer.
 */
const TRefCountPtr<FRDGPooledBuffer>& FRDGBuilder::ConvertToExternalBuffer(FRDGBufferRef Buffer)
{
	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateConvertToExternalResource(Buffer));

	if (Buffer->bExternal)
	{
		return GetPooledBuffer(Buffer);
	}

	Buffer->bExternal = 1;
	Buffer->bForceNonTransient = 1;
	BeginResourceRHI(GetProloguePassHandle(), Buffer);
	ExternalBuffers.Add(Buffer->GetRHIUnchecked(), Buffer);

	return GetPooledBuffer(Buffer);
}
/**
 * Pins an RDG texture as external: forces it non-transient, realizes its RHI
 * resource at the prologue, and registers it in the external texture map.
 * Safe to call repeatedly; subsequent calls just return the pooled texture.
 */
const TRefCountPtr<IPooledRenderTarget>& FRDGBuilder::ConvertToExternalTexture(FRDGTextureRef Texture)
{
	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateConvertToExternalResource(Texture));

	if (Texture->bExternal)
	{
		return GetPooledTexture(Texture);
	}

	Texture->bExternal = 1;
	Texture->bForceNonTransient = 1;
	BeginResourceRHI(GetProloguePassHandle(), Texture);
	ExternalTextures.Add(Texture->GetRHIUnchecked(), Texture);

	return GetPooledTexture(Texture);
}
// Converts a uniform buffer (and, recursively, every RDG resource it references)
// to external so the RHI uniform buffer can be created immediately and outlive
// graph execution. Returns the created RHI uniform buffer.
FRHIUniformBuffer* FRDGBuilder::ConvertToExternalUniformBuffer(FRDGUniformBufferRef UniformBuffer)
{
if (!UniformBuffer->bExternal)
{
// First, externalize every referenced resource so each has a valid RHI handle
// before InitRHI() reads them below.
UniformBuffer->GetParameters().Enumerate([this](const FRDGParameter& Param)
{
auto ConvertTexture = [](FRDGBuilder* Builder, FRDGTextureRef Texture)
{
if (Texture && !Texture->IsExternal())
{
Builder->ConvertToExternalTexture(Texture);
}
};
auto ConvertBuffer = [](FRDGBuilder* Builder, FRDGBufferRef Buffer)
{
if (Buffer && !Buffer->IsExternal())
{
Builder->ConvertToExternalBuffer(Buffer);
}
};
switch (Param.GetType())
{
case UBMT_RDG_TEXTURE:
{
ConvertTexture(this, Param.GetAsTexture());
}
break;
case UBMT_RDG_TEXTURE_ACCESS:
{
ConvertTexture(this, Param.GetAsTextureAccess().GetTexture());
}
break;
case UBMT_RDG_TEXTURE_ACCESS_ARRAY:
{
const FRDGTextureAccessArray& Array = Param.GetAsTextureAccessArray();
for (int Index = 0; Index < Array.Num(); ++Index)
{
ConvertTexture(this, Array[Index].GetTexture());
}
}
break;
// View parameters additionally need their RHI view created up front.
case UBMT_RDG_TEXTURE_SRV:
{
ConvertTexture(this, Param.GetAsTextureSRV()->Desc.Texture);
InitRHI(Param.GetAsView());
}
break;
case UBMT_RDG_TEXTURE_UAV:
{
ConvertTexture(this, Param.GetAsTextureUAV()->Desc.Texture);
InitRHI(Param.GetAsView());
}
break;
case UBMT_RDG_BUFFER_ACCESS:
{
ConvertBuffer(this, Param.GetAsBufferAccess().GetBuffer());
}
break;
case UBMT_RDG_BUFFER_ACCESS_ARRAY:
{
const FRDGBufferAccessArray& Array = Param.GetAsBufferAccessArray();
for (int Index = 0; Index < Array.Num(); ++Index)
{
ConvertBuffer(this, Array[Index].GetBuffer());
}
}
break;
case UBMT_RDG_BUFFER_SRV:
{
ConvertBuffer(this, Param.GetAsBufferSRV()->Desc.Buffer);
InitRHI(Param.GetAsView());
}
break;
case UBMT_RDG_BUFFER_UAV:
{
ConvertBuffer(this, Param.GetAsBufferUAV()->Desc.Buffer);
InitRHI(Param.GetAsView());
}
break;
// Nested uniform buffers are converted recursively.
case UBMT_RDG_UNIFORM_BUFFER:
{
FRDGUniformBufferRef Buffer = Param.GetAsUniformBuffer().GetUniformBuffer();
if (Buffer)
{
ConvertToExternalUniformBuffer(Buffer);
}
}
break;
// Non-RDG cases
case UBMT_INT32:
case UBMT_UINT32:
case UBMT_FLOAT32:
case UBMT_TEXTURE:
case UBMT_SRV:
case UBMT_UAV:
case UBMT_SAMPLER:
case UBMT_NESTED_STRUCT:
case UBMT_INCLUDED_STRUCT:
case UBMT_REFERENCED_STRUCT:
case UBMT_RENDER_TARGET_BINDING_SLOTS:
break;
default:
check(0);
}
});
}
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateConvertToExternalUniformBuffer(UniformBuffer));
// Now create the RHI uniform buffer itself (once).
if (!UniformBuffer->bExternal)
{
UniformBuffer->bExternal = true;
UniformBuffer->bQueuedForCreate = true;
// It's safe to reset the access to false because validation won't allow this call during execution.
IF_RDG_ENABLE_DEBUG(GRDGAllowRHIAccess = true);
UniformBuffer->InitRHI();
IF_RDG_ENABLE_DEBUG(GRDGAllowRHIAccess = false);
}
return UniformBuffer->GetRHIUnchecked();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Parameter struct used by the access-mode passes emitted in FlushAccessModeQueue:
// it lists the textures / buffers whose access mode is changing so the graph
// records the corresponding transitions.
BEGIN_SHADER_PARAMETER_STRUCT(FAccessModePassParameters, )
RDG_TEXTURE_ACCESS_ARRAY(Textures)
RDG_BUFFER_ACCESS_ARRAY(Buffers)
END_SHADER_PARAMETER_STRUCT()
/**
 * Queues a resource for transition into the external (read-only) access mode on
 * the given pipelines. Redundant calls and locked resources are no-ops.
 */
void FRDGBuilder::UseExternalAccessMode(FRDGViewableResource* Resource, ERHIAccess ReadOnlyAccess, ERHIPipeline Pipelines)
{
	// Without async compute support everything runs on the graphics pipe.
	if (!IsAsyncComputeSupported())
	{
		Pipelines = ERHIPipeline::Graphics;
	}

	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateUseExternalAccessMode(Resource, ReadOnlyAccess, Pipelines));

	auto& State = Resource->AccessModeState;

	// Back-to-back external requests were already validated to carry matching
	// parameters, and locked resources cannot change mode — both are safe no-ops.
	if (State.Mode == FRDGViewableResource::EAccessMode::External || State.bLocked)
	{
		return;
	}

	// A queued internal transition implies the resource was previously external;
	// flush so an 'end' pass bounds any usage of the old external state first.
	if (State.bQueued)
	{
		FlushAccessModeQueue();
	}
	check(!State.bQueued);

	AccessModeQueue.Emplace(Resource);
	State.bQueued = 1;
	Resource->SetExternalAccessMode(ReadOnlyAccess, Pipelines);
}
/**
 * Queues a resource for transition back into the internal access mode.
 * A still-pending external transition is simply backed out of the queue.
 */
void FRDGBuilder::UseInternalAccessMode(FRDGViewableResource* Resource)
{
	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateUseInternalAccessMode(Resource));

	auto& State = Resource->AccessModeState;

	// No-op when already internal (or queued for it), or when mode changes are locked.
	if (State.Mode == FRDGViewableResource::EAccessMode::Internal || State.bLocked)
	{
		return;
	}

	if (State.bQueued)
	{
		// The queued external transition has not been flushed yet — cancel it.
		const int32 QueueIndex = AccessModeQueue.IndexOfByKey(Resource);
		check(QueueIndex < AccessModeQueue.Num());
		AccessModeQueue.RemoveAtSwap(QueueIndex, 1, false);
		State.bQueued = 0;
	}
	else
	{
		AccessModeQueue.Emplace(Resource);
		State.bQueued = 1;
	}

	State.Mode = FRDGViewableResource::EAccessMode::Internal;
}
// Drains the queued access-mode changes by emitting up to two dedicated passes
// (graphics and/or async compute) whose parameter lists carry the affected
// resources with their new access states, so the graph compiles the transitions.
void FRDGBuilder::FlushAccessModeQueue()
{
if (AccessModeQueue.IsEmpty() || !AuxiliaryPasses.IsFlushAccessModeQueueAllowed())
{
return;
}
// Don't allow Dump GPU to dump access mode passes. We rely on FlushAccessQueue in dump GPU to transition things back to external access.
RDG_RECURSION_COUNTER_SCOPE(AuxiliaryPasses.Dump);
RDG_RECURSION_COUNTER_SCOPE(AuxiliaryPasses.FlushAccessModeQueue);
// One parameter struct per pipeline, indexed by GetRHIPipelineIndex.
FAccessModePassParameters* ParametersByPipeline[] =
{
AllocParameters<FAccessModePassParameters>(),
AllocParameters<FAccessModePassParameters>()
};
// Per-pipeline mask limiting the requested access to what that pipe supports.
const ERHIAccess AccessMaskByPipeline[] =
{
ERHIAccess::ReadOnlyExclusiveMask,
ERHIAccess::ReadOnlyExclusiveComputeMask
};
ERHIPipeline ParameterPipelines = ERHIPipeline::None;
// In parallel setup, mode changes are deferred as ops applied with the pass;
// otherwise they are applied to the resource immediately below.
TArray<FRDGPass::FExternalAccessOp, FRDGArrayAllocator> Ops;
Ops.Reserve(bParallelSetupEnabled ? AccessModeQueue.Num() : 0);
for (FRDGViewableResource* Resource : AccessModeQueue)
{
const auto& AccessModeState = Resource->AccessModeState;
Resource->AccessModeState.bQueued = false;
if (bParallelSetupEnabled)
{
Ops.Emplace(Resource, AccessModeState.Mode);
}
else
{
Resource->AccessModeState.ActiveMode = Resource->AccessModeState.Mode;
}
ParameterPipelines |= AccessModeState.Pipelines;
// Track which resources currently sit in external access mode.
if (AccessModeState.Mode == FRDGViewableResource::EAccessMode::External)
{
ExternalAccessResources.Emplace(Resource);
}
else
{
ExternalAccessResources.Remove(Resource);
}
// Add the resource to the parameter list of every pipeline it targets.
for (uint32 PipelineIndex = 0; PipelineIndex < GetRHIPipelineCount(); ++PipelineIndex)
{
const ERHIPipeline Pipeline = static_cast<ERHIPipeline>(1 << PipelineIndex);
if (EnumHasAnyFlags(AccessModeState.Pipelines, Pipeline))
{
const ERHIAccess Access = AccessModeState.Access & AccessMaskByPipeline[PipelineIndex];
check(Access != ERHIAccess::None);
switch (Resource->Type)
{
case ERDGViewableResourceType::Texture:
ParametersByPipeline[PipelineIndex]->Textures.Emplace(GetAsTexture(Resource), Access);
break;
case ERDGViewableResourceType::Buffer:
ParametersByPipeline[PipelineIndex]->Buffers.Emplace(GetAsBuffer(Resource), Access);
break;
}
}
}
}
if (EnumHasAnyFlags(ParameterPipelines, ERHIPipeline::Graphics))
{
// The pass has no execution body; it exists purely to carry the transitions.
auto ExecuteLambda = [](FRHIComputeCommandList&) {};
using LambdaPassType = TRDGLambdaPass<FAccessModePassParameters, decltype(ExecuteLambda)>;
FAccessModePassParameters* Parameters = ParametersByPipeline[GetRHIPipelineIndex(ERHIPipeline::Graphics)];
FRDGPass* Pass = Passes.Allocate<LambdaPassType>(
Allocator,
RDG_EVENT_NAME("AccessModePass[Graphics] (Textures: %d, Buffers: %d)", Parameters->Textures.Num(), Parameters->Buffers.Num()),
FAccessModePassParameters::FTypeInfo::GetStructMetadata(),
Parameters,
// Use all of the work flags so that any access is valid.
ERDGPassFlags::Copy | ERDGPassFlags::Compute | ERDGPassFlags::Raster | ERDGPassFlags::SkipRenderPass | ERDGPassFlags::NeverCull,
MoveTemp(ExecuteLambda));
Pass->ExternalAccessOps = MoveTemp(Ops);
Pass->bExternalAccessPass = 1;
SetupParameterPass(Pass);
}
if (EnumHasAnyFlags(ParameterPipelines, ERHIPipeline::AsyncCompute))
{
auto ExecuteLambda = [](FRHIComputeCommandList&) {};
using LambdaPassType = TRDGLambdaPass<FAccessModePassParameters, decltype(ExecuteLambda)>;
FAccessModePassParameters* Parameters = ParametersByPipeline[GetRHIPipelineIndex(ERHIPipeline::AsyncCompute)];
FRDGPass* Pass = Passes.Allocate<LambdaPassType>(
Allocator,
RDG_EVENT_NAME("AccessModePass[AsyncCompute] (Textures: %d, Buffers: %d)", Parameters->Textures.Num(), Parameters->Buffers.Num()),
FAccessModePassParameters::FTypeInfo::GetStructMetadata(),
Parameters,
ERDGPassFlags::AsyncCompute | ERDGPassFlags::NeverCull,
MoveTemp(ExecuteLambda));
// NOTE(review): when both pipelines are active, Ops was already moved into the
// graphics pass above, so this pass receives an empty op list. Presumably
// intentional (the ops only need to be applied once) — confirm upstream.
Pass->ExternalAccessOps = MoveTemp(Ops);
Pass->bExternalAccessPass = 1;
SetupParameterPass(Pass);
}
AccessModeQueue.Reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Convenience overload: registers an external pooled texture using its debug name (or "External" when unnamed). */
FRDGTextureRef FRDGBuilder::RegisterExternalTexture(
	const TRefCountPtr<IPooledRenderTarget>& ExternalPooledTexture,
	ERDGTextureFlags Flags)
{
#if RDG_ENABLE_DEBUG
	checkf(ExternalPooledTexture.IsValid(), TEXT("Attempted to register NULL external texture."));
#endif
	const TCHAR* DebugName = ExternalPooledTexture->GetDesc().DebugName;
	return RegisterExternalTexture(ExternalPooledTexture, DebugName ? DebugName : TEXT("External"), Flags);
}
// Registers an already-allocated pooled render target with the graph as an
// external texture. Idempotent per RHI texture: re-registering returns the
// existing RDG texture.
FRDGTexture* FRDGBuilder::RegisterExternalTexture(
const TRefCountPtr<IPooledRenderTarget>& ExternalPooledTexture,
const TCHAR* Name,
ERDGTextureFlags Flags)
{
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateRegisterExternalTexture(ExternalPooledTexture, Name, Flags));
FRHITexture* ExternalTextureRHI = ExternalPooledTexture->GetRHI();
IF_RDG_ENABLE_DEBUG(checkf(ExternalTextureRHI, TEXT("Attempted to register texture %s, but its RHI texture is null."), Name));
// Already registered: return the existing RDG texture.
if (FRDGTexture* FoundTexture = FindExternalTexture(ExternalTextureRHI))
{
return FoundTexture;
}
const FRDGTextureDesc Desc = Translate(ExternalPooledTexture->GetDesc());
FRDGTexture* Texture = Textures.Allocate(Allocator, Name, Desc, Flags);
SetRHI(Texture, ExternalPooledTexture.GetReference(), GetProloguePassHandle());
Texture->bExternal = true;
ExternalTextures.Add(Texture->GetRHIUnchecked(), Texture);
// Transient textures need their subresource state seeded at the prologue pass.
if (Texture->bTransient)
{
FRDGSubresourceState State;
State.SetPass(ERHIPipeline::Graphics, GetProloguePassHandle());
InitTextureSubresources(*Texture->State, Texture->Layout, State);
}
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateRegisterExternalTexture(Texture));
IF_RDG_ENABLE_TRACE(Trace.AddResource(Texture));
return Texture;
}
/** Convenience overload: registers an external pooled buffer using its own name (or "External" when unnamed). */
FRDGBufferRef FRDGBuilder::RegisterExternalBuffer(const TRefCountPtr<FRDGPooledBuffer>& ExternalPooledBuffer, ERDGBufferFlags Flags)
{
#if RDG_ENABLE_DEBUG
	checkf(ExternalPooledBuffer.IsValid(), TEXT("Attempted to register NULL external buffer."));
#endif
	const TCHAR* DebugName = ExternalPooledBuffer->Name;
	return RegisterExternalBuffer(ExternalPooledBuffer, DebugName ? DebugName : TEXT("External"), Flags);
}
// Registers an already-allocated pooled buffer with the graph as an external
// buffer. Idempotent per pooled buffer: re-registering returns the existing
// RDG buffer.
FRDGBufferRef FRDGBuilder::RegisterExternalBuffer(
const TRefCountPtr<FRDGPooledBuffer>& ExternalPooledBuffer,
const TCHAR* Name,
ERDGBufferFlags Flags)
{
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateRegisterExternalBuffer(ExternalPooledBuffer, Name, Flags));
// Already registered: return the existing RDG buffer.
if (FRDGBuffer* FoundBuffer = FindExternalBuffer(ExternalPooledBuffer))
{
return FoundBuffer;
}
FRDGBuffer* Buffer = Buffers.Allocate(Allocator, Name, ExternalPooledBuffer->Desc, Flags);
SetRHI(Buffer, ExternalPooledBuffer, GetProloguePassHandle());
Buffer->bExternal = true;
ExternalBuffers.Add(Buffer->GetRHIUnchecked(), Buffer);
IF_RDG_ENABLE_DEBUG(UserValidation.ValidateRegisterExternalBuffer(Buffer));
IF_RDG_ENABLE_TRACE(Trace.AddResource(Buffer));
return Buffer;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Records a producer -> consumer edge between two passes, deduplicating repeat
// edges. Cross-pipeline edges additionally maintain the sorted consumer list on
// the producer and the latest cross-pipeline producer on the consumer, which the
// compiler uses for fencing between the graphics and async-compute pipes.
void FRDGBuilder::AddPassDependency(FRDGPass* Producer, FRDGPass* Consumer)
{
auto& Producers = Consumer->Producers;
// Skip duplicate edges.
if (Producers.Find(Producer->Handle) == INDEX_NONE)
{
#if RDG_STATS
GRDGStatPassDependencyCount++;
#endif
if (Producer->Pipeline != Consumer->Pipeline)
{
// Sorted insert keeping the handle list unique.
const auto BinarySearchOrAdd = [](FRDGPassHandleArray& Range, FRDGPassHandle Handle)
{
const int32 LowerBoundIndex = Algo::LowerBound(Range, Handle);
if (LowerBoundIndex < Range.Num())
{
if (Range[LowerBoundIndex] == Handle)
{
return;
}
}
Range.Insert(Handle, LowerBoundIndex);
};
// Consumers could be culled, so we have to store all of them in a sorted list.
BinarySearchOrAdd(Producer->CrossPipelineConsumers, Consumer->Handle);
// Finds the latest producer on the other pipeline for the consumer.
if (Consumer->CrossPipelineProducer.IsNull() || Producer->Handle > Consumer->CrossPipelineProducer)
{
Consumer->CrossPipelineProducer = Producer->Handle;
}
}
Producers.Add(Producer->Handle);
}
}
// Updates the per-pipeline last-producer state for one subresource and adds pass
// dependency edges from each prior producer to the pass described by NextState. The
// resulting edges feed pass culling and cross-pipeline fencing. Writable accesses
// replace the tracked producer state for the accessing pass's own pipeline; read-only
// accesses only contribute edges and leave the producer state untouched.
void FRDGBuilder::AddCullingDependency(FRDGProducerStatesByPipeline& LastProducers, const FRDGProducerState& NextState, ERHIPipeline NextPipeline)
{
	for (ERHIPipeline LastPipeline : GetRHIPipelines())
	{
		FRDGProducerState& LastProducer = LastProducers[LastPipeline];

		// ERHIAccess::Unknown means no producer has been recorded on this pipeline yet.
		if (LastProducer.Access != ERHIAccess::Unknown)
		{
			FRDGPass* LastProducerPass = LastProducer.Pass;

			if (LastPipeline != NextPipeline)
			{
				// Only certain platforms allow multi-pipe UAV access.
				const ERHIAccess MultiPipelineUAVMask = ERHIAccess::UAVMask & GRHIMultiPipelineMergeableAccessMask;

				// If skipping a UAV barrier across pipelines, use the producer pass that will emit the correct async fence.
				if (EnumHasAnyFlags(NextState.Access, MultiPipelineUAVMask) && SkipUAVBarrier(LastProducer.NoUAVBarrierHandle, NextState.NoUAVBarrierHandle))
				{
					LastProducerPass = LastProducer.PassIfSkipUAVBarrier;
				}
			}

			if (LastProducerPass)
			{
				AddPassDependency(LastProducerPass, NextState.Pass);
			}
		}
	}

	if (IsWritableAccess(NextState.Access))
	{
		FRDGProducerState& LastProducer = LastProducers[NextPipeline];

		// A separate producer pass is tracked for UAV -> UAV dependencies that are skipped. Consider the following scenario:
		//
		//     Graphics:       A (UAV) -> B (SkipUAV0) -> D (SkipUAV1) -> E (SkipUAV1) -> G (SRV) -> I (UAV2)
		//
		//     Async Compute:       C (SkipUAV0)    ->    F (SkipUAV1)    ->    H (SRV)
		//
		// Expected Cross Pipe Dependencies: [A -> C], C -> D, [B -> F], F -> G, E -> H, F -> I. The dependencies wrapped in
		// braces are only introduced properly by tracking a different producer for cross-pipeline skip UAV dependencies, which
		// is only updated if skip UAV is inactive, or if transitioning from one skip UAV set to another (or another writable resource).
		if (LastProducer.NoUAVBarrierHandle.IsNull())
		{
			if (NextState.NoUAVBarrierHandle.IsNull())
			{
				// Assigns the next producer when no skip UAV sets are active.
				LastProducer.PassIfSkipUAVBarrier = NextState.Pass;
			}
		}
		else if (LastProducer.NoUAVBarrierHandle != NextState.NoUAVBarrierHandle)
		{
			// Assigns the last producer in the prior skip UAV barrier set when moving out of a skip UAV barrier set.
			LastProducer.PassIfSkipUAVBarrier = LastProducer.Pass;
		}

		LastProducer.Access = NextState.Access;
		LastProducer.Pass = NextState.Pass;
		LastProducer.NoUAVBarrierHandle = NextState.NoUAVBarrierHandle;
	}
}
void FRDGBuilder::CompilePassBarriers()
{
	// Walk the culled graph and compile barriers for each subresource. Certain transitions are redundant; read-to-read, for example.
	// We can avoid them by traversing and merging compatible states together. The merging states removes a transition, but the merging
	// heuristic is conservative and choosing not to merge doesn't necessarily mean a transition is performed. They are two distinct steps.
	// Merged states track the first and last pass interval. Pass references are also accumulated onto each resource. This must happen
	// after culling since culled passes can't contribute references.

	const FRDGPassHandle ProloguePassHandle = GetProloguePassHandle();
	const FRDGPassHandle EpiloguePassHandle = GetEpiloguePassHandle();

	SCOPED_NAMED_EVENT(CompileBarriers, FColor::Emerald);

	// Only passes strictly between the prologue and epilogue sentinels are user passes.
	for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
	{
		FRDGPass* Pass = Passes[PassHandle];

		// Culled or parameterless passes contribute no subresource states.
		if (Pass->bCulled || Pass->bEmptyParameters)
		{
			continue;
		}

		const ERHIPipeline PassPipeline = Pass->Pipeline;

		// Either starts a new merge state for the resource, or folds the pass state into
		// the current merge state and extends its first/last pass interval on this pipeline.
		const auto MergeSubresourceStates = [&](ERDGViewableResourceType ResourceType, FRDGSubresourceState*& PassMergeState, FRDGSubresourceState*& ResourceMergeState, const FRDGSubresourceState& PassState)
		{
			if (!ResourceMergeState || !FRDGSubresourceState::IsMergeAllowed(ResourceType, *ResourceMergeState, PassState))
			{
				// Allocate a new pending merge state and assign it to the pass state.
				ResourceMergeState = AllocSubresource(PassState);
			}
			else
			{
				// Merge the pass state into the merged state.
				ResourceMergeState->Access |= PassState.Access;

				FRDGPassHandle& FirstPassHandle = ResourceMergeState->FirstPass[PassPipeline];

				if (FirstPassHandle.IsNull())
				{
					FirstPassHandle = PassHandle;
				}

				ResourceMergeState->LastPass[PassPipeline] = PassHandle;
			}

			PassMergeState = ResourceMergeState;
		};

		for (auto& PassState : Pass->TextureStates)
		{
			FRDGTextureRef Texture = PassState.Texture;

			// An external texture that requested an immediate first barrier is anchored to
			// the first pass that actually uses it; all its subresource states move to that pass.
			if (Texture->FirstBarrier == FRDGTexture::EFirstBarrier::ImmediateRequested)
			{
				check(Texture->bExternal);
				Texture->FirstBarrier = FRDGTexture::EFirstBarrier::ImmediateConfirmed;
				Texture->FirstPass = PassHandle;

				for (FRDGSubresourceState& SubresourceState : *Texture->State)
				{
					SubresourceState.SetPass(ERHIPipeline::Graphics, PassHandle);
				}
			}

#if RDG_STATS
			GRDGStatTextureReferenceCount += PassState.ReferenceCount;
#endif

			for (int32 Index = 0; Index < PassState.State.Num(); ++Index)
			{
				// Unknown access means this pass does not touch the subresource.
				if (PassState.State[Index].Access == ERHIAccess::Unknown)
				{
					continue;
				}

				MergeSubresourceStates(ERDGViewableResourceType::Texture, PassState.MergeState[Index], Texture->MergeState[Index], PassState.State[Index]);
			}
		}

		for (auto& PassState : Pass->BufferStates)
		{
			FRDGBufferRef Buffer = PassState.Buffer;

			// Same immediate-first-barrier anchoring as textures, but buffers have a single subresource state.
			if (Buffer->FirstBarrier == FRDGBuffer::EFirstBarrier::ImmediateRequested)
			{
				check(Buffer->bExternal);
				Buffer->FirstBarrier = FRDGBuffer::EFirstBarrier::ImmediateConfirmed;
				Buffer->FirstPass = PassHandle;
				Buffer->State->SetPass(ERHIPipeline::Graphics, PassHandle);
			}

#if RDG_STATS
			GRDGStatBufferReferenceCount += PassState.ReferenceCount;
#endif

			MergeSubresourceStates(ERDGViewableResourceType::Buffer, PassState.MergeState, Buffer->MergeState, PassState.State);
		}
	}
}
// Compiles the recorded pass graph: builds producer/consumer dependencies, culls
// unreachable passes, merges compatible raster passes into single RHI render passes,
// and establishes async compute fork/join fence points. Runs after all passes have
// been added and before resource collection / barrier compilation.
void FRDGBuilder::Compile()
{
	SCOPE_CYCLE_COUNTER(STAT_RDG_CompileTime);
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDG_Compile, GRDGVerboseCSVStats != 0);

	const FRDGPassHandle ProloguePassHandle = GetProloguePassHandle();
	const FRDGPassHandle EpiloguePassHandle = GetEpiloguePassHandle();

	const uint32 CompilePassCount = Passes.Num();

	TransitionCreateQueue.Reserve(CompilePassCount);

	const bool bCullPasses = GRDGCullPasses > 0;

	// When parallel setup is on, pass resource setup was queued; drain it now on this thread.
	if (bParallelSetupEnabled)
	{
		SetupPassQueue.Flush(TEXT("FRDGBuilder::SetupPassResources"), [this](FRDGPass* Pass) { SetupPassResources(Pass); });
	}

	if (bCullPasses)
	{
		CullPassStack.Reserve(CompilePassCount);
	}

	if (bCullPasses || AsyncComputePassCount > 0)
	{
		SCOPED_NAMED_EVENT(PassDependencies, FColor::Emerald);

		// With parallel setup, dependencies were already built in SetupPassResources; otherwise build them here.
		if (!bParallelSetupEnabled)
		{
			for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
			{
				SetupPassDependencies(Passes[PassHandle]);
			}
		}

		const auto AddLastProducersToCullStack = [&](const FRDGProducerStatesByPipeline& LastProducers)
		{
			for (const FRDGProducerState& LastProducer : LastProducers)
			{
				if (LastProducer.Pass)
				{
					CullPassStack.Emplace(LastProducer.Pass->Handle);
				}
			}
		};

		// The last producer of a extracted or external resource is a cull graph root as it's not contained within the graph.
		for (const FExtractedTexture& ExtractedTexture : ExtractedTextures)
		{
			FRDGTextureRef Texture = ExtractedTexture.Texture;

			for (auto& LastProducer : Texture->LastProducers)
			{
				AddLastProducersToCullStack(LastProducer);
			}
		}

		for (const FExtractedBuffer& ExtractedBuffer : ExtractedBuffers)
		{
			FRDGBufferRef Buffer = ExtractedBuffer.Buffer;

			AddLastProducersToCullStack(Buffer->LastProducer);
		}

		for (const auto& Pair : ExternalTextures)
		{
			FRDGTexture* Texture = Pair.Value;

			for (auto& LastProducer : Texture->LastProducers)
			{
				AddLastProducersToCullStack(LastProducer);
			}
		}

		for (const auto& Pair : ExternalBuffers)
		{
			FRDGBuffer* Buffer = Pair.Value;

			AddLastProducersToCullStack(Buffer->LastProducer);
		}
	}
	else if (!bParallelSetupEnabled)
	{
		// No culling and no async compute: only the per-resource reference counts are needed.
		for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
		{
			FRDGPass* Pass = Passes[PassHandle];

			// Add reference counts for passes.
			for (auto& PassState : Pass->TextureStates)
			{
				PassState.Texture->ReferenceCount += PassState.ReferenceCount;
			}

			for (auto& PassState : Pass->BufferStates)
			{
				PassState.Buffer->ReferenceCount += PassState.ReferenceCount;
			}
		}
	}

	// Extraction holds an extra reference so the resource survives until the epilogue.
	for (const FExtractedTexture& ExtractedTexture : ExtractedTextures)
	{
		ExtractedTexture.Texture->ReferenceCount++;
	}

	for (const FExtractedBuffer& ExtractedBuffer : ExtractedBuffers)
	{
		ExtractedBuffer.Buffer->ReferenceCount++;
	}

	// All dependencies in the raw graph have been specified; if enabled, all passes are marked as culled and a
	// depth first search is employed to find reachable regions of the graph. Roots of the search are those passes
	// with outputs leaving the graph or those marked to never cull.
	if (bCullPasses)
	{
		SCOPED_NAMED_EVENT(PassCulling, FColor::Emerald);

		CullPassStack.Emplace(EpiloguePassHandle);

		// Mark the epilogue pass as culled so that it is traversed.
		EpiloguePass->bCulled = 1;

		// Manually mark the prologue passes as not culled.
		ProloguePass->bCulled = 0;

		// Depth-first traversal: popping a still-culled pass un-culls it and pushes its producers.
		while (CullPassStack.Num())
		{
			FRDGPass* Pass = Passes[CullPassStack.Pop()];

			if (Pass->bCulled)
			{
				Pass->bCulled = 0;

				CullPassStack.Append(Pass->Producers);
			}
		}

		// Remove the reference contributions of passes that remained culled.
		for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
		{
			FRDGPass* Pass = Passes[PassHandle];

			if (!Pass->bCulled)
			{
				continue;
			}

			// Subtract reference counts from culled passes that were added during pass setup.
			for (auto& PassState : Pass->TextureStates)
			{
				PassState.Texture->ReferenceCount -= PassState.ReferenceCount;
			}

			for (auto& PassState : Pass->BufferStates)
			{
				PassState.Buffer->ReferenceCount -= PassState.ReferenceCount;
			}
		}
	}

	// Traverses passes on the graphics pipe and merges raster passes with the same render targets into a single RHI render pass.
	if (IsRenderPassMergeEnabled() && RasterPassCount > 0)
	{
		SCOPED_NAMED_EVENT(MergeRenderPasses, FColor::Emerald);

		TArray<FRDGPassHandle, TInlineAllocator<32, FRDGArrayAllocator>> PassesToMerge;
		FRDGPass* PrevPass = nullptr;
		const FRenderTargetBindingSlots* PrevRenderTargets = nullptr;

		const auto CommitMerge = [&]
		{
			if (PassesToMerge.Num())
			{
				const auto SetEpilogueBarrierPass = [&](FRDGPass* Pass, FRDGPassHandle EpilogueBarrierPassHandle)
				{
					Pass->EpilogueBarrierPass = EpilogueBarrierPassHandle;
					Pass->ResourcesToEnd.Reset();
					Passes[EpilogueBarrierPassHandle]->ResourcesToEnd.Add(Pass);
				};

				const auto SetPrologueBarrierPass = [&](FRDGPass* Pass, FRDGPassHandle PrologueBarrierPassHandle)
				{
					Pass->PrologueBarrierPass = PrologueBarrierPassHandle;
					Pass->ResourcesToBegin.Reset();
					Passes[PrologueBarrierPassHandle]->ResourcesToBegin.Add(Pass);
				};

				const FRDGPassHandle FirstPassHandle = PassesToMerge[0];
				const FRDGPassHandle LastPassHandle = PassesToMerge.Last();
				Passes[FirstPassHandle]->ResourcesToBegin.Reserve(PassesToMerge.Num());
				Passes[LastPassHandle]->ResourcesToEnd.Reserve(PassesToMerge.Num());

				// Given an interval of passes to merge into a single render pass: [B, X, X, X, X, E]
				//
				// The begin pass (B) and end (E) passes will call {Begin, End}RenderPass, respectively. Also,
				// begin will handle all prologue barriers for the entire merged interval, and end will handle all
				// epilogue barriers. This avoids transitioning of resources within the render pass and batches the
				// transitions more efficiently. This assumes we have filtered out dependencies between passes from
				// the merge set, which is done during traversal.

				// (B) First pass in the merge sequence.
				{
					FRDGPass* Pass = Passes[FirstPassHandle];
					Pass->bSkipRenderPassEnd = 1;
					SetEpilogueBarrierPass(Pass, LastPassHandle);
				}

				// (X) Intermediate passes.
				for (int32 PassIndex = 1, PassCount = PassesToMerge.Num() - 1; PassIndex < PassCount; ++PassIndex)
				{
					const FRDGPassHandle PassHandle = PassesToMerge[PassIndex];
					FRDGPass* Pass = Passes[PassHandle];
					Pass->bSkipRenderPassBegin = 1;
					Pass->bSkipRenderPassEnd = 1;
					SetPrologueBarrierPass(Pass, FirstPassHandle);
					SetEpilogueBarrierPass(Pass, LastPassHandle);
				}

				// (E) Last pass in the merge sequence.
				{
					FRDGPass* Pass = Passes[LastPassHandle];
					Pass->bSkipRenderPassBegin = 1;
					SetPrologueBarrierPass(Pass, FirstPassHandle);
				}

#if RDG_STATS
				GRDGStatRenderPassMergeCount += PassesToMerge.Num();
#endif
			}

			PassesToMerge.Reset();
			PrevPass = nullptr;
			PrevRenderTargets = nullptr;
		};

		for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
		{
			FRDGPass* NextPass = Passes[PassHandle];

			if (NextPass->bCulled || NextPass->bEmptyParameters)
			{
				continue;
			}

			if (EnumHasAnyFlags(NextPass->Flags, ERDGPassFlags::Raster))
			{
				// A pass where the user controls the render pass or it is forced to skip pass merging can't merge with other passes
				if (EnumHasAnyFlags(NextPass->Flags, ERDGPassFlags::SkipRenderPass | ERDGPassFlags::NeverMerge))
				{
					CommitMerge();
					continue;
				}

				// A pass which writes to resources outside of the render pass introduces new dependencies which break merging.
				if (!NextPass->bRenderPassOnlyWrites)
				{
					CommitMerge();
					continue;
				}

				const FRenderTargetBindingSlots& RenderTargets = NextPass->GetParameters().GetRenderTargets();

				if (PrevPass)
				{
					check(PrevRenderTargets);

					if (PrevRenderTargets->CanMergeBefore(RenderTargets)
#if WITH_MGPU
						&& PrevPass->GPUMask == NextPass->GPUMask
#endif
						)
					{
						// Lazily seed the merge list with the previous pass on the first successful merge check.
						if (!PassesToMerge.Num())
						{
							PassesToMerge.Add(PrevPass->GetHandle());
						}
						PassesToMerge.Add(PassHandle);
					}
					else
					{
						CommitMerge();
					}
				}

				PrevPass = NextPass;
				PrevRenderTargets = &RenderTargets;
			}
			else if (!EnumHasAnyFlags(NextPass->Flags, ERDGPassFlags::AsyncCompute))
			{
				// A non-raster pass on the graphics pipe will invalidate the render target merge.
				CommitMerge();
			}
		}

		// Flush any trailing merge interval at the end of the pass list.
		CommitMerge();
	}

	if (AsyncComputePassCount > 0)
	{
		SCOPED_NAMED_EVENT(AsyncComputeFences, FColor::Emerald);

		// Establishes fork / join overlap regions for async compute. This is used for fencing as well as resource
		// allocation / deallocation. Async compute passes can't allocate / release their resource references until
		// the fork / join is complete, since the two pipes run in parallel. Therefore, all resource lifetimes on
		// async compute are extended to cover the full async region.

		// Forward walk: pick the graphics fork point for each async compute pass.
		FRDGPassHandle CurrentGraphicsForkPassHandle;
		FRDGPass* AsyncComputePassBeforeFork = nullptr;

		for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
		{
			FRDGPass* AsyncComputePass = Passes[PassHandle];

			if (!AsyncComputePass->IsAsyncCompute() || AsyncComputePass->bCulled)
			{
				continue;
			}

			// The fork is the latest of: this pass's cross-pipeline producer, the current fork, or the prologue.
			FRDGPassHandle GraphicsForkPassHandle = FRDGPassHandle::Max(AsyncComputePass->CrossPipelineProducer, FRDGPassHandle::Max(CurrentGraphicsForkPassHandle, ProloguePassHandle));
			FRDGPass* GraphicsForkPass = Passes[GraphicsForkPassHandle];

			AsyncComputePass->GraphicsForkPass = GraphicsForkPassHandle;
			Passes[GraphicsForkPass->PrologueBarrierPass]->ResourcesToBegin.Add(AsyncComputePass);

			if (CurrentGraphicsForkPassHandle != GraphicsForkPassHandle)
			{
				CurrentGraphicsForkPassHandle = GraphicsForkPassHandle;

				FRDGBarrierBatchBegin& EpilogueBarriersToBeginForAsyncCompute = GraphicsForkPass->GetEpilogueBarriersToBeginForAsyncCompute(Allocator, TransitionCreateQueue);

				GraphicsForkPass->bGraphicsFork = 1;
				EpilogueBarriersToBeginForAsyncCompute.SetUseCrossPipelineFence();

				AsyncComputePass->bAsyncComputeBegin = 1;
				AsyncComputePass->GetPrologueBarriersToEnd(Allocator).AddDependency(&EpilogueBarriersToBeginForAsyncCompute);

				// Since we are fencing the graphics pipe to some new async compute work, make sure to flush any prior work.
				if (AsyncComputePassBeforeFork)
				{
					AsyncComputePassBeforeFork->bDispatchAfterExecute = 1;
				}
			}

			AsyncComputePassBeforeFork = AsyncComputePass;
		}

		// Reverse walk: pick the graphics join point for each async compute pass.
		FRDGPassHandle CurrentGraphicsJoinPassHandle;

		for (FRDGPassHandle PassHandle = EpiloguePassHandle - 1; PassHandle > ProloguePassHandle; --PassHandle)
		{
			FRDGPass* AsyncComputePass = Passes[PassHandle];

			if (!AsyncComputePass->IsAsyncCompute() || AsyncComputePass->bCulled)
			{
				continue;
			}

			FRDGPassHandle CrossPipelineConsumer;

			// Cross pipeline consumers are sorted. Find the earliest consumer that isn't culled.
			for (FRDGPassHandle ConsumerHandle : AsyncComputePass->CrossPipelineConsumers)
			{
				FRDGPass* Consumer = Passes[ConsumerHandle];

				if (!Consumer->bCulled)
				{
					CrossPipelineConsumer = ConsumerHandle;
					break;
				}
			}

			// The join is the earliest of: the first live consumer, the current join, or the epilogue.
			FRDGPassHandle GraphicsJoinPassHandle = FRDGPassHandle::Min(CrossPipelineConsumer, FRDGPassHandle::Min(CurrentGraphicsJoinPassHandle, EpiloguePassHandle));
			FRDGPass* GraphicsJoinPass = Passes[GraphicsJoinPassHandle];

			AsyncComputePass->GraphicsJoinPass = GraphicsJoinPassHandle;
			Passes[GraphicsJoinPass->EpilogueBarrierPass]->ResourcesToEnd.Add(AsyncComputePass);

			if (CurrentGraphicsJoinPassHandle != GraphicsJoinPassHandle)
			{
				CurrentGraphicsJoinPassHandle = GraphicsJoinPassHandle;

				FRDGBarrierBatchBegin& EpilogueBarriersToBeginForGraphics = AsyncComputePass->GetEpilogueBarriersToBeginForGraphics(Allocator, TransitionCreateQueue);

				AsyncComputePass->bAsyncComputeEnd = 1;
				AsyncComputePass->bDispatchAfterExecute = 1;
				EpilogueBarriersToBeginForGraphics.SetUseCrossPipelineFence();

				GraphicsJoinPass->bGraphicsJoin = 1;
				GraphicsJoinPass->GetPrologueBarriersToEnd(Allocator).AddDependency(&EpilogueBarriersToBeginForGraphics);
			}
		}
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Asynchronously drains the pass queue on the given task pipe, invoking Lambda on each
// popped pass. A new pipe task is launched only if the previously launched one has
// completed; otherwise no new task is started — presumably the still-running task's pop
// loop will consume newly queued passes (TODO confirm: passes pushed just as the prior
// task finishes its loop would wait for the next Flush call).
template <typename LambdaType>
void FRDGBuilder::FPassQueue::Flush(UE::Tasks::FPipe& Pipe, const TCHAR* Name, LambdaType&& Lambda)
{
	if (LastTask.IsCompleted())
	{
		// Note: Lambda is captured by copy into the task.
		LastTask = Pipe.Launch(Name, [this, Lambda, Name]
		{
			SCOPED_NAMED_EVENT_TCHAR(Name, FColor::Magenta);
			while (FRDGPass* Pass = Queue.Pop())
			{
				Lambda(Pass);
			}
		});
	}
}
// Synchronously drains the pass queue on the calling thread, invoking Lambda on each
// queued pass. Waits for any previously launched asynchronous flush task first so the
// queue is not consumed from two threads at once.
template <typename LambdaType>
void FRDGBuilder::FPassQueue::Flush(const TCHAR* Name, LambdaType&& Lambda)
{
	SCOPED_NAMED_EVENT_TCHAR(Name, FColor::Magenta);

	// Block until the in-flight pipe task (if any) has finished.
	LastTask.Wait();

	for (FRDGPass* Pass = Queue.Pop(); Pass != nullptr; Pass = Queue.Pop())
	{
		Lambda(Pass);
	}
}
// Launches asynchronous setup of queued pass resources on the compile pipe. No-op when
// parallel setup is disabled (setup then happens synchronously elsewhere).
void FRDGBuilder::FlushSetupQueue()
{
	if (!bParallelSetupEnabled)
	{
		return;
	}

	SetupPassQueue.Flush(CompilePipe, TEXT("FRDGBuilder::SetupPassResources"), [this](FRDGPass* Pass)
	{
		SetupPassResources(Pass);
	});
}
// Runs Lambda either as a high-priority task on the compile pipe (when bCondition is
// true), returning the task handle, or inline on the calling thread (when false),
// returning an empty task.
template <typename LambdaType>
UE::Tasks::FTask FRDGBuilder::LaunchCompileTask(const TCHAR* Name, bool bCondition, LambdaType&& Lambda)
{
	if (!bCondition)
	{
		// Execute inline; there is no task handle to hand back.
		Lambda();
		return {};
	}

	SCOPED_NAMED_EVENT_TCHAR(Name, FColor::Magenta);
	return CompilePipe.Launch(Name, [Lambda] { Lambda(); }, LowLevelTasks::ETaskPriority::High);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Executes the recorded graph: compiles it (unless in immediate mode), collects pooled /
// transient resources, compiles and creates barriers, then executes every non-culled pass
// on the immediate command list (optionally dispatching parallel command list batches),
// and finally hands extracted resources back to their requesters.
void FRDGBuilder::Execute()
{
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE(RDG);
	SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::Execute", FColor::Magenta);

	GRDGTransientResourceAllocator.ReleasePendingDeallocations();

	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::FlushAccessModeQueue", FColor::Magenta);

		// Resources left in external access mode are pulled back under graph tracking before execution.
		for (FRDGViewableResource* Resource : ExternalAccessResources)
		{
			UseInternalAccessMode(Resource);
		}
		FlushAccessModeQueue();
	}

	// Create the epilogue pass at the end of the graph just prior to compilation.
	SetupEmptyPass(EpiloguePass = Passes.Allocate<FRDGSentinelPass>(Allocator, RDG_EVENT_NAME("Graph Epilogue")));

	const FRDGPassHandle ProloguePassHandle = GetProloguePassHandle();
	const FRDGPassHandle EpiloguePassHandle = GetEpiloguePassHandle();

	UE::Tasks::FTask SubmitBufferUploadsTask;
	UE::Tasks::FTask CompilePassBarriersTask;
	UE::Tasks::FTask CreateUniformBuffersTask;
	UE::Tasks::FTask SetupParallelExecuteTask;

	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExecuteBegin());
	IF_RDG_ENABLE_DEBUG(GRDGAllowRHIAccess = true);

	if (!IsImmediateMode())
	{
		// Compile and barrier compilation may run on the compile pipe when parallel setup is enabled.
		UE::Tasks::FTask CompileTask = LaunchCompileTask(TEXT("FRDGBuilder::Compile"), bParallelSetupEnabled, [this] { Compile(); });

		CompilePassBarriersTask = LaunchCompileTask(TEXT("FRDGBuilder::CompilePassBarriers"), bParallelSetupEnabled, [this] { CompilePassBarriers(); });

		BeginFlushResourcesRHI();

		if (!ParallelSetupEvents.IsEmpty())
		{
			UE::Tasks::Wait(ParallelSetupEvents);
			ParallelSetupEvents.Empty();
		}

		PrepareBufferUploads();

		GPUScopeStacks.ReserveOps(Passes.Num());
		IF_RDG_CPU_SCOPES(CPUScopeStacks.ReserveOps());

		if (bParallelExecuteEnabled)
		{
#if RHI_WANT_BREADCRUMB_EVENTS
			RHICmdList.ExportBreadcrumbState(*BreadcrumbState);
#endif

			// Parallel execute setup can be done off the render thread and synced prior to dispatch.
			SetupParallelExecuteTask = LaunchCompileTask(TEXT("FRDGBuilder::SetupParallelExecute"), bParallelExecuteEnabled, [this] { SetupParallelExecute(); });
		}

		// Resource collection requires the compiled graph (culling results, lifetimes).
		CompileTask.Wait();

		{
			SCOPE_CYCLE_COUNTER(STAT_RDG_CollectResourcesTime);
			CSV_SCOPED_TIMING_STAT_EXCLUSIVE(RDG_CollectResources);
			SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::CollectResources", FColor::Magenta);

			UniformBuffersToCreate.Reserve(UniformBuffers.Num());

			// Extended-lifetime resources get an extra reference so they survive the pass walk below.
			EnumerateExtendedLifetimeResources(Textures, [](FRDGTexture* Texture)
			{
				++Texture->ReferenceCount;
			});

			EnumerateExtendedLifetimeResources(Buffers, [](FRDGBuffer* Buffer)
			{
				++Buffer->ReferenceCount;
			});

			// Null out any culled external resources so that the reference is freed up.
			for (const auto& Pair : ExternalTextures)
			{
				FRDGTexture* Texture = Pair.Value;

				if (Texture->IsCulled())
				{
					EndResourceRHI(ProloguePassHandle, Texture, 0);
				}
			}

			for (const auto& Pair : ExternalBuffers)
			{
				FRDGBuffer* Buffer = Pair.Value;

				if (Buffer->IsCulled())
				{
					EndResourceRHI(ProloguePassHandle, Buffer, 0);
				}
			}

			// Passes recorded before the prologue only need their resources ended at the prologue.
			for (FRDGPassHandle PassHandle = Passes.Begin(); PassHandle < ProloguePassHandle; ++PassHandle)
			{
				FRDGPass* Pass = Passes[PassHandle];

				if (!Pass->bCulled)
				{
					EndResourcesRHI(Pass, ProloguePassHandle);
				}
			}

			// Walk the graph allocating pooled / transient resources at first use and releasing at last use.
			for (FRDGPassHandle PassHandle = ProloguePassHandle; PassHandle <= EpiloguePassHandle; ++PassHandle)
			{
				FRDGPass* Pass = Passes[PassHandle];

				if (!Pass->bCulled)
				{
					BeginResourcesRHI(Pass, PassHandle);
					EndResourcesRHI(Pass, PassHandle);
				}
			}

			// Release the extra extended-lifetime references at the epilogue.
			EnumerateExtendedLifetimeResources(Textures, [&](FRDGTextureRef Texture)
			{
				EndResourceRHI(EpiloguePassHandle, Texture, 1);
			});

			EnumerateExtendedLifetimeResources(Buffers, [&](FRDGBufferRef Buffer)
			{
				EndResourceRHI(EpiloguePassHandle, Buffer, 1);
			});

			if (TransientResourceAllocator)
			{
#if RDG_ENABLE_TRACE
				TransientResourceAllocator->Flush(RHICmdList, Trace.IsEnabled() ? &Trace.TransientAllocationStats : nullptr);
#else
				TransientResourceAllocator->Flush(RHICmdList);
#endif
			}
		}

		// We have to wait until after view creation to launch uploads because we can't lock / unlock while creating views simultaneously.
		SubmitBufferUploadsTask = SubmitBufferUploads();

		// Uniform buffer creation depends on view creation.
		CreateUniformBuffersTask = CreateUniformBuffers();

		{
			SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::CollectBarriers", FColor::Magenta);
			SCOPE_CYCLE_COUNTER(STAT_RDG_CollectBarriersTime);
			CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDG_CollectBarriers, GRDGVerboseCSVStats != 0);

			// Barrier collection needs the merged subresource states computed by CompilePassBarriers.
			CompilePassBarriersTask.Wait();

			for (FRDGPassHandle PassHandle = ProloguePassHandle + 1; PassHandle < EpiloguePassHandle; ++PassHandle)
			{
				FRDGPass* Pass = Passes[PassHandle];

				if (!Pass->bCulled && !Pass->bEmptyParameters)
				{
					CollectPassBarriers(Pass);
				}
			}
		}
	}
	else
	{
		// Immediate mode: passes were executed as they were added; only uploads / uniform buffers remain.
		PrepareBufferUploads();
		SubmitBufferUploads();
		CreateUniformBuffers();
	}

	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::Finalize", FColor::Magenta);

		EpilogueResourceAccesses.Reserve(Textures.Num() + Buffers.Num());

		// Transition every resource to its final (epilogue) state.
		Textures.Enumerate([&](FRDGTextureRef Texture)
		{
			AddEpilogueTransition(Texture);
		});

		Buffers.Enumerate([&](FRDGBufferRef Buffer)
		{
			AddEpilogueTransition(Buffer);
		});
	}

	const ENamedThreads::Type RenderThread = ENamedThreads::GetRenderThread_Local();

	CreatePassBarriers([&]
	{
		// These tasks must complete before submitting the upload command list / executing passes.
		CreateUniformBuffersTask.Wait();

		if (SubmitBufferUploadsTask.IsValid())
		{
			SubmitBufferUploadsTask.Wait();

			check(RHICmdListBufferUploads);
			RHICmdList.QueueAsyncCommandListSubmit(RHICmdListBufferUploads);
			RHICmdListBufferUploads = nullptr;
		}

		// Process RHI thread flush before helping with barrier compilation on the render thread.
		EndFlushResourcesRHI();
	});

	UE::Tasks::FTask ParallelExecuteTask;

	if (bParallelExecuteEnabled)
	{
		SetupParallelExecuteTask.Wait();
		ParallelExecuteTask = CompilePipe.Launch(TEXT("DispatchParallelExecute"), [this] { DispatchParallelExecute(); }, LowLevelTasks::ETaskPriority::High);
	}

	IF_RDG_ENABLE_DEBUG(GRDGAllowRHIAccess = bParallelExecuteEnabled);
	IF_RDG_ENABLE_TRACE(Trace.OutputGraphBegin());

	if (!IsImmediateMode())
	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::ExecutePasses", FColor::Magenta);
		SCOPE_CYCLE_COUNTER(STAT_RDG_ExecuteTime);
		CSV_SCOPED_TIMING_STAT_EXCLUSIVE(RDG_Execute);

		for (FRDGPassHandle PassHandle = ProloguePassHandle; PassHandle <= EpiloguePassHandle; ++PassHandle)
		{
			FRDGPass* Pass = Passes[PassHandle];

			if (Pass->bCulled)
			{
#if RDG_STATS
				GRDGStatPassCullCount++;
#endif
				continue;
			}

			if (bParallelExecuteEnabled)
			{
				if (Pass->bParallelExecute)
				{
#if RDG_CPU_SCOPES // CPU scopes are replayed on the render thread prior to executing the entire batch.
					Pass->CPUScopeOps.Execute();
#endif
					// Only the first pass of a parallel set submits the recorded command list for the whole set.
					if (Pass->bParallelExecuteBegin)
					{
						FParallelPassSet& ParallelPassSet = ParallelPassSets[Pass->ParallelPassSetIndex];

						// Busy wait until our pass set is ready. This will be set by the dispatch task.
						while (!FPlatformAtomics::AtomicRead(&ParallelPassSet.bInitialized)) {};

						FRHICommandListImmediate::ETranslatePriority TranslatePriority = ParallelPassSet.bParallelTranslate ? FRHICommandListImmediate::ETranslatePriority::Normal : FRHICommandListImmediate::ETranslatePriority::Disabled;

						check(ParallelPassSet.CmdList != nullptr);
						RHICmdList.QueueAsyncCommandListSubmit(MakeArrayView<FRHICommandListImmediate::FQueuedCommandList>(&ParallelPassSet, 1), TranslatePriority);

						IF_RHI_WANT_BREADCRUMB_EVENTS(RHICmdList.ImportBreadcrumbState(*ParallelPassSet.BreadcrumbStateEnd));

						if (ParallelPassSet.bDispatchAfterExecute)
						{
							RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
						}
					}

					// Parallel passes were recorded off-thread; skip the inline ExecutePass below.
					continue;
				}
			}
			else if (!Pass->bSentinel)
			{
				CompilePassOps(Pass);
			}

			ExecutePass(Pass, RHICmdList);
		}
	}
	else
	{
		ExecutePass(EpiloguePass, RHICmdList);
	}

	RHICmdList.SetStaticUniformBuffers({});

#if WITH_MGPU
	if (bForceCopyCrossGPU)
	{
		ForceCopyCrossGPU();
	}
#endif

	RHICmdList.SetTrackedAccess(EpilogueResourceAccesses);

	// Wait for the parallel dispatch task before attempting to wait on the execute event array (the former mutates the array).
	ParallelExecuteTask.Wait();

	// Wait on the actual parallel execute tasks in the Execute call. When draining is okay to let them overlap with other graph setup.
	// This also needs to be done before extraction of external resources to be consistent with non-parallel rendering.
	if (!ParallelExecuteEvents.IsEmpty())
	{
		UE::Tasks::Wait(ParallelExecuteEvents);
		ParallelExecuteEvents.Empty();
	}

	// Hand the pooled resources back to the extraction requesters.
	for (const FExtractedTexture& ExtractedTexture : ExtractedTextures)
	{
		check(ExtractedTexture.Texture->RenderTarget);
		*ExtractedTexture.PooledTexture = ExtractedTexture.Texture->RenderTarget;
	}

	for (const FExtractedBuffer& ExtractedBuffer : ExtractedBuffers)
	{
		check(ExtractedBuffer.Buffer->PooledBuffer);
		*ExtractedBuffer.PooledBuffer = ExtractedBuffer.Buffer->PooledBuffer;
	}

	IF_RDG_ENABLE_TRACE(Trace.OutputGraphEnd(*this));

	GPUScopeStacks.Graphics.EndExecute(RHICmdList, ERHIPipeline::Graphics);
	GPUScopeStacks.AsyncCompute.EndExecute(RHICmdList, ERHIPipeline::AsyncCompute);
	IF_RDG_CPU_SCOPES(CPUScopeStacks.EndExecute());

	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExecuteEnd());
	IF_RDG_ENABLE_DEBUG(GRDGAllowRHIAccess = false);

#if RDG_STATS
	GRDGStatBufferCount += Buffers.Num();
	GRDGStatTextureCount += Textures.Num();
	GRDGStatViewCount += Views.Num();
	GRDGStatMemoryWatermark = FMath::Max(GRDGStatMemoryWatermark, Allocator.GetByteCount());
#endif

	RasterPassCount = 0;
	AsyncComputePassCount = 0;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Walks a pass's parameter struct and sets bProduced on every viewable resource the pass
// can write: UAV targets unconditionally, access parameters only when their declared
// access is writable, all bound render targets and resolve targets, and the depth/stencil
// target when its binding includes any write access.
void FRDGBuilder::MarkResourcesAsProduced(FRDGPass* Pass)
{
	const auto MarkAsProduced = [&](FRDGViewableResource* Resource)
	{
		Resource->bProduced = true;
	};

	const auto MarkAsProducedIfWritable = [&](FRDGViewableResource* Resource, ERHIAccess Access)
	{
		if (IsWritableAccess(Access))
		{
			Resource->bProduced = true;
		}
	};

	Pass->GetParameters().Enumerate([&](FRDGParameter Parameter)
	{
		switch (Parameter.GetType())
		{
		// UAV bindings always count as production.
		case UBMT_RDG_TEXTURE_UAV:
			if (FRDGTextureUAV* UAV = Parameter.GetAsTextureUAV())
			{
				MarkAsProduced(UAV->GetParent());
			}
			break;
		case UBMT_RDG_BUFFER_UAV:
			if (FRDGBufferUAV* UAV = Parameter.GetAsBufferUAV())
			{
				MarkAsProduced(UAV->GetParent());
			}
			break;

		// Access parameters declare an explicit access mask; only writes count.
		case UBMT_RDG_TEXTURE_ACCESS:
		{
			if (FRDGTextureAccess TextureAccess = Parameter.GetAsTextureAccess())
			{
				MarkAsProducedIfWritable(TextureAccess.GetTexture(), TextureAccess.GetAccess());
			}
		}
		break;
		case UBMT_RDG_TEXTURE_ACCESS_ARRAY:
		{
			const FRDGTextureAccessArray& TextureAccessArray = Parameter.GetAsTextureAccessArray();

			for (FRDGTextureAccess TextureAccess : TextureAccessArray)
			{
				MarkAsProducedIfWritable(TextureAccess.GetTexture(), TextureAccess.GetAccess());
			}
		}
		break;
		case UBMT_RDG_BUFFER_ACCESS:
			if (FRDGBufferAccess BufferAccess = Parameter.GetAsBufferAccess())
			{
				MarkAsProducedIfWritable(BufferAccess.GetBuffer(), BufferAccess.GetAccess());
			}
			break;
		case UBMT_RDG_BUFFER_ACCESS_ARRAY:
		{
			const FRDGBufferAccessArray& BufferAccessArray = Parameter.GetAsBufferAccessArray();

			for (FRDGBufferAccess BufferAccess : BufferAccessArray)
			{
				MarkAsProducedIfWritable(BufferAccess.GetBuffer(), BufferAccess.GetAccess());
			}
		}
		break;

		// Render target and resolve bindings are writes; depth/stencil only if its access writes.
		case UBMT_RENDER_TARGET_BINDING_SLOTS:
		{
			const FRenderTargetBindingSlots& RenderTargets = Parameter.GetAsRenderTargetBindingSlots();

			RenderTargets.Enumerate([&](FRenderTargetBinding RenderTarget)
			{
				MarkAsProduced(RenderTarget.GetTexture());

				if (FRDGTexture* ResolveTexture = RenderTarget.GetResolveTexture())
				{
					MarkAsProduced(ResolveTexture);
				}
			});

			const FDepthStencilBinding& DepthStencil = RenderTargets.DepthStencil;

			if (DepthStencil.GetDepthStencilAccess().IsAnyWrite())
			{
				MarkAsProduced(DepthStencil.GetTexture());
			}
		}
		break;
		}
	});
}
// Builds the culling-graph state for a single pass: accumulates resource reference
// counts, registers producer dependencies for every texture subresource and buffer the
// pass accesses, and seeds the cull stack with the pass itself when it can never be
// culled (external outputs or the NeverCull flag).
void FRDGBuilder::SetupPassDependencies(FRDGPass* Pass)
{
	for (auto& PassState : Pass->TextureStates)
	{
		FRDGTextureRef Texture = PassState.Texture;
		auto& LastProducers = Texture->LastProducers;

		Texture->ReferenceCount += PassState.ReferenceCount;

		// One producer-state slot per subresource; skip subresources this pass does not touch.
		for (uint32 Index = 0, Count = LastProducers.Num(); Index < Count; ++Index)
		{
			const auto& SubresourceState = PassState.State[Index];

			if (SubresourceState.Access == ERHIAccess::Unknown)
			{
				continue;
			}

			FRDGProducerState ProducerState;
			ProducerState.Pass = Pass;
			ProducerState.Access = SubresourceState.Access;
			ProducerState.NoUAVBarrierHandle = SubresourceState.NoUAVBarrierFilter.GetUniqueHandle();

			AddCullingDependency(LastProducers[Index], ProducerState, Pass->Pipeline);
		}
	}

	for (auto& PassState : Pass->BufferStates)
	{
		FRDGBufferRef Buffer = PassState.Buffer;
		const auto& SubresourceState = PassState.State;

		Buffer->ReferenceCount += PassState.ReferenceCount;

		FRDGProducerState ProducerState;
		ProducerState.Pass = Pass;
		ProducerState.Access = SubresourceState.Access;
		ProducerState.NoUAVBarrierHandle = SubresourceState.NoUAVBarrierFilter.GetUniqueHandle();

		AddCullingDependency(Buffer->LastProducer, ProducerState, Pass->Pipeline);
	}

	const bool bCullPasses = GRDGCullPasses > 0;
	Pass->bCulled = bCullPasses;

	// Passes with outputs leaving the graph (or flagged NeverCull) are roots of the cull traversal.
	if (bCullPasses && (Pass->bHasExternalOutputs || EnumHasAnyFlags(Pass->Flags, ERDGPassFlags::NeverCull)))
	{
		CullPassStack.Emplace(Pass->Handle);
	}
}
/** Gathers every resource access declared by the pass parameter struct into the pass's
 *  per-resource state arrays (TextureStates / BufferStates), registers views and uniform
 *  buffers, and updates the bookkeeping flags consumed by culling and barrier compilation.
 */
void FRDGBuilder::SetupPassResources(FRDGPass* Pass)
{
	const FRDGParameterStruct PassParameters = Pass->GetParameters();
	const FRDGPassHandle PassHandle = Pass->Handle;
	const ERDGPassFlags PassFlags = Pass->Flags;
	const ERHIPipeline PassPipeline = Pass->Pipeline;

	// Stays true only while every write seen so far goes through a render-target binding.
	bool bRenderPassOnlyWrites = true;

	// Adds a view to the pass's view list once, using View->LastPass as a de-dup marker.
	const auto TryAddView = [&](FRDGViewRef View)
	{
		if (View && View->LastPass != PassHandle)
		{
			View->LastPass = PassHandle;
			Pass->Views.Add(View->Handle);
		}
	};

	Pass->Views.Reserve(PassParameters.GetBufferParameterCount() + PassParameters.GetTextureParameterCount());

	// Raster passes may additionally reference the bound render targets plus depth-stencil.
	Pass->TextureStates.Reserve(PassParameters.GetTextureParameterCount() + (PassParameters.HasRenderTargets() ? (MaxSimultaneousRenderTargets + 1) : 0));
	EnumerateTextureAccess(PassParameters, PassFlags, [&](FRDGViewRef TextureView, FRDGTextureRef Texture, ERHIAccess Access, ERDGTextureAccessFlags AccessFlags, FRDGTextureSubresourceRange Range)
	{
		TryAddView(TextureView);

		if (Texture->AccessModeState.IsExternalAccess() && !Pass->bExternalAccessPass)
		{
			// Resources in external access mode are expected to remain in the same state and are ignored by the graph.
			// As only External | Extracted resources can be set as external by the user, the graph doesn't need to track
			// them any more for culling / transition purposes. Validation checks that these invariants are true.
			IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExternalAccess(Texture, Access, Pass));
			return;
		}

		const FRDGViewHandle NoUAVBarrierHandle = GetHandleIfNoUAVBarrier(TextureView);
		const EResourceTransitionFlags TransitionFlags = GetTextureViewTransitionFlags(TextureView, Texture);

		// First access of this texture by this pass allocates a state entry; subsequent
		// accesses merge into the entry recorded in PassStateIndex.
		FRDGPass::FTextureState* PassState;

		if (Texture->LastPass != PassHandle)
		{
			Texture->LastPass = PassHandle;
			Texture->PassStateIndex = static_cast<uint16>(Pass->TextureStates.Num());

			PassState = &Pass->TextureStates.Emplace_GetRef(Texture);
		}
		else
		{
			PassState = &Pass->TextureStates[Texture->PassStateIndex];
		}

		PassState->ReferenceCount++;

		// Merge this access into every subresource covered by the requested range.
		EnumerateSubresourceRange(PassState->State, Texture->Layout, Range, [&](FRDGSubresourceState& State)
		{
			IF_RDG_ENABLE_DEBUG(UserValidation.ValidateAddSubresourceAccess(Texture, State, Access));

			State.Access = MakeValidAccess(State.Access, Access);
			State.Flags |= TransitionFlags;
			State.NoUAVBarrierFilter.AddHandle(NoUAVBarrierHandle);
			State.SetPass(PassPipeline, PassHandle);
		});

		if (IsWritableAccess(Access))
		{
			bRenderPassOnlyWrites &= EnumHasAnyFlags(AccessFlags, ERDGTextureAccessFlags::RenderTarget);

			// When running in parallel this is set via MarkResourcesAsProduced. We also can't touch this as its a bitfield and not atomic.
			if (!bParallelSetupEnabled)
			{
				Texture->bProduced = true;
			}
		}
	});

	Pass->BufferStates.Reserve(PassParameters.GetBufferParameterCount());
	EnumerateBufferAccess(PassParameters, PassFlags, [&](FRDGViewRef BufferView, FRDGBufferRef Buffer, ERHIAccess Access)
	{
		TryAddView(BufferView);

		if (Buffer->AccessModeState.IsExternalAccess() && !Pass->bExternalAccessPass)
		{
			// Resources in external access mode are expected to remain in the same state and are ignored by the graph.
			// As only External | Extracted resources can be set as external by the user, the graph doesn't need to track
			// them any more for culling / transition purposes. Validation checks that these invariants are true.
			IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExternalAccess(Buffer, Access, Pass));
			return;
		}

		const FRDGViewHandle NoUAVBarrierHandle = GetHandleIfNoUAVBarrier(BufferView);

		// Buffers track a single subresource state per pass, de-duped via LastPass.
		FRDGPass::FBufferState* PassState;

		if (Buffer->LastPass != PassHandle)
		{
			Buffer->LastPass = PassHandle;
			Buffer->PassStateIndex = Pass->BufferStates.Num();

			PassState = &Pass->BufferStates.Emplace_GetRef(Buffer);
		}
		else
		{
			PassState = &Pass->BufferStates[Buffer->PassStateIndex];
		}

		IF_RDG_ENABLE_DEBUG(UserValidation.ValidateAddSubresourceAccess(Buffer, PassState->State, Access));

		PassState->ReferenceCount++;
		PassState->State.Access = MakeValidAccess(PassState->State.Access, Access);
		PassState->State.NoUAVBarrierFilter.AddHandle(NoUAVBarrierHandle);
		PassState->State.SetPass(PassPipeline, PassHandle);

		if (IsWritableAccess(Access))
		{
			// Buffer writes never go through a render-target binding.
			bRenderPassOnlyWrites = false;

			// When running in parallel this is set via MarkResourcesAsProduced. We also can't touch this as its a bitfield and not atomic.
			if (!bParallelSetupEnabled)
			{
				Buffer->bProduced = true;
			}
		}
	});

	Pass->bEmptyParameters = !Pass->TextureStates.Num() && !Pass->BufferStates.Num();
	Pass->bRenderPassOnlyWrites = bRenderPassOnlyWrites;
	Pass->bHasExternalOutputs = PassParameters.HasExternalOutputs();

	Pass->UniformBuffers.Reserve(PassParameters.GetUniformBufferParameterCount());
	PassParameters.EnumerateUniformBuffers([&](FRDGUniformBufferBinding UniformBuffer)
	{
		Pass->UniformBuffers.Emplace(UniformBuffer.GetUniformBuffer()->Handle);
	});

	if (bParallelSetupEnabled)
	{
		// Parallel setup wires culling dependencies here (on the task) and applies any
		// queued external-access mode changes in pass order.
		SetupPassDependencies(Pass);

		for (FRDGPass::FExternalAccessOp Op : Pass->ExternalAccessOps)
		{
			Op.Resource->AccessModeState.ActiveMode = Op.Mode;
		}
	}
}
/** Initializes parameter-independent per-pass bookkeeping: barrier / fork / join pass handles
 *  default to the pass itself, pipeline counters, GPU mask capture, and stat / scope capture.
 */
void FRDGBuilder::SetupPassInternals(FRDGPass* Pass)
{
	const FRDGPassHandle PassHandle = Pass->Handle;
	const ERDGPassFlags PassFlags = Pass->Flags;
	const ERHIPipeline PassPipeline = Pass->Pipeline;

	// Until graph compilation merges regions, each pass forks / joins and begins / ends
	// its barriers on itself.
	Pass->GraphicsForkPass = PassHandle;
	Pass->GraphicsJoinPass = PassHandle;
	Pass->PrologueBarrierPass = PassHandle;
	Pass->EpilogueBarrierPass = PassHandle;

	if (Pass->Pipeline == ERHIPipeline::Graphics)
	{
		Pass->ResourcesToBegin.Add(Pass);
		Pass->ResourcesToEnd.Add(Pass);
	}

	AsyncComputePassCount += EnumHasAnyFlags(PassFlags, ERDGPassFlags::AsyncCompute) ? 1 : 0;
	RasterPassCount += EnumHasAnyFlags(PassFlags, ERDGPassFlags::Raster) ? 1 : 0;

#if WITH_MGPU
	// Capture the GPU mask active at AddPass time so execution replays it later.
	Pass->GPUMask = RHICmdList.GetGPUMask();
#endif

#if STATS
	Pass->CommandListStat = CommandListStatScope;
#endif

#if RDG_STATS
	GRDGStatPassCount++;
#endif

	IF_RDG_CPU_SCOPES(Pass->CPUScopes = CPUScopeStacks.GetCurrentScopes());
	Pass->GPUScopes = GPUScopeStacks.GetCurrentScopes(PassPipeline);

#if RDG_GPU_DEBUG_SCOPES && RDG_ENABLE_TRACE
	// Trace events are always recorded against the graphics pipe's scope stack.
	Pass->TraceEventScope = GPUScopeStacks.GetCurrentScopes(ERHIPipeline::Graphics).Event;
#endif

#if RDG_GPU_DEBUG_SCOPES && RDG_ENABLE_DEBUG
	if (GRDGValidation != 0)
	{
		// Cache the full event-scope path for debug breakpoints / validation messages.
		if (const FRDGEventScope* Scope = Pass->GPUScopes.Event)
		{
			Pass->FullPathIfDebug = Scope->GetPath(Pass->Name);
		}
	}
#endif
}
/** Immediate-mode handling for a newly added pass. When the builder runs in immediate mode the
 *  graph is never compiled, so merge states are aliased straight to the pass states and the pass
 *  is executed on the spot. Also triggers debug visualization / resource dumping for the pass.
 */
void FRDGBuilder::SetupAuxiliaryPasses(FRDGPass* Pass)
{
	if (IsImmediateMode() && !Pass->bSentinel)
	{
		SCOPED_NAMED_EVENT(FRDGBuilder_ExecutePass, FColor::Emerald);
		RDG_ALLOW_RHI_ACCESS_SCOPE();

		// Trivially redirect the merge states to the pass states, since we won't be compiling the graph.
		for (auto& PassState : Pass->TextureStates)
		{
			const uint32 SubresourceCount = PassState.State.Num();
			PassState.MergeState.SetNum(SubresourceCount);

			for (uint32 Index = 0; Index < SubresourceCount; ++Index)
			{
				// Only subresources the pass actually accessed get a merge entry.
				if (PassState.State[Index].Access != ERHIAccess::Unknown)
				{
					PassState.MergeState[Index] = &PassState.State[Index];
				}
			}
		}

		for (auto& PassState : Pass->BufferStates)
		{
			PassState.MergeState = &PassState.State;
		}

		// Immediate mode executes everything synchronously on the graphics pipe.
		check(!EnumHasAnyFlags(Pass->Pipeline, ERHIPipeline::AsyncCompute));
		check(ParallelSetupEvents.IsEmpty());

		PrepareBufferUploads();
		SubmitBufferUploads();
		CompilePassOps(Pass);
		BeginResourcesRHI(Pass, Pass->Handle);
		CreateUniformBuffers();
		CollectPassBarriers(Pass);
		CreatePassBarriers([] {});

		if (!ParallelSetupEvents.IsEmpty())
		{
			UE::Tasks::Wait(ParallelSetupEvents);
			ParallelSetupEvents.Reset();
		}

		ExecutePass(Pass, RHICmdList);
	}

	// Debug-only hooks run regardless of immediate mode.
	IF_RDG_ENABLE_DEBUG(VisualizePassOutputs(Pass));

#if RDG_DUMP_RESOURCES
	DumpResourcePassOutputs(Pass);
#endif
}
/** Final setup step for a pass with a parameter struct. Resource registration is either queued
 *  to the parallel setup pipeline or performed inline, then immediate-mode handling runs.
 *  @return the pass, for chaining by AddPass.
 */
FRDGPass* FRDGBuilder::SetupParameterPass(FRDGPass* Pass)
{
	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateAddPass(Pass, AuxiliaryPasses.IsActive()));
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDGBuilder_SetupPass, GRDGVerboseCSVStats != 0);

	SetupPassInternals(Pass);

	if (bParallelSetupEnabled)
	{
		// bProduced bits must be set before the setup task runs: they are non-atomic bitfields.
		MarkResourcesAsProduced(Pass);
		SetupPassQueue.Push(Pass);
	}
	else
	{
		SetupPassResources(Pass);
	}

	SetupAuxiliaryPasses(Pass);
	return Pass;
}
/** Setup path for passes without resource parameters (e.g. sentinel passes). Skips resource
 *  registration entirely and marks the parameter set as empty.
 *  @return the pass, for chaining.
 */
FRDGPass* FRDGBuilder::SetupEmptyPass(FRDGPass* Pass)
{
	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateAddPass(Pass, AuxiliaryPasses.IsActive()));
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDGBuilder_SetupPass, GRDGVerboseCSVStats != 0);

	Pass->bEmptyParameters = true;
	SetupPassInternals(Pass);
	SetupAuxiliaryPasses(Pass);
	return Pass;
}
/** Compiles the per-pass scope operations (CPU / GPU debug scopes, command-list stat switches)
 *  that are replayed around pass execution.
 */
void FRDGBuilder::CompilePassOps(FRDGPass* Pass)
{
#if WITH_MGPU
	FRHIGPUMask GPUMask = Pass->GPUMask;
#else
	FRHIGPUMask GPUMask = FRHIGPUMask::All();
#endif

#if RDG_CMDLIST_STATS
	// Emit a stat switch only when the stat scope changed since the previously compiled pass.
	if (CommandListStatState != Pass->CommandListStat && !Pass->bSentinel)
	{
		CommandListStatState = Pass->CommandListStat;
		Pass->bSetCommandListStat = 1;
	}
#endif

#if RDG_CPU_SCOPES
	Pass->CPUScopeOps = CPUScopeStacks.CompilePassPrologue(Pass);
#endif

	Pass->GPUScopeOpsPrologue = GPUScopeStacks.CompilePassPrologue(Pass, GPUMask);
	Pass->GPUScopeOpsEpilogue = GPUScopeStacks.CompilePassEpilogue(Pass);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
void FRDGBuilder::PrepareBufferUploads()
{
SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::PrepareBufferUploads", FColor::Magenta);
for (FUploadedBuffer& UploadedBuffer : UploadedBuffers)
{
FRDGBuffer* Buffer = UploadedBuffer.Buffer;
if (!Buffer->HasRHI())
{
BeginResourceRHI(GetProloguePassHandle(), Buffer);
}
check(UploadedBuffer.DataSize <= Buffer->Desc.GetSize());
}
}
/** Copies all queued buffer upload data into the corresponding RHI buffers. When parallel setup
 *  is enabled the copies are recorded on a dedicated command list from a task; otherwise they
 *  are recorded inline on the immediate command list.
 *  @return the upload task when parallel setup is active, otherwise an empty (complete) task.
 */
UE::Tasks::FTask FRDGBuilder::SubmitBufferUploads()
{
	const auto SubmitUploadsLambda = [this](FRHICommandList& RHICmdListUpload)
	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::SubmitBufferUploads", FColor::Magenta);

		for (FUploadedBuffer& UploadedBuffer : UploadedBuffers)
		{
			// Deferred uploads resolve their data pointer and size at submit time.
			if (UploadedBuffer.bUseDataCallbacks)
			{
				UploadedBuffer.Data = UploadedBuffer.DataCallback();
				UploadedBuffer.DataSize = UploadedBuffer.DataSizeCallback();
			}

			if (UploadedBuffer.Data && UploadedBuffer.DataSize)
			{
				void* DestPtr = RHICmdListUpload.LockBuffer(UploadedBuffer.Buffer->GetRHIUnchecked(), 0, UploadedBuffer.DataSize, RLM_WriteOnly);
				FMemory::Memcpy(DestPtr, UploadedBuffer.Data, UploadedBuffer.DataSize);
				RHICmdListUpload.UnlockBuffer(UploadedBuffer.Buffer->GetRHIUnchecked());

				// Let user code release the source memory once it has been copied.
				if (UploadedBuffer.bUseFreeCallbacks)
				{
					UploadedBuffer.DataFreeCallback(UploadedBuffer.Data);
				}
			}
		}

		UploadedBuffers.Reset();
	};

	if (bParallelSetupEnabled)
	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::SubmitBufferUploads", FColor::Magenta);

		return UE::Tasks::Launch(TEXT("FRDGBuilder::SubmitBufferUploads"), [this, SubmitUploadsLambda]
		{
			FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
			// NOTE(review): command list is allocated here; presumably submitted and freed by
			// the builder after FinishRecording — confirm ownership elsewhere in this file.
			RHICmdListBufferUploads = new FRHICommandList(FRHIGPUMask::All());
			RHICmdListBufferUploads->SwitchPipeline(ERHIPipeline::Graphics);
			SubmitUploadsLambda(*RHICmdListBufferUploads);
			RHICmdListBufferUploads->FinishRecording();
		}, LowLevelTasks::ETaskPriority::High);
	}
	else
	{
		SubmitUploadsLambda(RHICmdList);
		return {};
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Partitions the pass array into contiguous sets suitable for recording on parallel command
 *  lists. Candidates accumulate until a pass disallows parallel execution, the workload budget
 *  is exceeded, or the ParallelTranslate flag flips; each flush emits an FParallelPassSet when
 *  large enough. Also snapshots breadcrumb state at set boundaries so parallel command lists
 *  replay the correct event stack.
 */
void FRDGBuilder::SetupParallelExecute()
{
	SCOPED_NAMED_EVENT(SetupParallelExecute, FColor::Emerald);
	FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);

	TArray<FRDGPass*, TInlineAllocator<64, FRDGArrayAllocator>> ParallelPassCandidates;
	uint32 ParallelPassCandidatesWorkload = 0;
	bool bDispatchAfterExecute = false;
	bool bParallelTranslate = false;

	// Trims the candidate window so it never starts or ends inside a merged RHI render pass,
	// then emits a parallel pass set if enough passes remain.
	const auto FlushParallelPassCandidates = [&]()
	{
		if (ParallelPassCandidates.IsEmpty())
		{
			return;
		}

		int32 PassBeginIndex = 0;
		int32 PassEndIndex = ParallelPassCandidates.Num();

		// It's possible that the first pass is inside a merged RHI render pass region. If so, we must push it forward until after the render pass ends.
		if (const FRDGPass* FirstPass = ParallelPassCandidates[PassBeginIndex]; FirstPass->PrologueBarrierPass < FirstPass->Handle)
		{
			const FRDGPass* EpilogueBarrierPass = Passes[FirstPass->EpilogueBarrierPass];

			for (; PassBeginIndex < ParallelPassCandidates.Num(); ++PassBeginIndex)
			{
				if (ParallelPassCandidates[PassBeginIndex] == EpilogueBarrierPass)
				{
					++PassBeginIndex;
					break;
				}
			}
		}

		if (PassBeginIndex < PassEndIndex)
		{
			// It's possible that the last pass is inside a merged RHI render pass region. If so, we must push it backwards until after the render pass begins.
			if (FRDGPass* LastPass = ParallelPassCandidates.Last(); LastPass->EpilogueBarrierPass > LastPass->Handle)
			{
				FRDGPass* PrologueBarrierPass = Passes[LastPass->PrologueBarrierPass];

				while (PassEndIndex > PassBeginIndex)
				{
					if (ParallelPassCandidates[--PassEndIndex] == PrologueBarrierPass)
					{
						break;
					}
				}
			}
		}

		const int32 ParallelPassCandidateCount = PassEndIndex - PassBeginIndex;

		// Only worth dispatching to a task when the set is big enough.
		if (ParallelPassCandidateCount >= GRDGParallelExecutePassMin)
		{
			FRDGPass* PassBegin = ParallelPassCandidates[PassBeginIndex];
			PassBegin->bParallelExecuteBegin = 1;
			PassBegin->ParallelPassSetIndex = ParallelPassSets.Num();

			FRDGPass* PassEnd = ParallelPassCandidates[PassEndIndex - 1];
			PassEnd->bParallelExecuteEnd = 1;
			PassEnd->ParallelPassSetIndex = ParallelPassSets.Num();

			for (int32 PassIndex = PassBeginIndex; PassIndex < PassEndIndex; ++PassIndex)
			{
				ParallelPassCandidates[PassIndex]->bParallelExecute = 1;
			}

			FParallelPassSet& ParallelPassSet = ParallelPassSets.Emplace_GetRef();
			ParallelPassSet.Passes.Append(ParallelPassCandidates.GetData() + PassBeginIndex, ParallelPassCandidateCount);
			ParallelPassSet.bDispatchAfterExecute = bDispatchAfterExecute;
			ParallelPassSet.bParallelTranslate = bParallelTranslate;
		}

		// Reset accumulation state whether or not a set was emitted.
		ParallelPassCandidates.Reset();
		ParallelPassCandidatesWorkload = 0;
		bDispatchAfterExecute = false;
		bParallelTranslate = false;
	};

	ParallelPassSets.Reserve(32);
	ParallelPassCandidates.Emplace(ProloguePass);

	for (FRDGPassHandle PassHandle = GetProloguePassHandle() + 1; PassHandle < GetEpiloguePassHandle(); ++PassHandle)
	{
		FRDGPass* Pass = Passes[PassHandle];

		if (Pass->bCulled)
		{
			continue;
		}

		CompilePassOps(Pass);

		if (!Pass->bParallelExecuteAllowed)
		{
			// Passes that can't run in parallel terminate the current candidate set.
			FlushParallelPassCandidates();
			continue;
		}

		bool bPassHasParallelTranslate = EnumHasAnyFlags(Pass->Flags, ERDGPassFlags::ParallelTranslate);
		if (bParallelTranslate != bPassHasParallelTranslate)
		{
			// A set cannot mix parallel-translate and non-parallel-translate passes.
			FlushParallelPassCandidates();
		}

		bDispatchAfterExecute |= Pass->bDispatchAfterExecute;
		bParallelTranslate |= bPassHasParallelTranslate;

		ParallelPassCandidates.Emplace(Pass);
		ParallelPassCandidatesWorkload += Pass->Workload;

		// Cap set size by accumulated workload.
		if (ParallelPassCandidatesWorkload >= (uint32)GRDGParallelExecutePassMax)
		{
			FlushParallelPassCandidates();
		}
	}

	ParallelPassCandidates.Emplace(EpiloguePass);
	FlushParallelPassCandidates();

#if RHI_WANT_BREADCRUMB_EVENTS
	SCOPED_NAMED_EVENT(BreadcrumbSetup, FColor::Emerald);

	// Replay event scope ops over the whole graph to snapshot breadcrumb state at each
	// parallel set boundary; parallel command lists import these snapshots later.
	for (FRDGPassHandle PassHandle = GetProloguePassHandle(); PassHandle <= GetEpiloguePassHandle(); ++PassHandle)
	{
		FRDGPass* Pass = Passes[PassHandle];

		if (Pass->bCulled)
		{
			continue;
		}

		if (Pass->bParallelExecuteBegin)
		{
			FParallelPassSet& ParallelPassSet = ParallelPassSets[Pass->ParallelPassSetIndex];
			ParallelPassSet.BreadcrumbStateBegin = BreadcrumbState->Copy(Allocator);
			ParallelPassSet.BreadcrumbStateEnd = ParallelPassSet.BreadcrumbStateBegin;
		}

		Pass->GPUScopeOpsPrologue.Event.Execute(*BreadcrumbState);
		Pass->GPUScopeOpsEpilogue.Event.Execute(*BreadcrumbState);

		if (Pass->bParallelExecuteEnd)
		{
			FParallelPassSet& ParallelPassSet = ParallelPassSets[Pass->ParallelPassSetIndex];

			// Only take a fresh end snapshot when the scope ops actually changed the state.
			if (ParallelPassSet.BreadcrumbStateEnd->Version != BreadcrumbState->Version)
			{
				ParallelPassSet.BreadcrumbStateEnd = BreadcrumbState->Copy(Allocator);
			}
		}
	}
#endif
}
/** Launches one recording task per parallel pass set. Each task records its passes into a
 *  dedicated command list and finishes recording; the set is flagged initialized (atomically)
 *  before the task launches so submission logic elsewhere can pick it up.
 */
void FRDGBuilder::DispatchParallelExecute()
{
	SCOPED_NAMED_EVENT(DispatchParallelExecute, FColor::Emerald);

	check(ParallelExecuteEvents.IsEmpty());
	ParallelExecuteEvents.Reserve(ParallelPassSets.Num());

	for (FParallelPassSet& ParallelPassSet : ParallelPassSets)
	{
		// NOTE(review): command list ownership transfers to the pass set; presumably freed
		// after submission — confirm against the submit path.
		FRHICommandList* RHICmdListPass = new FRHICommandList(FRHIGPUMask::All());
		ParallelPassSet.CmdList = RHICmdListPass;

		IF_RHI_WANT_BREADCRUMB_EVENTS(RHICmdListPass->ImportBreadcrumbState(*ParallelPassSet.BreadcrumbStateBegin));

		// Mark this set as initialized so that it can be submitted.
		FPlatformAtomics::AtomicStore(&ParallelPassSet.bInitialized, 1);

		// The set is captured by reference: ParallelPassSets must stay stable while tasks run.
		ParallelExecuteEvents.Emplace(UE::Tasks::Launch(TEXT("FRDGBuilder::ParallelExecute"), [this, &ParallelPassSet, RHICmdListPass]
		{
			SCOPED_NAMED_EVENT(ParallelExecute, FColor::Emerald);
			FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);

			for (FRDGPass* Pass : ParallelPassSet.Passes)
			{
				ExecutePass(Pass, *RHICmdListPass);
			}

			RHICmdListPass->FinishRecording();
		}, LowLevelTasks::ETaskPriority::High));
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Creates the RHI uniform buffers queued in UniformBuffersToCreate, either inline or on a
 *  task when parallel setup is enabled and the queue is large enough to be worth dispatching.
 *  @return the creation task, or an empty (complete) task when run inline.
 */
UE::Tasks::FTask FRDGBuilder::CreateUniformBuffers()
{
	SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::CreateUniformBuffers", FColor::Magenta);

	// Below this queue size the task launch overhead outweighs any parallelism win.
	const int32 ParallelDispatchThreshold = 4;

	const auto CreateUniformBuffersFunction = [this]
	{
		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::CreateUniformBuffers", FColor::Magenta);
		for (FRDGUniformBufferHandle UniformBufferHandle : UniformBuffersToCreate)
		{
			UniformBuffers[UniformBufferHandle]->InitRHI();
		}
		UniformBuffersToCreate.Reset();
	};

	UE::Tasks::FTask Task;

	if (bParallelSetupEnabled && UniformBuffersToCreate.Num() > ParallelDispatchThreshold)
	{
		// The lambda is captured by copy so the task remains valid after this scope ends.
		Task = UE::Tasks::Launch(TEXT("FRDGBuilder::CreateUniformBuffer"),
			[CreateUniformBuffersFunction]
		{
			FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
			CreateUniformBuffersFunction();
		}, LowLevelTasks::ETaskPriority::High);
	}
	else
	{
		CreateUniformBuffersFunction();
	}

	return Task;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
void FRDGBuilder::AddProloguePass()
{
ProloguePass = SetupEmptyPass(Passes.Allocate<FRDGSentinelPass>(Allocator, RDG_EVENT_NAME("Graph Prologue (Graphics)")));
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Runs everything that must precede the pass lambda on its command list: begins / ends the
 *  prologue barrier batches, applies the async compute budget, opens the RHI render pass for
 *  raster passes, and begins UAV overlap.
 */
void FRDGBuilder::ExecutePassPrologue(FRHIComputeCommandList& RHICmdListPass, FRDGPass* Pass)
{
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDGBuilder_ExecutePassPrologue, GRDGVerboseCSVStats != 0);

	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExecutePassBegin(Pass));

#if RDG_CMDLIST_STATS
	if (Pass->bSetCommandListStat)
	{
		RHICmdListPass.SetCurrentStat(Pass->CommandListStat);
	}
#endif

	const ERDGPassFlags PassFlags = Pass->Flags;
	const ERHIPipeline PassPipeline = Pass->Pipeline;

	// Begin split barriers that open at this pass...
	if (Pass->PrologueBarriersToBegin)
	{
		IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchBegin(Pass, *Pass->PrologueBarriersToBegin));
		Pass->PrologueBarriersToBegin->Submit(RHICmdListPass, PassPipeline);
	}

	// ...then end the barriers that must complete before the pass's work starts.
	IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchEnd(Pass, Pass->PrologueBarriersToEnd));
	Pass->PrologueBarriersToEnd.Submit(RHICmdListPass, PassPipeline);

	// Only touch the async compute budget when it actually changed.
	if (PassPipeline == ERHIPipeline::AsyncCompute && !Pass->bSentinel && AsyncComputeBudgetState != Pass->AsyncComputeBudget)
	{
		AsyncComputeBudgetState = Pass->AsyncComputeBudget;
		RHICmdListPass.SetAsyncComputeBudget(Pass->AsyncComputeBudget);
	}

	if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::Raster))
	{
		// Render pass begin can be skipped when merged with a preceding raster pass.
		if (!EnumHasAnyFlags(PassFlags, ERDGPassFlags::SkipRenderPass) && !Pass->SkipRenderPassBegin())
		{
			static_cast<FRHICommandList&>(RHICmdListPass).BeginRenderPass(Pass->GetParameters().GetRenderPassInfo(), Pass->GetName());
		}
	}

	BeginUAVOverlap(Pass, RHICmdListPass);
}
/** Runs everything that must follow the pass lambda: ends UAV overlap, closes the RHI render
 *  pass, and submits all epilogue barrier batches (graphics, async compute, cross-pipe,
 *  shared), accumulating the resulting transitions into a single BeginTransitions call.
 */
void FRDGBuilder::ExecutePassEpilogue(FRHIComputeCommandList& RHICmdListPass, FRDGPass* Pass)
{
	CSV_SCOPED_TIMING_STAT_EXCLUSIVE_CONDITIONAL(RDGBuilder_ExecutePassEpilogue, GRDGVerboseCSVStats != 0);

	EndUAVOverlap(Pass, RHICmdListPass);

	const ERDGPassFlags PassFlags = Pass->Flags;
	const ERHIPipeline PassPipeline = Pass->Pipeline;
	const FRDGParameterStruct PassParameters = Pass->GetParameters();

	// Render pass end can be skipped when merged with a following raster pass.
	if (EnumHasAnyFlags(PassFlags, ERDGPassFlags::Raster) && !EnumHasAnyFlags(PassFlags, ERDGPassFlags::SkipRenderPass) && !Pass->SkipRenderPassEnd())
	{
		static_cast<FRHICommandList&>(RHICmdListPass).EndRenderPass();
	}

	// Transitions from all begin-batches below are queued here and begun together.
	FRDGTransitionQueue Transitions;

	IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchBegin(Pass, Pass->EpilogueBarriersToBeginForGraphics));
	Pass->EpilogueBarriersToBeginForGraphics.Submit(RHICmdListPass, PassPipeline, Transitions);

	if (Pass->EpilogueBarriersToBeginForAsyncCompute)
	{
		IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchBegin(Pass, *Pass->EpilogueBarriersToBeginForAsyncCompute));
		Pass->EpilogueBarriersToBeginForAsyncCompute->Submit(RHICmdListPass, PassPipeline, Transitions);
	}

	if (Pass->EpilogueBarriersToBeginForAll)
	{
		IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchBegin(Pass, *Pass->EpilogueBarriersToBeginForAll));
		Pass->EpilogueBarriersToBeginForAll->Submit(RHICmdListPass, PassPipeline, Transitions);
	}

	for (FRDGBarrierBatchBegin* BarriersToBegin : Pass->SharedEpilogueBarriersToBegin)
	{
		IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchBegin(Pass, *BarriersToBegin));
		BarriersToBegin->Submit(RHICmdListPass, PassPipeline, Transitions);
	}

	if (!Transitions.IsEmpty())
	{
		RHICmdListPass.BeginTransitions(Transitions);
	}

	if (Pass->EpilogueBarriersToEnd)
	{
		IF_RDG_ENABLE_DEBUG(BarrierValidation.ValidateBarrierBatchEnd(Pass, *Pass->EpilogueBarriersToEnd));
		Pass->EpilogueBarriersToEnd->Submit(RHICmdListPass, PassPipeline);
	}

	IF_RDG_ENABLE_DEBUG(UserValidation.ValidateExecutePassEnd(Pass));
}
/** Executes a single pass on the given command list: replays scope ops, runs the prologue,
 *  invokes the pass lambda, runs the epilogue, then handles optional RHI-thread dispatch and
 *  the debug flush-GPU-after-every-pass mode.
 */
void FRDGBuilder::ExecutePass(FRDGPass* Pass, FRHIComputeCommandList& RHICmdListPass)
{
	{
		// Note that we must do this before doing anything with RHICmdList for the pass.
		// For example, if this pass only executes on GPU 1 we want to avoid adding a
		// 0-duration event for this pass on GPU 0's time line.
		SCOPED_GPU_MASK(RHICmdListPass, Pass->GPUMask);

		// Extra scope here to ensure nested ordering of SCOPED_GPU_MASK and FRHICommandListScopedPipeline constructor/destructors
		{
			FRHICommandListScopedPipeline Scope(RHICmdListPass, Pass->Pipeline);

#if 0 // Disabled by default to reduce memory usage in Insights.
			SCOPED_NAMED_EVENT_TCHAR(Pass->GetName(), FColor::Magenta);
#endif

#if RDG_CPU_SCOPES
			// Parallel-executed passes run CPU scope ops elsewhere.
			if (!Pass->bParallelExecute)
			{
				Pass->CPUScopeOps.Execute();
			}
#endif

			IF_RDG_ENABLE_DEBUG(ConditionalDebugBreak(RDG_BREAKPOINT_PASS_EXECUTE, BuilderName.GetTCHAR(), Pass->GetName()));

			Pass->GPUScopeOpsPrologue.Execute(RHICmdListPass);

			ExecutePassPrologue(RHICmdListPass, Pass);

#if RDG_DUMP_RESOURCES_AT_EACH_DRAW
			BeginPassDump(Pass);
#endif

			Pass->Execute(RHICmdListPass);

#if RDG_DUMP_RESOURCES_AT_EACH_DRAW
			EndPassDump(Pass);
#endif

			ExecutePassEpilogue(RHICmdListPass, Pass);

			Pass->GPUScopeOpsEpilogue.Execute(RHICmdListPass);
		}
	}

	// Parallel-recorded passes dispatch when their whole set is submitted, not per pass.
	if (!Pass->bParallelExecute && Pass->bDispatchAfterExecute)
	{
		if (Pass->Pipeline == ERHIPipeline::Graphics)
		{
			RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
		}
	}

	// Debug aid: drain the GPU after every pass to isolate crashes and hangs.
	if (GRDGDebugFlushGPU)
	{
		check(!GRDGAsyncCompute && !bParallelExecuteEnabled);
		RHICmdList.SubmitCommandsAndFlushGPU();
		RHICmdList.BlockUntilGPUIdle();
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Realizes the RHI resources needed by every pass whose resource lifetimes begin at the given
 *  resource pass: textures, buffers, uniform buffers (queued for creation once), and views.
 */
void FRDGBuilder::BeginResourcesRHI(FRDGPass* ResourcePass, FRDGPassHandle ExecutePassHandle)
{
	for (FRDGPass* PassToBegin : ResourcePass->ResourcesToBegin)
	{
		for (const auto& TextureState : PassToBegin->TextureStates)
		{
			BeginResourceRHI(ExecutePassHandle, TextureState.Texture);
		}

		for (const auto& BufferState : PassToBegin->BufferStates)
		{
			BeginResourceRHI(ExecutePassHandle, BufferState.Buffer);
		}

		// Queue each uniform buffer for creation exactly once.
		for (FRDGUniformBufferHandle Handle : PassToBegin->UniformBuffers)
		{
			FRDGUniformBuffer* UniformBuffer = UniformBuffers[Handle];

			if (!UniformBuffer->bQueuedForCreate)
			{
				UniformBuffer->bQueuedForCreate = true;
				UniformBuffersToCreate.Add(Handle);
			}
		}

		for (FRDGViewHandle ViewHandle : PassToBegin->Views)
		{
			InitRHI(Views[ViewHandle]);
		}
	}
}
/** Forwards each pass-held reference count to EndResourceRHI for every texture and buffer
 *  registered by the passes whose resource lifetimes end at the given resource pass.
 */
void FRDGBuilder::EndResourcesRHI(FRDGPass* ResourcePass, FRDGPassHandle ExecutePassHandle)
{
	for (FRDGPass* PassToEnd : ResourcePass->ResourcesToEnd)
	{
		for (const auto& TextureState : PassToEnd->TextureStates)
		{
			EndResourceRHI(ExecutePassHandle, TextureState.Texture, TextureState.ReferenceCount);
		}

		for (const auto& BufferState : PassToEnd->BufferStates)
		{
			EndResourceRHI(ExecutePassHandle, BufferState.Buffer, BufferState.ReferenceCount);
		}
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Queues the transitions required by one pass: every texture and buffer state recorded on the
 *  pass is advanced to its merge state, and trace dependencies are registered when enabled.
 */
void FRDGBuilder::CollectPassBarriers(FRDGPass* Pass)
{
	IF_RDG_ENABLE_DEBUG(ConditionalDebugBreak(RDG_BREAKPOINT_PASS_COMPILE, BuilderName.GetTCHAR(), Pass->GetName()));

	const FRDGPassHandle PassHandle = Pass->Handle;

	for (auto& TextureState : Pass->TextureStates)
	{
		AddTransition(PassHandle, TextureState.Texture, TextureState.MergeState);
		IF_RDG_ENABLE_TRACE(Trace.AddTexturePassDependency(TextureState.Texture, Pass));
	}

	for (auto& BufferState : Pass->BufferStates)
	{
		AddTransition(PassHandle, BufferState.Buffer, *BufferState.MergeState);
		IF_RDG_ENABLE_TRACE(Trace.AddBufferPassDependency(BufferState.Buffer, Pass));
	}
}
/** Creates the RHI transitions for every queued barrier batch, then clears the queue. Runs the
 *  batches through a parallel-for (with PreWork folded in) when the queue is large enough and
 *  parallel setup is enabled; otherwise runs PreWork and the batches serially.
 */
void FRDGBuilder::CreatePassBarriers(TFunctionRef<void()> PreWork)
{
	// Batch size per parallel-for task; queues at or below this size run serially.
	const int32 BatchSizePerTask = 32;

	const bool bRunParallel = bParallelSetupEnabled && TransitionCreateQueue.Num() > BatchSizePerTask;

	if (!bRunParallel)
	{
		PreWork();

		SCOPED_NAMED_EVENT_TEXT("FRDGBuilder::CreatePassBarriers", FColor::Magenta);

		for (FRDGBarrierBatchBegin* Batch : TransitionCreateQueue)
		{
			Batch->CreateTransition();
		}
	}
	else
	{
		ParallelForWithPreWork(TEXT("FRDGBuilder::CreatePassBarriers"), TransitionCreateQueue.Num(), BatchSizePerTask, [this](int32 Index)
		{
			TransitionCreateQueue[Index]->CreateTransition();
		}, PreWork);
	}

	TransitionCreateQueue.Reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/** Queues the final transition for a texture at the end of the graph, moving it into its
 *  epilogue access state (or Discard for unacquired transient textures), and keeps the
 *  underlying pooled allocation alive until execution completes.
 */
void FRDGBuilder::AddEpilogueTransition(FRDGTextureRef Texture)
{
	// Only the last owner of a realized, non-culled texture emits the epilogue transition.
	if (!Texture->HasRHI() || Texture->IsCulled() || !Texture->bLastOwner)
	{
		return;
	}

	check(IsImmediateMode() || Texture->bExtracted || Texture->ReferenceCount == FRDGViewableResource::DeallocatedReferenceCount);

	if (!EnumHasAnyFlags(Texture->Flags, ERDGTextureFlags::SkipTracking))
	{
		const FRDGPassHandle EpiloguePassHandle = GetEpiloguePassHandle();

		FRDGSubresourceState SubresourceState;
		SubresourceState.SetPass(ERHIPipeline::Graphics, EpiloguePassHandle);

		// Texture is using the RHI transient allocator. Transition it back to Discard in the final pass it is used.
		if (Texture->bTransient && !Texture->TransientTexture->IsAcquired())
		{
			const TInterval<uint32> DiscardPasses = Texture->TransientTexture->GetDiscardPasses();
			const FRDGPassHandle MinDiscardPassHandle(DiscardPasses.Min);
			const FRDGPassHandle MaxDiscardPassHandle(FMath::Min<uint32>(DiscardPasses.Max, EpiloguePassHandle.GetIndex()));

			AddAliasingTransition(MinDiscardPassHandle, MaxDiscardPassHandle, Texture, FRHITransientAliasingInfo::Discard(Texture->GetRHIUnchecked()));

			SubresourceState.SetPass(ERHIPipeline::Graphics, MaxDiscardPassHandle);
			Texture->EpilogueAccess = ERHIAccess::Discard;
		}

		SubresourceState.Access = Texture->EpilogueAccess;

		// Broadcast the single epilogue state to every subresource, then queue the transition.
		InitTextureSubresources(ScratchTextureState, Texture->Layout, &SubresourceState);
		AddTransition(EpiloguePassHandle, Texture, ScratchTextureState);
		ScratchTextureState.Reset();

		EpilogueResourceAccesses.Emplace(Texture->GetRHI(), Texture->EpilogueAccess);
	}

	if (Texture->Allocation)
	{
		ActivePooledTextures.Emplace(MoveTemp(Texture->Allocation));
	}
	else if (!Texture->bTransient)
	{
		// Non-transient textures need to be 'resurrected' to hold the last reference in the chain.
		// Transient textures don't have that restriction since there's no actual pooling happening.
		ActivePooledTextures.Emplace(Texture->RenderTarget);
	}
}
/** Queues the final transition for a buffer at the end of the graph, moving it into its
 *  epilogue access state (or Discard for transient buffers), and keeps the underlying pooled
 *  allocation alive until execution completes.
 */
void FRDGBuilder::AddEpilogueTransition(FRDGBufferRef Buffer)
{
	// Only the last owner of a realized, non-culled buffer emits the epilogue transition.
	if (!Buffer->HasRHI() || Buffer->IsCulled() || !Buffer->bLastOwner)
	{
		return;
	}

	check(IsImmediateMode() || Buffer->bExtracted || Buffer->ReferenceCount == FRDGViewableResource::DeallocatedReferenceCount);

	if (!EnumHasAnyFlags(Buffer->Flags, ERDGBufferFlags::SkipTracking))
	{
		const FRDGPassHandle EpiloguePassHandle = GetEpiloguePassHandle();

		FRDGSubresourceState SubresourceState;
		SubresourceState.SetPass(ERHIPipeline::Graphics, EpiloguePassHandle);

		// Buffer is using the RHI transient allocator. Transition it back to Discard in the final pass it is used.
		if (Buffer->bTransient)
		{
			const TInterval<uint32> DiscardPasses = Buffer->TransientBuffer->GetDiscardPasses();
			const FRDGPassHandle MinDiscardPassHandle(DiscardPasses.Min);
			const FRDGPassHandle MaxDiscardPassHandle(FMath::Min<uint32>(DiscardPasses.Max, EpiloguePassHandle.GetIndex()));

			AddAliasingTransition(MinDiscardPassHandle, MaxDiscardPassHandle, Buffer, FRHITransientAliasingInfo::Discard(Buffer->GetRHIUnchecked()));

			SubresourceState.SetPass(ERHIPipeline::Graphics, MaxDiscardPassHandle);
			Buffer->EpilogueAccess = ERHIAccess::Discard;
		}

		SubresourceState.Access = Buffer->EpilogueAccess;

		AddTransition(Buffer->LastPass, Buffer, SubresourceState);

		EpilogueResourceAccesses.Emplace(Buffer->GetRHI(), Buffer->EpilogueAccess);
	}

	if (Buffer->Allocation)
	{
		ActivePooledBuffers.Emplace(MoveTemp(Buffer->Allocation));
	}
	else if (!Buffer->bTransient)
	{
		// Non-transient buffers need to be 'resurrected' to hold the last reference in the chain.
		// Transient buffers don't have that restriction since there's no actual pooling happening.
		ActivePooledBuffers.Emplace(Buffer->PooledBuffer);
	}
}
/** Compares each subresource's tracked state against the requested after-state and queues an
 *  RHI transition where one is required, then commits the after-state as the new tracked state.
 *  On RHIs without separate depth/stencil copy access, depth and stencil split transitions
 *  involving copy states are first fused so they share one barrier batch.
 */
void FRDGBuilder::AddTransition(FRDGPassHandle PassHandle, FRDGTextureRef Texture, FRDGTextureSubresourceStateIndirect& StateAfter)
{
	const FRDGTextureSubresourceLayout Layout = Texture->Layout;
	FRDGTextureSubresourceState& StateBefore = Texture->GetState();
	const uint32 SubresourceCount = StateBefore.Num();

	checkf(StateBefore.Num() == Layout.GetSubresourceCount() && StateBefore.Num() == StateAfter.Num(),
		TEXT("Before state array (%d) does not match after state array (%d) for resource %s on pass %s."),
		StateBefore.Num(), StateAfter.Num(), Texture->Name, Passes[PassHandle]->GetName());

	if (!GRHISupportsSeparateDepthStencilCopyAccess && Texture->Desc.Format == PF_DepthStencil)
	{
		// Certain RHIs require a fused depth / stencil copy state. For any mip / slice transition involving a copy state,
		// adjust the split transitions so both subresources are transitioned using the same barrier batch (i.e. the RHI transition).
		// Note that this is only possible when async compute is disabled, as it's not possible to merge transitions from different pipes.
		// There are two cases to correct (D for depth, S for stencil, horizontal axis is time):
		//
		// [Pass] A B C A B C
		// [D] X --> X Corrected To: X --> X
		// [S] X --------> X X --> X (S is pushed forward to transition with D on pass B)
		//
		// Case 2a|b: one plane transitioning out of a copy state on pass A to pass B (this pass), but the other is not transitioning yet.
		//
		// [Pass] A B ? A B
		// [D] X --> X Corrected To: X --> X
		// [S] X --------> X X --> X (S's state is unknown, so it transitions with D and matches D's state).
		const ERHIPipeline GraphicsPipe = ERHIPipeline::Graphics;
		const uint32 NumSlicesAndMips = Layout.NumMips * Layout.NumArraySlices;

		// Depth planes occupy subresources [0, NumSlicesAndMips); stencil follows immediately after.
		for (uint32 DepthIndex = 0, StencilIndex = NumSlicesAndMips; DepthIndex < NumSlicesAndMips; ++DepthIndex, ++StencilIndex)
		{
			FRDGSubresourceState*& DepthStateAfter = StateAfter[DepthIndex];
			FRDGSubresourceState*& StencilStateAfter = StateAfter[StencilIndex];

			// Skip if neither depth nor stencil are being transitioned.
			if (!DepthStateAfter && !StencilStateAfter)
			{
				continue;
			}

			FRDGSubresourceState& DepthStateBefore = StateBefore[DepthIndex];
			FRDGSubresourceState& StencilStateBefore = StateBefore[StencilIndex];

			// Case 1: transitioning into a fused copy state.
			if (DepthStateAfter && EnumHasAnyFlags(DepthStateAfter->Access, ERHIAccess::CopySrc | ERHIAccess::CopyDest))
			{
				check(StencilStateAfter && StencilStateAfter->Access == DepthStateAfter->Access);

				// Align both planes' last-producer pass so they transition in the same batch.
				const FRDGPassHandle MaxPassHandle = FRDGPassHandle::Max(DepthStateBefore.LastPass[GraphicsPipe], StencilStateBefore.LastPass[GraphicsPipe]);
				DepthStateBefore.LastPass[GraphicsPipe] = MaxPassHandle;
				StencilStateBefore.LastPass[GraphicsPipe] = MaxPassHandle;
			}
			// Case 2: transitioning out of a fused copy state.
			else if (EnumHasAnyFlags(DepthStateBefore.Access, ERHIAccess::CopySrc | ERHIAccess::CopyDest))
			{
				check(StencilStateBefore.Access == DepthStateBefore.Access);
				check(StencilStateBefore.GetLastPass() == DepthStateBefore.GetLastPass());

				// Case 2a: depth unknown, so transition to match stencil.
				if (!DepthStateAfter)
				{
					DepthStateAfter = AllocSubresource(*StencilStateAfter);
					DepthStateAfter->SetPass(GraphicsPipe, PassHandle);
				}
				// Case 2b: stencil unknown, so transition to match depth.
				else if (!StencilStateAfter)
				{
					StencilStateAfter = AllocSubresource(*DepthStateAfter);
					StencilStateAfter->SetPass(GraphicsPipe, PassHandle);
				}
				// Two valid after states should be transitioning on this pass.
				else
				{
					check(StencilStateAfter->GetFirstPass() == PassHandle && DepthStateAfter->GetFirstPass() == PassHandle);
				}
			}
		}
	}

	for (uint32 SubresourceIndex = 0; SubresourceIndex < SubresourceCount; ++SubresourceIndex)
	{
		// A null after-state means the subresource is untouched by this pass.
		if (const FRDGSubresourceState* SubresourceStateAfter = StateAfter[SubresourceIndex])
		{
			check(SubresourceStateAfter->Access != ERHIAccess::Unknown);

			FRDGSubresourceState& SubresourceStateBefore = StateBefore[SubresourceIndex];

			if (FRDGSubresourceState::IsTransitionRequired(SubresourceStateBefore, *SubresourceStateAfter))
			{
				const FRDGTextureSubresource Subresource = Layout.GetSubresource(SubresourceIndex);

				FRHITransitionInfo Info;
				Info.Texture = Texture->GetRHIUnchecked();
				Info.Type = FRHITransitionInfo::EType::Texture;
				Info.Flags = SubresourceStateAfter->Flags;
				Info.AccessBefore = SubresourceStateBefore.Access;
				Info.AccessAfter = SubresourceStateAfter->Access;
				Info.MipIndex = Subresource.MipIndex;
				Info.ArraySlice = Subresource.ArraySlice;
				Info.PlaneSlice = Subresource.PlaneSlice;

				// Coming from Discard implies the contents are undefined; tell the RHI.
				if (Info.AccessBefore == ERHIAccess::Discard)
				{
					Info.Flags |= EResourceTransitionFlags::Discard;
				}

				AddTransition(Texture, SubresourceStateBefore, *SubresourceStateAfter, Info);
			}

			// Commit the after-state as the new tracked state, transition or not.
			SubresourceStateBefore = *SubresourceStateAfter;
		}
	}
}
void FRDGBuilder::AddTransition(FRDGPassHandle PassHandle, FRDGBufferRef Buffer, FRDGSubresourceState StateAfter)
{
	// Buffers have a single subresource, so the whole resource transitions at once.
	check(StateAfter.Access != ERHIAccess::Unknown);

	FRDGSubresourceState& PreviousState = Buffer->GetState();

	if (FRDGSubresourceState::IsTransitionRequired(PreviousState, StateAfter))
	{
		FRHITransitionInfo TransitionInfo;
		TransitionInfo.Type = FRHITransitionInfo::EType::Buffer;
		TransitionInfo.Resource = Buffer->GetRHIUnchecked();
		TransitionInfo.Flags = StateAfter.Flags;
		TransitionInfo.AccessBefore = PreviousState.Access;
		TransitionInfo.AccessAfter = StateAfter.Access;

		AddTransition(Buffer, PreviousState, StateAfter, TransitionInfo);
	}

	// The tracked state advances whether or not an actual barrier was emitted.
	PreviousState = StateAfter;
}
// Queues the RHI transition described by TransitionInfo for Resource. Chooses where the begin and
// end halves of the (possibly split) barrier are recorded -- pass prologues vs. epilogues -- based
// on which pipelines the before/after states occupy. StateBefore / StateAfter are taken by value;
// the caller updates its own tracked state separately.
void FRDGBuilder::AddTransition(
FRDGViewableResource* Resource,
FRDGSubresourceState StateBefore,
FRDGSubresourceState StateAfter,
const FRHITransitionInfo& TransitionInfo)
{
const ERHIPipeline Graphics = ERHIPipeline::Graphics;
const ERHIPipeline AsyncCompute = ERHIPipeline::AsyncCompute;
#if RDG_ENABLE_DEBUG
// Sanity-check both state snapshots (debug builds only).
StateBefore.Validate();
StateAfter.Validate();
#endif
if (IsImmediateMode())
{
// Immediate mode simply enqueues the barrier into the 'after' pass. Everything is on the graphics pipe.
AddToPrologueBarriers(StateAfter.FirstPass[Graphics], [&](FRDGBarrierBatchBegin& Barriers)
{
Barriers.AddTransition(Resource, TransitionInfo);
});
return;
}
const ERHIPipeline PipelinesBefore = StateBefore.GetPipelines();
const ERHIPipeline PipelinesAfter = StateAfter.GetPipelines();
check(PipelinesBefore != ERHIPipeline::None && PipelinesAfter != ERHIPipeline::None);
checkf(StateBefore.GetLastPass() <= StateAfter.GetFirstPass(), TEXT("Submitted a state for '%s' that begins before our previous state has ended."), Resource->Name);
const FRDGPassHandlesByPipeline& PassesBefore = StateBefore.LastPass;
const FRDGPassHandlesByPipeline& PassesAfter = StateAfter.FirstPass;
// 1-to-1 or 1-to-N pipe transition.
if (PipelinesBefore != ERHIPipeline::All)
{
const FRDGPassHandle BeginPassHandle = StateBefore.GetLastPass();
const FRDGPassHandle FirstEndPassHandle = StateAfter.GetFirstPass();
FRDGPass* BeginPass = nullptr;
FRDGBarrierBatchBegin* BarriersToBegin = nullptr;
// Issue the begin in the epilogue of the begin pass if the barrier is being split across multiple passes or the barrier end is in the epilogue.
if (BeginPassHandle < FirstEndPassHandle)
{
BeginPass = GetEpilogueBarrierPass(BeginPassHandle);
BarriersToBegin = &BeginPass->GetEpilogueBarriersToBeginFor(Allocator, TransitionCreateQueue, PipelinesAfter);
}
// This is an immediate prologue transition in the same pass. Issue the begin in the prologue.
else
{
checkf(PipelinesAfter == ERHIPipeline::Graphics,
TEXT("Attempted to queue an immediate async pipe transition for %s. Pipelines: %s. Async transitions must be split."),
Resource->Name, *GetRHIPipelineName(PipelinesAfter));
BeginPass = GetPrologueBarrierPass(BeginPassHandle);
BarriersToBegin = &BeginPass->GetPrologueBarriersToBegin(Allocator, TransitionCreateQueue);
}
BarriersToBegin->AddTransition(Resource, TransitionInfo);
// Queue the corresponding end on every pipeline the 'after' state touches.
for (ERHIPipeline Pipeline : GetRHIPipelines())
{
/** If doing a 1-to-N transition and this is the same pipe as the begin, we end it immediately afterwards in the epilogue
 * of the begin pass. This is because we can't guarantee that the other pipeline won't join back before the end. This can
 * happen if the forking async compute pass joins back to graphics (via another independent transition) before the current
 * graphics transition is ended.
 *
 * Async Compute Pipe:               EndA  BeginB
 *                                 /            \
 * Graphics Pipe:           BeginA               EndB   EndA
 *
 * A is our 1-to-N transition and B is a future transition of the same resource that we haven't evaluated yet. Instead, the
 * same pipe End is performed in the epilogue of the begin pass, which removes the spit barrier but simplifies the tracking:
 *
 * Async Compute Pipe:               EndA  BeginB
 *                                 /            \
 * Graphics Pipe:           BeginA  EndA         EndB
 */
if ((PipelinesBefore == Pipeline && PipelinesAfter == ERHIPipeline::All))
{
AddToEpilogueBarriersToEnd(BeginPassHandle, *BarriersToBegin);
}
else if (EnumHasAnyFlags(PipelinesAfter, Pipeline))
{
AddToPrologueBarriersToEnd(PassesAfter[Pipeline], *BarriersToBegin);
}
}
}
// N-to-1 or N-to-N transition.
else
{
checkf(StateBefore.GetLastPass() != StateAfter.GetFirstPass(),
TEXT("Attempted to queue a transition for resource '%s' from '%s' to '%s', but previous and next passes are the same on one pipe."),
Resource->Name, *GetRHIPipelineName(PipelinesBefore), *GetRHIPipelineName(PipelinesAfter));
// Batches that begin on both pipes at the same pass pair are shared via a keyed map so every
// resource transitioning across the same fork/join reuses one FRDGBarrierBatchBegin.
FRDGBarrierBatchBeginId Id;
Id.PipelinesAfter = PipelinesAfter;
for (ERHIPipeline Pipeline : GetRHIPipelines())
{
Id.Passes[Pipeline] = GetEpilogueBarrierPassHandle(PassesBefore[Pipeline]);
}
FRDGBarrierBatchBegin*& BarriersToBegin = BarrierBatchMap.FindOrAdd(Id);
if (!BarriersToBegin)
{
FRDGPassesByPipeline BarrierBatchPasses;
BarrierBatchPasses[Graphics]     = Passes[Id.Passes[Graphics]];
BarrierBatchPasses[AsyncCompute] = Passes[Id.Passes[AsyncCompute]];
BarriersToBegin = Allocator.AllocNoDestruct<FRDGBarrierBatchBegin>(PipelinesBefore, PipelinesAfter, GetEpilogueBarriersToBeginDebugName(PipelinesAfter), BarrierBatchPasses);
TransitionCreateQueue.Emplace(BarriersToBegin);
for (FRDGPass* Pass : BarrierBatchPasses)
{
Pass->SharedEpilogueBarriersToBegin.Add(BarriersToBegin);
}
}
BarriersToBegin->AddTransition(Resource, TransitionInfo);
// End the batch in the prologue of the first pass on each destination pipeline.
for (ERHIPipeline Pipeline : GetRHIPipelines())
{
if (EnumHasAnyFlags(PipelinesAfter, Pipeline))
{
AddToPrologueBarriersToEnd(PassesAfter[Pipeline], *BarriersToBegin);
}
}
}
}
void FRDGBuilder::AddAliasingTransition(FRDGPassHandle BeginPassHandle, FRDGPassHandle EndPassHandle, FRDGViewableResource* Resource, const FRHITransientAliasingInfo& Info)
{
	check(BeginPassHandle <= EndPassHandle);

	FRDGBarrierBatchBegin* BatchToBegin = nullptr;
	FRDGPass* PassToEnd = nullptr;

	if (BeginPassHandle == EndPassHandle)
	{
		// Begin and end fall on the same pass: record the alias begin in that pass's prologue.
		FRDGPass* Pass = Passes[BeginPassHandle];
		check(GetPrologueBarrierPassHandle(BeginPassHandle) == BeginPassHandle);
		check(Pass->GetPipeline() == ERHIPipeline::Graphics);
		BatchToBegin = &Pass->GetPrologueBarriersToBegin(Allocator, TransitionCreateQueue);
		PassToEnd = Pass;
	}
	else
	{
		// Split alias: begin in the epilogue of the begin pass, end in the prologue of the end pass.
		FRDGPass* PassToBegin = GetEpilogueBarrierPass(BeginPassHandle);
		PassToEnd = Passes[EndPassHandle];
		check(GetPrologueBarrierPassHandle(EndPassHandle) == EndPassHandle);
		check(PassToBegin->GetPipeline() == ERHIPipeline::Graphics);
		check(PassToEnd->GetPipeline() == ERHIPipeline::Graphics);
		BatchToBegin = &PassToBegin->GetEpilogueBarriersToBeginForGraphics(Allocator, TransitionCreateQueue);
	}

	BatchToBegin->AddAlias(Resource, Info);
	PassToEnd->GetPrologueBarriersToEnd(Allocator).AddDependency(BatchToBegin);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
void FRDGBuilder::SetRHI(FRDGTexture* Texture, IPooledRenderTarget* RenderTarget, FRDGPassHandle PassHandle)
{
	Texture->RenderTarget = RenderTarget;

	FRHITransientTexture* TransientTexture = RenderTarget->GetTransientTexture();

	if (TransientTexture)
	{
		// Transient path: the render target wraps a transient allocation.
		FRDGTransientRenderTarget* TransientRenderTarget = static_cast<FRDGTransientRenderTarget*>(RenderTarget);
		Texture->Allocation = TRefCountPtr<FRDGTransientRenderTarget>(TransientRenderTarget);
		SetRHI(Texture, TransientTexture, PassHandle);
	}
	else
	{
		// Pooled path: the render target owns a pooled texture.
		FPooledRenderTarget* PooledRenderTarget = static_cast<FPooledRenderTarget*>(RenderTarget);
		Texture->Allocation = TRefCountPtr<FPooledRenderTarget>(PooledRenderTarget);
		SetRHI(Texture, &PooledRenderTarget->PooledTexture, PassHandle);
	}
}
void FRDGBuilder::SetRHI(FRDGTexture* Texture, FRDGPooledTexture* PooledTexture, FRDGPassHandle PassHandle)
{
	Texture->ResourceRHI = PooledTexture->GetRHI();
	Texture->PooledTexture = PooledTexture;
	Texture->ViewCache = &PooledTexture->ViewCache;
	Texture->FirstPass = PassHandle;

	FRDGTexture*& PreviousOwner = PooledTextureOwnershipMap.FindOrAdd(PooledTexture);

	if (PreviousOwner)
	{
		// Another RDG texture already aliased this pooled texture: chain the ownership list and
		// share the subresource state allocation so tracking carries across aliases.
		PreviousOwner->NextOwner = Texture->Handle;
		PreviousOwner->bLastOwner = false;
		Texture->State = PreviousOwner->State;
	}
	else
	{
		// First use of this pooled texture: allocate fresh state starting at the prologue pass.
		Texture->State = Allocator.AllocNoDestruct<FRDGTextureSubresourceState>();

		FRDGSubresourceState InitialState;
		InitialState.SetPass(ERHIPipeline::Graphics, GetProloguePassHandle());
		InitTextureSubresources(*Texture->State, Texture->Layout, InitialState);
	}

	PreviousOwner = Texture;
}
void FRDGBuilder::SetRHI(FRDGTexture* Texture, FRHITransientTexture* TransientTexture, FRDGPassHandle PassHandle)
{
	// Bind the transient allocation directly. The subresource state starts empty; the caller
	// initializes it when the acquire is recorded.
	Texture->ResourceRHI = TransientTexture->GetRHI();
	Texture->TransientTexture = TransientTexture;
	Texture->ViewCache = &TransientTexture->ViewCache;
	Texture->FirstPass = PassHandle;
	Texture->bTransient = true;
	Texture->State = Allocator.AllocNoDestruct<FRDGTextureSubresourceState>();
}
void FRDGBuilder::SetRHI(FRDGBuffer* Buffer, FRDGPooledBuffer* PooledBuffer, FRDGPassHandle PassHandle)
{
	Buffer->ResourceRHI = PooledBuffer->GetRHI();
	Buffer->PooledBuffer = PooledBuffer;
	Buffer->ViewCache = &PooledBuffer->ViewCache;
	Buffer->Allocation = PooledBuffer;
	Buffer->FirstPass = PassHandle;

	FRDGBuffer*& PreviousOwner = PooledBufferOwnershipMap.FindOrAdd(PooledBuffer);

	if (PreviousOwner)
	{
		// Another RDG buffer already aliased this pooled buffer: chain the ownership list and
		// share the state allocation so tracking carries across aliases.
		PreviousOwner->NextOwner = Buffer->Handle;
		PreviousOwner->bLastOwner = false;
		Buffer->State = PreviousOwner->State;
	}
	else
	{
		// First use of this pooled buffer: state starts at the prologue pass on the graphics pipe.
		FRDGSubresourceState InitialState;
		InitialState.SetPass(ERHIPipeline::Graphics, GetProloguePassHandle());
		Buffer->State = Allocator.AllocNoDestruct<FRDGSubresourceState>(InitialState);
	}

	PreviousOwner = Buffer;
}
void FRDGBuilder::SetRHI(FRDGBuffer* Buffer, FRHITransientBuffer* TransientBuffer, FRDGPassHandle PassHandle)
{
	check(!Buffer->ResourceRHI);

	// Bind the transient allocation directly. The state starts empty; the caller fills it in
	// when the acquire is recorded.
	Buffer->ResourceRHI = TransientBuffer->GetRHI();
	Buffer->TransientBuffer = TransientBuffer;
	Buffer->ViewCache = &TransientBuffer->ViewCache;
	Buffer->FirstPass = PassHandle;
	Buffer->bTransient = true;
	Buffer->State = Allocator.AllocNoDestruct<FRDGSubresourceState>();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Lazily creates the RHI texture backing an RDG texture the first time a pass needs it.
// Prefers the transient allocator when the texture qualifies; otherwise falls back to the
// global render target pool.
void FRDGBuilder::BeginResourceRHI(FRDGPassHandle PassHandle, FRDGTextureRef Texture)
{
check(Texture);
// Already materialized (e.g. external or previously begun) -- nothing to do.
if (Texture->HasRHI())
{
return;
}
check(Texture->ReferenceCount > 0 || Texture->bExternal || IsImmediateMode());
#if RDG_ENABLE_DEBUG
{
FRDGPass* Pass = Passes[PassHandle];
// Cannot begin a resource on an async compute pass.
check(Pass->Pipeline == ERHIPipeline::Graphics);
// Cannot begin a resource within a merged render pass region.
checkf(GetPrologueBarrierPassHandle(PassHandle) == PassHandle,
TEXT("Cannot begin a resource within a merged render pass. Pass (Handle: %d, Name: %s), Resource %s"), PassHandle, Pass->GetName(), Texture->Name);
}
#endif
if (TransientResourceAllocator && IsTransient(Texture))
{
if (FRHITransientTexture* TransientTexture = TransientResourceAllocator->CreateTexture(Texture->Desc, Texture->Name, PassHandle.GetIndex()))
{
// External / extracted textures get a render-target wrapper so their lifetime can extend past the graph.
if (Texture->bExternal || Texture->bExtracted)
{
SetRHI(Texture, GRDGTransientResourceAllocator.AllocateRenderTarget(TransientTexture), PassHandle);
}
else
{
SetRHI(Texture, TransientTexture, PassHandle);
}
// Record the acquire alias starting at the earliest pass where the allocator made the memory available.
const FRDGPassHandle MinAcquirePassHandle(TransientTexture->GetAcquirePasses().Min);
AddAliasingTransition(MinAcquirePassHandle, PassHandle, Texture, FRHITransientAliasingInfo::Acquire(TransientTexture->GetRHI(), TransientTexture->GetAliasingOverlaps()));
// Aliased transient memory has no defined contents on acquire, so all subresources start in Discard.
FRDGSubresourceState InitialState;
InitialState.SetPass(ERHIPipeline::Graphics, MinAcquirePassHandle);
InitialState.Access = ERHIAccess::Discard;
InitTextureSubresources(*Texture->State, Texture->Layout, InitialState);
#if RDG_STATS
GRDGStatTransientTextureCount++;
#endif
}
}
// Fallback (or transient allocation failed): pull a texture from the global render target pool.
if (!Texture->ResourceRHI)
{
SetRHI(Texture, GRenderTargetPool.FindFreeElement(Texture->Desc, Texture->Name), PassHandle);
}
}
void FRDGBuilder::InitRHI(FRDGTextureSRVRef SRV)
{
	check(SRV);

	// Nothing to do if the view was already created.
	if (SRV->HasRHI())
	{
		return;
	}

	FRDGTextureRef ParentTexture = SRV->Desc.Texture;
	FRHITexture* ParentTextureRHI = ParentTexture->GetRHIUnchecked();
	check(ParentTextureRHI);

	// Views are cached on the underlying texture so they can be reused across graphs.
	SRV->ResourceRHI = ParentTexture->ViewCache->GetOrCreateSRV(RHICmdList, ParentTextureRHI, SRV->Desc);
}
void FRDGBuilder::InitRHI(FRDGTextureUAVRef UAV)
{
	check(UAV);

	// Nothing to do if the view was already created.
	if (UAV->HasRHI())
	{
		return;
	}

	FRDGTextureRef ParentTexture = UAV->Desc.Texture;
	FRHITexture* ParentTextureRHI = ParentTexture->GetRHIUnchecked();
	check(ParentTextureRHI);

	// Views are cached on the underlying texture so they can be reused across graphs.
	UAV->ResourceRHI = ParentTexture->ViewCache->GetOrCreateUAV(RHICmdList, ParentTextureRHI, UAV->Desc);
}
// Lazily creates the RHI buffer backing an RDG buffer the first time a pass needs it.
// Prefers the transient allocator when the buffer qualifies; otherwise allocates from the
// RDG pooled buffer allocator.
void FRDGBuilder::BeginResourceRHI(FRDGPassHandle PassHandle, FRDGBufferRef Buffer)
{
check(Buffer);
// Already materialized -- nothing to do.
if (Buffer->HasRHI())
{
return;
}
check(Buffer->ReferenceCount > 0 || Buffer->bExternal || Buffer->bQueuedForUpload || IsImmediateMode());
#if RDG_ENABLE_DEBUG
{
const FRDGPass* Pass = Passes[PassHandle];
// Cannot begin a resource on an async compute pass.
check(Pass->Pipeline == ERHIPipeline::Graphics);
// Cannot begin a resource within a merged render pass region.
checkf(GetPrologueBarrierPassHandle(PassHandle) == PassHandle,
TEXT("Cannot begin a resource within a merged render pass. Pass (Handle: %d, Name: %s), Resource %s"), PassHandle, Pass->GetName(), Buffer->Name);
}
#endif
// Finalize the buffer descriptor before any allocation is made from it.
Buffer->FinalizeDesc();
// If transient then create the resource on the transient allocator. External or extracted resource can't be transient because of lifetime tracking issues.
if (TransientResourceAllocator && IsTransient(Buffer))
{
if (FRHITransientBuffer* TransientBuffer = TransientResourceAllocator->CreateBuffer(Translate(Buffer->Desc), Buffer->Name, PassHandle.GetIndex()))
{
SetRHI(Buffer, TransientBuffer, PassHandle);
// Record the acquire alias starting at the earliest pass where the allocator made the memory available.
const FRDGPassHandle MinAcquirePassHandle(TransientBuffer->GetAcquirePasses().Min);
AddAliasingTransition(MinAcquirePassHandle, PassHandle, Buffer, FRHITransientAliasingInfo::Acquire(TransientBuffer->GetRHI(), TransientBuffer->GetAliasingOverlaps()));
// Aliased transient memory has no defined contents on acquire, so the state starts in Discard.
FRDGSubresourceState* InitialState = Buffer->State;
InitialState->SetPass(ERHIPipeline::Graphics, MinAcquirePassHandle);
InitialState->Access = ERHIAccess::Discard;
#if RDG_STATS
GRDGStatTransientBufferCount++;
#endif
}
}
// Fallback (or transient allocation failed): allocate from the pooled buffer allocator.
if (!Buffer->bTransient)
{
const ERDGPooledBufferAlignment Alignment = Buffer->bQueuedForUpload ? ERDGPooledBufferAlignment::PowerOfTwo : ERDGPooledBufferAlignment::Page;
SetRHI(Buffer, GRenderGraphResourcePool.FindFreeBuffer(Buffer->Desc, Buffer->Name, Alignment), PassHandle);
}
#if RHI_ENABLE_RESOURCE_INFO
if (Buffer->HasRHI())
{
Buffer->ResourceRHI->SetOwnerName(Buffer->OwnerName);
}
#endif
}
void FRDGBuilder::InitRHI(FRDGBufferSRVRef SRV)
{
	check(SRV);

	// Nothing to do if the view was already created.
	if (SRV->HasRHI())
	{
		return;
	}

	FRDGBufferRef ParentBuffer = SRV->Desc.Buffer;
	FRHIBuffer* ParentBufferRHI = ParentBuffer->GetRHIUnchecked();
	check(ParentBufferRHI);

	FRHIBufferSRVCreateInfo CreateInfo = SRV->Desc;

	if (EnumHasAnyFlags(ParentBuffer->Desc.Usage, EBufferUsageFlags::StructuredBuffer))
	{
		// RDG allows structured buffer views to be typed, but the view creation logic requires that it
		// be unknown (as do platform APIs -- structured buffers are not typed). This could be validated
		// at the high level but the current API makes it confusing. For now, it's considered a no-op.
		CreateInfo.Format = PF_Unknown;
	}

	SRV->ResourceRHI = ParentBuffer->ViewCache->GetOrCreateSRV(RHICmdList, ParentBufferRHI, CreateInfo);
}
// Creates (or fetches from the view cache) the RHI UAV for an RDG buffer UAV.
// No-op if the view already has its RHI resource.
void FRDGBuilder::InitRHI(FRDGBufferUAV* UAV)
{
	check(UAV);
	if (UAV->HasRHI())
	{
		return;
	}

	FRDGBufferRef Buffer = UAV->Desc.Buffer;
	check(Buffer);

	// Consistency with the SRV path: the underlying RHI buffer must already have been created
	// (via BeginResourceRHI) before a view on it can be built.
	FRHIBuffer* BufferRHI = Buffer->GetRHIUnchecked();
	check(BufferRHI);

	FRHIBufferUAVCreateInfo UAVCreateInfo = UAV->Desc;

	if (EnumHasAnyFlags(Buffer->Desc.Usage, EBufferUsageFlags::StructuredBuffer))
	{
		// RDG allows structured buffer views to be typed, but the view creation logic requires that it
		// be unknown (as do platform APIs -- structured buffers are not typed). This could be validated
		// at the high level but the current API makes it confusing. For now, it's considered a no-op.
		UAVCreateInfo.Format = PF_Unknown;
	}

	UAV->ResourceRHI = Buffer->ViewCache->GetOrCreateUAV(RHICmdList, BufferRHI, UAVCreateInfo);
}
void FRDGBuilder::InitRHI(FRDGView* View)
{
	// Idempotent: a view that already has its RHI resource is left untouched.
	if (View->HasRHI())
	{
		return;
	}

	// Dispatch to the typed overload for the concrete view kind.
	switch (View->Type)
	{
	case ERDGViewType::TextureSRV:
		InitRHI(static_cast<FRDGTextureSRV*>(View));
		break;

	case ERDGViewType::TextureUAV:
		InitRHI(static_cast<FRDGTextureUAV*>(View));
		break;

	case ERDGViewType::BufferSRV:
		InitRHI(static_cast<FRDGBufferSRV*>(View));
		break;

	case ERDGViewType::BufferUAV:
		InitRHI(static_cast<FRDGBufferUAV*>(View));
		break;
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Releases ReferenceCount references held on a texture. When the count reaches zero the
// underlying allocation is returned to its allocator (transient or pooled) and the texture
// is marked deallocated with a sentinel reference count.
void FRDGBuilder::EndResourceRHI(FRDGPassHandle PassHandle, FRDGTextureRef Texture, uint32 ReferenceCount)
{
check(Texture);
check(Texture->ReferenceCount != FRDGViewableResource::DeallocatedReferenceCount);
check(Texture->ReferenceCount >= ReferenceCount || IsImmediateMode());
Texture->ReferenceCount -= ReferenceCount;
if (Texture->ReferenceCount == 0)
{
if (Texture->bTransient)
{
// Texture is using a transient external render target.
if (Texture->RenderTarget)
{
// Extracted textures outlive the graph, so their render target is not released here.
if (!Texture->bExtracted)
{
// This releases the reference without invoking a virtual function call.
GRDGTransientResourceAllocator.Release(TRefCountPtr<FRDGTransientRenderTarget>(MoveTemp(Texture->Allocation)), PassHandle);
}
}
// Texture is using an internal transient texture.
else
{
TransientResourceAllocator->DeallocateMemory(Texture->TransientTexture, PassHandle.GetIndex());
}
}
else
{
// Only tracked render targets are released. Untracked ones persist until the end of the frame.
if (static_cast<FPooledRenderTarget*>(Texture->RenderTarget)->IsTracked())
{
// This releases the reference without invoking a virtual function call.
TRefCountPtr<FPooledRenderTarget>(MoveTemp(Texture->Allocation));
}
}
Texture->LastPass = PassHandle;
// Sentinel marks the texture as fully released for subsequent validation checks.
Texture->ReferenceCount = FRDGViewableResource::DeallocatedReferenceCount;
}
}
void FRDGBuilder::EndResourceRHI(FRDGPassHandle PassHandle, FRDGBufferRef Buffer, uint32 ReferenceCount)
{
	check(Buffer);
	check(Buffer->ReferenceCount != FRDGViewableResource::DeallocatedReferenceCount);
	check(Buffer->ReferenceCount >= ReferenceCount || IsImmediateMode());

	Buffer->ReferenceCount -= ReferenceCount;

	// Keep the buffer alive while references remain.
	if (Buffer->ReferenceCount != 0)
	{
		return;
	}

	// Last reference released: hand the memory back to its allocator.
	if (Buffer->bTransient)
	{
		TransientResourceAllocator->DeallocateMemory(Buffer->TransientBuffer, PassHandle.GetIndex());
	}
	else
	{
		// Dropping the ref-counted allocation returns the pooled buffer to its pool.
		Buffer->Allocation = nullptr;
	}

	Buffer->LastPass = PassHandle;
	Buffer->ReferenceCount = FRDGViewableResource::DeallocatedReferenceCount;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
#if RDG_ENABLE_DEBUG
// Debug-only: walks the pass's texture parameters and, for each writable output that the
// VisualizeTexture system has requested, queues a content-capture pass. The capture decision
// (ShouldCapture / CreateContentCapturePass) was previously duplicated per parameter type;
// it is now shared through a single local helper.
void FRDGBuilder::VisualizePassOutputs(const FRDGPass* Pass)
{
#if SUPPORTS_VISUALIZE_TEXTURE
	if (!AuxiliaryPasses.IsVisualizeAllowed())
	{
		return;
	}
	RDG_RECURSION_COUNTER_SCOPE(AuxiliaryPasses.Visualize);

	// Queues a capture pass for the given texture / mip if VisualizeTexture asked for it.
	const auto CaptureIfRequested = [&](FRDGTextureRef Texture, uint32 MipIndex)
	{
		if (TOptional<uint32> CaptureId = GVisualizeTexture.ShouldCapture(Texture->Name, MipIndex))
		{
			GVisualizeTexture.CreateContentCapturePass(*this, Texture, *CaptureId);
		}
	};

	Pass->GetParameters().EnumerateTextures([&](FRDGParameter Parameter)
	{
		switch (Parameter.GetType())
		{
		case UBMT_RDG_TEXTURE_ACCESS:
		{
			if (FRDGTextureAccess TextureAccess = Parameter.GetAsTextureAccess())
			{
				// Only writable accesses can produce new content worth capturing.
				if (IsWritableAccess(TextureAccess.GetAccess()))
				{
					CaptureIfRequested(TextureAccess.GetTexture(), /* MipIndex = */ 0);
				}
			}
		}
		break;
		case UBMT_RDG_TEXTURE_ACCESS_ARRAY:
		{
			for (FRDGTextureAccess TextureAccess : Parameter.GetAsTextureAccessArray())
			{
				if (IsWritableAccess(TextureAccess.GetAccess()))
				{
					CaptureIfRequested(TextureAccess.GetTexture(), /* MipIndex = */ 0);
				}
			}
		}
		break;
		case UBMT_RDG_TEXTURE_UAV:
		{
			if (FRDGTextureUAVRef UAV = Parameter.GetAsTextureUAV())
			{
				CaptureIfRequested(UAV->Desc.Texture, UAV->Desc.MipLevel);
			}
		}
		break;
		case UBMT_RENDER_TARGET_BINDING_SLOTS:
		{
			const FRenderTargetBindingSlots& RenderTargets = Parameter.GetAsRenderTargetBindingSlots();

			RenderTargets.Enumerate([&](FRenderTargetBinding RenderTarget)
			{
				CaptureIfRequested(RenderTarget.GetTexture(), RenderTarget.GetMipIndex());
			});

			const FDepthStencilBinding& DepthStencil = RenderTargets.DepthStencil;

			if (FRDGTextureRef Texture = DepthStencil.GetTexture())
			{
				// Depth/stencil only has new content when the pass actually writes (stores) to it.
				if (DepthStencil.GetDepthStencilAccess().IsAnyWrite())
				{
					CaptureIfRequested(Texture, /* MipIndex = */ 0);
				}
			}
		}
		break;
		}
	});
#endif
}
// Debug-only: fills every writable output of the pass with a recognizable clobber value so that
// passes reading uninitialized RDG resources become visually obvious. Gated on GRDGValidation
// and GRDGClobberResources.
void FRDGBuilder::ClobberPassOutputs(const FRDGPass* Pass)
{
if (!GRDGValidation || !GRDGClobberResources || !AuxiliaryPasses.IsClobberAllowed())
{
return;
}
RDG_RECURSION_COUNTER_SCOPE(AuxiliaryPasses.Clobber);
RDG_EVENT_SCOPE(*this, "RDG ClobberResources");
const FLinearColor ClobberColor = GetClobberColor();
// Integer formats are cleared with the integer clobber value; everything else gets the clobber color.
const auto ClobberTextureUAV = [&](FRDGTextureUAV* TextureUAV)
{
if (IsInteger(TextureUAV->GetParent()->Desc.Format))
{
AddClearUAVPass(*this, TextureUAV, GetClobberBufferValue());
}
else
{
AddClearUAVPass(*this, TextureUAV, ClobberColor);
}
};
Pass->GetParameters().Enumerate([&](FRDGParameter Parameter)
{
switch (Parameter.GetType())
{
case UBMT_RDG_BUFFER_UAV:
{
if (FRDGBufferUAVRef UAV = Parameter.GetAsBufferUAV())
{
FRDGBufferRef Buffer = UAV->GetParent();
// TryMarkForClobber gates each resource so it is only clobbered when validation marks it.
if (UserValidation.TryMarkForClobber(Buffer))
{
AddClearUAVPass(*this, UAV, GetClobberBufferValue());
}
}
}
break;
case UBMT_RDG_TEXTURE_ACCESS:
{
if (FRDGTextureAccess TextureAccess = Parameter.GetAsTextureAccess())
{
FRDGTextureRef Texture = TextureAccess.GetTexture();
if (UserValidation.TryMarkForClobber(Texture))
{
// Clobber via UAV clears when the access is UAV-writable, otherwise via a render target clear.
if (EnumHasAnyFlags(TextureAccess.GetAccess(), ERHIAccess::UAVMask))
{
for (int32 MipLevel = 0; MipLevel < Texture->Desc.NumMips; MipLevel++)
{
ClobberTextureUAV(CreateUAV(FRDGTextureUAVDesc(Texture, MipLevel)));
}
}
else if (EnumHasAnyFlags(TextureAccess.GetAccess(), ERHIAccess::RTV))
{
AddClearRenderTargetPass(*this, Texture, ClobberColor);
}
}
}
}
break;
case UBMT_RDG_TEXTURE_UAV:
{
if (FRDGTextureUAVRef UAV = Parameter.GetAsTextureUAV())
{
FRDGTextureRef Texture = UAV->GetParent();
if (UserValidation.TryMarkForClobber(Texture))
{
// Single-mip textures can reuse the provided UAV; otherwise clobber every mip individually.
if (Texture->Desc.NumMips == 1)
{
ClobberTextureUAV(UAV);
}
else
{
for (int32 MipLevel = 0; MipLevel < Texture->Desc.NumMips; MipLevel++)
{
ClobberTextureUAV(CreateUAV(FRDGTextureUAVDesc(Texture, MipLevel)));
}
}
}
}
}
break;
case UBMT_RENDER_TARGET_BINDING_SLOTS:
{
const FRenderTargetBindingSlots& RenderTargets = Parameter.GetAsRenderTargetBindingSlots();
RenderTargets.Enumerate([&](FRenderTargetBinding RenderTarget)
{
FRDGTextureRef Texture = RenderTarget.GetTexture();
if (UserValidation.TryMarkForClobber(Texture))
{
AddClearRenderTargetPass(*this, Texture, ClobberColor);
}
});
// Depth/stencil bound as a render target is clobbered with the debug depth/stencil values.
if (FRDGTextureRef Texture = RenderTargets.DepthStencil.GetTexture())
{
if (UserValidation.TryMarkForClobber(Texture))
{
AddClearDepthStencilPass(*this, Texture, true, GetClobberDepth(), true, GetClobberStencil());
}
}
}
break;
}
});
}
#endif //! RDG_ENABLE_DEBUG
#if WITH_MGPU
// Multi-GPU: finds every external resource written during the graph and queues cross-GPU
// transfers so all GPUs end up with the content written by the last writer. The per-texture
// checks (RHI lookup, external-set membership, MultiGPUGraphIgnore flag) are loop-invariant
// per texture, so they are hoisted out of the per-subresource loop, and the scan stops at the
// first writable subresource since the recorded mask (Pass->GPUMask) is constant per pass.
void FRDGBuilder::ForceCopyCrossGPU()
{
	// Initialize set of external buffers for O(1) membership tests below.
	TSet<FRHIBuffer*> ExternalBufferSet;
	ExternalBufferSet.Reserve(ExternalBuffers.Num());

	for (auto ExternalBufferIt = ExternalBuffers.CreateConstIterator(); ExternalBufferIt; ++ExternalBufferIt)
	{
		ExternalBufferSet.Emplace(ExternalBufferIt.Value()->GetRHIUnchecked());
	}

	// Generate list of cross GPU resources from all passes, and the GPU mask where they were last written.
	// Later passes overwrite earlier map entries, leaving the mask of the final writer.
	TMap<FRHIBuffer*, FRHIGPUMask> BuffersToTransfer;
	TMap<FRHITexture*, FRHIGPUMask> TexturesToTransfer;

	for (FRDGPassHandle PassHandle = GetProloguePassHandle(); PassHandle <= GetEpiloguePassHandle(); ++PassHandle)
	{
		FRDGPass* Pass = Passes[PassHandle];

		for (int32 BufferIndex = 0; BufferIndex < Pass->BufferStates.Num(); BufferIndex++)
		{
			const auto& BufferState = Pass->BufferStates[BufferIndex];
			FRHIBuffer* BufferRHI = BufferState.Buffer->GetRHIUnchecked();

			if (ExternalBufferSet.Contains(BufferRHI) &&
				!EnumHasAnyFlags(BufferRHI->GetUsage(), BUF_MultiGPUAllocate | BUF_MultiGPUGraphIgnore) &&
				EnumHasAnyFlags(BufferState.State.Access, ERHIAccess::WritableMask))
			{
				BuffersToTransfer.Emplace(BufferRHI) = Pass->GPUMask;
			}
		}

		for (int32 TextureIndex = 0; TextureIndex < Pass->TextureStates.Num(); TextureIndex++)
		{
			const auto& TextureState = Pass->TextureStates[TextureIndex];
			FRHITexture* TextureRHI = TextureState.Texture->GetRHIUnchecked();

			// Invariant per texture: skip null RHI, non-external, or graph-ignored textures up front.
			if (!TextureRHI ||
				!ExternalTextures.Contains(TextureRHI) ||
				EnumHasAnyFlags(TextureRHI->GetFlags(), TexCreate_MultiGPUGraphIgnore))
			{
				continue;
			}

			for (int32 StateIndex = 0; StateIndex < TextureState.State.Num(); StateIndex++)
			{
				if (EnumHasAnyFlags(TextureState.State[StateIndex].Access, ERHIAccess::WritableMask))
				{
					TexturesToTransfer.Emplace(TextureRHI) = Pass->GPUMask;

					// One writable subresource is enough; the recorded mask is per-pass.
					break;
				}
			}
		}
	}

	// Now that we've got the list of external resources, and the GPU they were last written to, make a list of what needs to
	// be propagated to other GPUs.
	TArray<FTransferResourceParams> Transfers;
	const FRHIGPUMask AllGPUMask = FRHIGPUMask::All();
	const bool bPullData = false;
	const bool bLockstepGPUs = true;

	for (auto BufferIt = BuffersToTransfer.CreateConstIterator(); BufferIt; ++BufferIt)
	{
		FRHIBuffer* Buffer = BufferIt.Key();
		FRHIGPUMask GPUMask = BufferIt.Value();

		for (uint32 GPUIndex : AllGPUMask)
		{
			if (!GPUMask.Contains(GPUIndex))
			{
				Transfers.Add(FTransferResourceParams(Buffer, GPUMask.GetFirstIndex(), GPUIndex, bPullData, bLockstepGPUs));
			}
		}
	}

	for (auto TextureIt = TexturesToTransfer.CreateConstIterator(); TextureIt; ++TextureIt)
	{
		FRHITexture* Texture = TextureIt.Key();
		FRHIGPUMask GPUMask = TextureIt.Value();

		for (uint32 GPUIndex : AllGPUMask)
		{
			if (!GPUMask.Contains(GPUIndex))
			{
				Transfers.Add(FTransferResourceParams(Texture, GPUMask.GetFirstIndex(), GPUIndex, bPullData, bLockstepGPUs));
			}
		}
	}

	if (Transfers.Num())
	{
		RHICmdList.TransferResources(Transfers);
	}
}
#endif // WITH_MGPU