You've already forked UnrealEngineUWP
mirror of
https://github.com/izzy2lost/UnrealEngineUWP.git
synced 2026-03-26 18:15:20 -07:00
- GetRayTracingCallableLibrary() similar to how we handle hitgroups. FindRayTracingCallableIndex(...) to query callable shader index in RTPSO. #rb yuriy.odonnell #preflight 628620c99016c6dd8974b510 [CL 20278150 by tiago costa in ue5-main branch]
2006 lines
59 KiB
C++
2006 lines
59 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
/*=============================================================================
|
|
PipelineStateCache.cpp: Pipeline state cache implementation.
|
|
=============================================================================*/
|
|
|
|
#include "PipelineStateCache.h"
|
|
#include "PipelineFileCache.h"
|
|
#include "Misc/ScopeRWLock.h"
|
|
#include "Misc/ScopeLock.h"
|
|
#include "CoreGlobals.h"
|
|
#include "Misc/TimeGuard.h"
|
|
#include "Containers/DiscardableKeyValueCache.h"
|
|
#include "Async/Async.h"
|
|
|
|
// perform cache eviction each frame, used to stress the system and flush out bugs
|
|
#define PSO_DO_CACHE_EVICT_EACH_FRAME 0
|
|
|
|
// Log event and info about cache eviction
|
|
#define PSO_LOG_CACHE_EVICT 0
|
|
|
|
// Stat tracking
|
|
#define PSO_TRACK_CACHE_STATS 0
|
|
|
|
|
|
#define PIPELINESTATECACHE_VERIFYTHREADSAFE (!UE_BUILD_SHIPPING && !UE_BUILD_TEST)
|
|
|
|
CSV_DECLARE_CATEGORY_EXTERN(PSO);
|
|
|
|
static inline uint32 GetTypeHash(const FBoundShaderStateInput& Input)
|
|
{
|
|
return GetTypeHash(Input.VertexDeclarationRHI)
|
|
^ GetTypeHash(Input.VertexShaderRHI)
|
|
^ GetTypeHash(Input.PixelShaderRHI)
|
|
#if PLATFORM_SUPPORTS_MESH_SHADERS
|
|
^ GetTypeHash(Input.GetMeshShader())
|
|
^ GetTypeHash(Input.GetAmplificationShader())
|
|
#endif
|
|
#if PLATFORM_SUPPORTS_GEOMETRY_SHADERS
|
|
^ GetTypeHash(Input.GetGeometryShader())
|
|
#endif
|
|
;
|
|
}
|
|
|
|
static inline uint32 GetTypeHash(const FGraphicsPipelineStateInitializer& Initializer)
|
|
{
|
|
//#todo-rco: Hash!
|
|
return (GetTypeHash(Initializer.BoundShaderState) | (Initializer.NumSamples << 28)) ^ ((uint32)Initializer.PrimitiveType << 24) ^ GetTypeHash(Initializer.BlendState)
|
|
^ Initializer.RenderTargetsEnabled ^ GetTypeHash(Initializer.RasterizerState) ^ GetTypeHash(Initializer.DepthStencilState);
|
|
}
|
|
|
|
constexpr int32 PSO_MISS_FRAME_HISTORY_SIZE = 3;
|
|
static TAtomic<uint32> GraphicsPipelineCacheMisses;
|
|
static TArray<uint32> GraphicsPipelineCacheMissesHistory;
|
|
static TAtomic<uint32> ComputePipelineCacheMisses;
|
|
static TArray<uint32> ComputePipelineCacheMissesHistory;
|
|
static bool ReportFrameHitchThisFrame;
|
|
|
|
static TAutoConsoleVariable<int32> GCVarAsyncPipelineCompile(
|
|
TEXT("r.AsyncPipelineCompile"),
|
|
1,
|
|
TEXT("0 to Create PSOs at the moment they are requested\n")\
|
|
TEXT("1 to Create Pipeline State Objects asynchronously(default)"),
|
|
ECVF_ReadOnly | ECVF_RenderThreadSafe
|
|
);
|
|
|
|
static TAutoConsoleVariable<int32> CVarPSOEvictionTime(
|
|
TEXT("r.pso.evictiontime"),
|
|
60,
|
|
TEXT("Time between checks to remove stale objects from the cache. 0 = no eviction (which may eventually OOM...)"),
|
|
ECVF_ReadOnly | ECVF_RenderThreadSafe
|
|
);
|
|
|
|
#if RHI_RAYTRACING
|
|
static TAutoConsoleVariable<int32> CVarRTPSOCacheSize(
|
|
TEXT("r.RayTracing.PSOCacheSize"),
|
|
50,
|
|
TEXT("Number of ray tracing pipelines to keep in the cache (default = 50). Set to 0 to disable eviction.\n"),
|
|
ECVF_ReadOnly | ECVF_RenderThreadSafe
|
|
);
|
|
#endif // RHI_RAYTRACING
|
|
|
|
extern void DumpPipelineCacheStats();
|
|
|
|
static FAutoConsoleCommand DumpPipelineCmd(
|
|
TEXT("r.DumpPipelineCache"),
|
|
TEXT("Dump current cache stats."),
|
|
FConsoleCommandDelegate::CreateStatic(DumpPipelineCacheStats)
|
|
);
|
|
|
|
void SetComputePipelineState(FRHIComputeCommandList& RHICmdList, FRHIComputeShader* ComputeShader)
|
|
{
|
|
RHICmdList.SetComputePipelineState(PipelineStateCache::GetAndOrCreateComputePipelineState(RHICmdList, ComputeShader, false), ComputeShader);
|
|
}
|
|
|
|
static int32 GPSOPrecompileThreadPoolSize = 0;
|
|
static FAutoConsoleVariableRef GPSOPrecompileThreadPoolSizeVar(
|
|
TEXT("r.pso.PrecompileThreadPoolSize"),
|
|
GPSOPrecompileThreadPoolSize,
|
|
TEXT("The maximum number of threads available for concurrent PSO Precompiling.\n")
|
|
TEXT("0 to disable threadpool usage when precompiling PSOs. (default)")
|
|
,
|
|
ECVF_RenderThreadSafe | ECVF_ReadOnly
|
|
);
|
|
|
|
class FPSOThreadPool
|
|
{
|
|
public:
|
|
~FPSOThreadPool()
|
|
{
|
|
ShutdownThreadPool();
|
|
}
|
|
|
|
FQueuedThreadPool& Get()
|
|
{
|
|
if (PSOPrecompileCompileThreadPool.load() == nullptr)
|
|
{
|
|
FScopeLock lock(&LockCS);
|
|
if (PSOPrecompileCompileThreadPool.load() == nullptr)
|
|
{
|
|
FQueuedThreadPool* PSOPrecompileCompileThreadPoolLocal = FQueuedThreadPool::Allocate();
|
|
PSOPrecompileCompileThreadPoolLocal->Create(GPSOPrecompileThreadPoolSize, 512 * 1024, EThreadPriority::TPri_BelowNormal, TEXT("PSOPrecompilePool"));
|
|
PSOPrecompileCompileThreadPool = PSOPrecompileCompileThreadPoolLocal;
|
|
}
|
|
}
|
|
return *PSOPrecompileCompileThreadPool;
|
|
}
|
|
|
|
void ShutdownThreadPool()
|
|
{
|
|
FQueuedThreadPool* LocalPool = PSOPrecompileCompileThreadPool.exchange(nullptr);
|
|
if (LocalPool != nullptr)
|
|
{
|
|
|
|
LocalPool->Destroy();
|
|
delete LocalPool;
|
|
}
|
|
}
|
|
|
|
private:
|
|
FCriticalSection LockCS;
|
|
std::atomic<FQueuedThreadPool*> PSOPrecompileCompileThreadPool {};
|
|
};
|
|
|
|
static FPSOThreadPool GPSOThreadPool;
|
|
|
|
void PipelineStateCache::PreCompileComplete()
|
|
{
|
|
// free up our threads when the precompile completes.
|
|
GPSOThreadPool.ShutdownThreadPool();
|
|
}
|
|
|
|
extern RHI_API FRHIComputePipelineState* ExecuteSetComputePipelineState(FComputePipelineState* ComputePipelineState);
|
|
extern RHI_API FRHIGraphicsPipelineState* ExecuteSetGraphicsPipelineState(FGraphicsPipelineState* GraphicsPipelineState);
|
|
|
|
// Prints out information about a failed compilation from Init.
|
|
// This is fatal unless the compilation request is from the PSO cache preload.
|
|
static void HandlePipelineCreationFailure(const FGraphicsPipelineStateInitializer& Init)
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Failed to create GraphicsPipeline"));
|
|
// Failure to compile is Fatal unless this is from the PSO file cache preloading.
|
|
#if !(UE_BUILD_SHIPPING || UE_BUILD_TEST)
|
|
if(Init.BoundShaderState.VertexShaderRHI)
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Vertex: %s"), *Init.BoundShaderState.VertexShaderRHI->ShaderName);
|
|
}
|
|
if (Init.BoundShaderState.GetMeshShader())
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Mesh: %s"), *Init.BoundShaderState.GetMeshShader()->ShaderName);
|
|
}
|
|
if (Init.BoundShaderState.GetAmplificationShader())
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Amplification: %s"), *Init.BoundShaderState.GetAmplificationShader()->ShaderName);
|
|
}
|
|
if(Init.BoundShaderState.GetGeometryShader())
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Geometry: %s"), *Init.BoundShaderState.GetGeometryShader()->ShaderName);
|
|
}
|
|
if(Init.BoundShaderState.PixelShaderRHI)
|
|
{
|
|
UE_LOG(LogRHI, Error, TEXT("Pixel: %s"), *Init.BoundShaderState.PixelShaderRHI->ShaderName);
|
|
}
|
|
|
|
UE_LOG(LogRHI, Error, TEXT("Render Targets: (%u)"), Init.RenderTargetFormats.Num());
|
|
for(int32 i = 0; i < Init.RenderTargetFormats.Num(); ++i)
|
|
{
|
|
//#todo-mattc GetPixelFormatString is not available in scw. Need to move it so we can print more info here.
|
|
UE_LOG(LogRHI, Error, TEXT("0x%x"), (uint32)Init.RenderTargetFormats[i]);
|
|
}
|
|
|
|
UE_LOG(LogRHI, Error, TEXT("Depth Stencil Format:"));
|
|
UE_LOG(LogRHI, Error, TEXT("0x%x"), Init.DepthStencilTargetFormat);
|
|
#endif
|
|
|
|
if(Init.bFromPSOFileCache)
|
|
{
|
|
// Let the cache know so it hopefully won't give out this one again
|
|
FPipelineFileCache::RegisterPSOCompileFailure(GetTypeHash(Init), Init);
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Shader compilation failures are Fatal."));
|
|
}
|
|
}
|
|
|
|
// FAsyncTask used by the PSO threadpool that will reschedule once only.
|
|
template<typename TTask>
|
|
class FAsyncTaskLimitedReschedule : public FAsyncTask<TTask>
|
|
{
|
|
public:
|
|
/** Forwarding constructor. */
|
|
template<typename...T>
|
|
explicit FAsyncTaskLimitedReschedule(T&&... Args) : FAsyncTask<TTask>(Forward<T>(Args)...) { }
|
|
|
|
bool Reschedule(FQueuedThreadPool* InQueuedPool, EQueuedWorkPriority InQueuedWorkPriority)
|
|
{
|
|
if (bCanReschedule.exchange(false))
|
|
{
|
|
return FAsyncTask<TTask>::Reschedule(InQueuedPool, InQueuedWorkPriority);
|
|
}
|
|
return false;
|
|
}
|
|
private:
|
|
std::atomic_bool bCanReschedule = true;
|
|
};
|
|
|
|
class FPSOPrecompileTask : public FNonAbandonableTask
|
|
{
|
|
friend class FAsyncTask<FPSOPrecompileTask>;
|
|
private:
|
|
TUniqueFunction<void()> Func;
|
|
TUniqueFunction<void()> OnComplete;
|
|
public:
|
|
FPSOPrecompileTask(TUniqueFunction<void()> InFunc, TUniqueFunction<void()> InOnComplete) : Func(MoveTemp(InFunc)), OnComplete(MoveTemp(InOnComplete)) {}
|
|
|
|
protected:
|
|
void DoWork()
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_FPSOPrecompileTask_Work);
|
|
|
|
Func();
|
|
Func = nullptr;
|
|
OnComplete();
|
|
OnComplete = nullptr;
|
|
}
|
|
|
|
FORCEINLINE TStatId GetStatId() const
|
|
{
|
|
RETURN_QUICK_DECLARE_CYCLE_STAT(FPSOPrecompileTask, STATGROUP_ThreadPoolAsyncTasks);
|
|
}
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
* Base class to hold pipeline state (and optionally stats)
|
|
*/
|
|
class FPipelineState
|
|
{
|
|
public:
|
|
|
|
FPipelineState()
|
|
: Stats(nullptr)
|
|
{
|
|
InitStats();
|
|
}
|
|
|
|
virtual ~FPipelineState()
|
|
{
|
|
check(IsComplete());
|
|
check(!PrecompileTask.IsValid());
|
|
}
|
|
|
|
virtual bool IsCompute() const = 0;
|
|
|
|
FGraphEventRef CompletionEvent;
|
|
TUniquePtr<FAsyncTaskLimitedReschedule<FPSOPrecompileTask>> PrecompileTask;
|
|
|
|
bool IsComplete()
|
|
{
|
|
return (!CompletionEvent.IsValid() || CompletionEvent->IsComplete()) && (!PrecompileTask.IsValid() || PrecompileTask->IsDone());
|
|
}
|
|
|
|
// return true if we actually waited on the task
|
|
bool WaitCompletion()
|
|
{
|
|
bool bNeedsToWait = false;
|
|
if(CompletionEvent.IsValid() && !CompletionEvent->IsComplete())
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_FPipelineState_WaitCompletion);
|
|
#if PSO_TRACK_CACHE_STATS
|
|
UE_LOG(LogRHI, Log, TEXT("FTaskGraphInterface Waiting on FPipelineState completionEvent"));
|
|
#endif
|
|
bNeedsToWait = true;
|
|
FTaskGraphInterface::Get().WaitUntilTaskCompletes( CompletionEvent );
|
|
}
|
|
CompletionEvent = nullptr;
|
|
|
|
if (PrecompileTask.IsValid())
|
|
{
|
|
bNeedsToWait = bNeedsToWait || !PrecompileTask->IsDone();
|
|
PrecompileTask->EnsureCompletion();
|
|
PrecompileTask = nullptr;
|
|
}
|
|
|
|
return bNeedsToWait;
|
|
}
|
|
|
|
inline void AddUse()
|
|
{
|
|
FPipelineStateStats::UpdateStats(Stats);
|
|
}
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
|
|
void InitStats()
|
|
{
|
|
FirstUsedTime = LastUsedTime = FPlatformTime::Seconds();
|
|
FirstFrameUsed = LastFrameUsed = 0;
|
|
Hits = HitsAcrossFrames = 0;
|
|
}
|
|
|
|
void AddHit()
|
|
{
|
|
LastUsedTime = FPlatformTime::Seconds();
|
|
Hits++;
|
|
|
|
if (LastFrameUsed != GFrameCounter)
|
|
{
|
|
LastFrameUsed = GFrameCounter;
|
|
HitsAcrossFrames++;
|
|
}
|
|
}
|
|
|
|
double FirstUsedTime;
|
|
double LastUsedTime;
|
|
uint64 FirstFrameUsed;
|
|
uint64 LastFrameUsed;
|
|
int Hits;
|
|
int HitsAcrossFrames;
|
|
|
|
#else
|
|
void InitStats() {}
|
|
void AddHit() {}
|
|
#endif // PSO_TRACK_CACHE_STATS
|
|
|
|
FPipelineStateStats* Stats;
|
|
};
|
|
|
|
/* State for compute */
|
|
class FComputePipelineState : public FPipelineState
|
|
{
|
|
public:
|
|
FComputePipelineState(FRHIComputeShader* InComputeShader)
|
|
: ComputeShader(InComputeShader)
|
|
{
|
|
ComputeShader->AddRef();
|
|
}
|
|
|
|
~FComputePipelineState()
|
|
{
|
|
ComputeShader->Release();
|
|
}
|
|
|
|
virtual bool IsCompute() const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
FRHIComputeShader* ComputeShader;
|
|
TRefCountPtr<FRHIComputePipelineState> RHIPipeline;
|
|
};
|
|
|
|
/* State for graphics */
|
|
class FGraphicsPipelineState : public FPipelineState
|
|
{
|
|
public:
|
|
FGraphicsPipelineState()
|
|
{
|
|
}
|
|
|
|
virtual bool IsCompute() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
TRefCountPtr<FRHIGraphicsPipelineState> RHIPipeline;
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
FThreadSafeCounter InUseCount;
|
|
#endif
|
|
uint64 SortKey = 0;
|
|
};
|
|
|
|
#if RHI_RAYTRACING
|
|
/* State for ray tracing */
|
|
class FRayTracingPipelineState : public FPipelineState
|
|
{
|
|
public:
|
|
FRayTracingPipelineState(const FRayTracingPipelineStateInitializer& Initializer)
|
|
{
|
|
{
|
|
int32 Index = 0;
|
|
for (FRHIRayTracingShader* Shader : Initializer.GetHitGroupTable())
|
|
{
|
|
HitGroupShaderMap.Add(Shader->GetHash(), Index++);
|
|
}
|
|
}
|
|
|
|
{
|
|
int32 Index = 0;
|
|
for (FRHIRayTracingShader* Shader : Initializer.GetCallableTable())
|
|
{
|
|
CallableShaderMap.Add(Shader->GetHash(), Index++);
|
|
}
|
|
}
|
|
|
|
{
|
|
int32 Index = 0;
|
|
for (FRHIRayTracingShader* Shader : Initializer.GetMissTable())
|
|
{
|
|
MissShaderMap.Add(Shader->GetHash(), Index++);
|
|
}
|
|
}
|
|
}
|
|
|
|
virtual bool IsCompute() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
inline void AddHit()
|
|
{
|
|
if (LastFrameHit != GFrameCounter)
|
|
{
|
|
LastFrameHit = GFrameCounter;
|
|
HitsAcrossFrames++;
|
|
}
|
|
|
|
FPipelineState::AddHit();
|
|
}
|
|
|
|
bool operator < (const FRayTracingPipelineState& Other)
|
|
{
|
|
if (LastFrameHit != Other.LastFrameHit)
|
|
{
|
|
return LastFrameHit < Other.LastFrameHit;
|
|
}
|
|
return HitsAcrossFrames < Other.HitsAcrossFrames;
|
|
}
|
|
|
|
bool IsCompilationComplete() const
|
|
{
|
|
return !CompletionEvent.IsValid() || CompletionEvent->IsComplete();
|
|
}
|
|
|
|
FRayTracingPipelineStateRHIRef RHIPipeline;
|
|
|
|
uint64 HitsAcrossFrames = 0;
|
|
uint64 LastFrameHit = 0;
|
|
|
|
TMap<FSHAHash, int32> HitGroupShaderMap;
|
|
TMap<FSHAHash, int32> CallableShaderMap;
|
|
TMap<FSHAHash, int32> MissShaderMap;
|
|
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
FThreadSafeCounter InUseCount;
|
|
#endif
|
|
};
|
|
|
|
//extern RHI_API FRHIRayTracingPipelineState* GetRHIRayTracingPipelineState(FRayTracingPipelineState* PipelineState);
|
|
RHI_API FRHIRayTracingPipelineState* GetRHIRayTracingPipelineState(FRayTracingPipelineState* PipelineState)
|
|
{
|
|
ensure(PipelineState->RHIPipeline);
|
|
PipelineState->CompletionEvent = nullptr;
|
|
return PipelineState->RHIPipeline;
|
|
}
|
|
|
|
#endif // RHI_RAYTRACING
|
|
|
|
int32 FindRayTracingHitGroupIndex(FRayTracingPipelineState* Pipeline, FRHIRayTracingShader* HitGroupShader, bool bRequired)
|
|
{
|
|
#if RHI_RAYTRACING
|
|
if (int32* FoundIndex = Pipeline->HitGroupShaderMap.Find(HitGroupShader->GetHash()))
|
|
{
|
|
return *FoundIndex;
|
|
}
|
|
checkf(!bRequired, TEXT("Required hit group shader was not found in the ray tracing pipeline."));
|
|
#endif // RHI_RAYTRACING
|
|
|
|
return INDEX_NONE;
|
|
}
|
|
|
|
int32 FindRayTracingCallableShaderIndex(FRayTracingPipelineState* Pipeline, FRHIRayTracingShader* CallableShader, bool bRequired)
|
|
{
|
|
#if RHI_RAYTRACING
|
|
if (int32* FoundIndex = Pipeline->CallableShaderMap.Find(CallableShader->GetHash()))
|
|
{
|
|
return *FoundIndex;
|
|
}
|
|
checkf(!bRequired, TEXT("Required callable shader was not found in the ray tracing pipeline."));
|
|
#endif // RHI_RAYTRACING
|
|
|
|
return INDEX_NONE;
|
|
}
|
|
|
|
int32 FindRayTracingMissShaderIndex(FRayTracingPipelineState* Pipeline, FRHIRayTracingShader* MissShader, bool bRequired)
|
|
{
|
|
#if RHI_RAYTRACING
|
|
if (int32* FoundIndex = Pipeline->MissShaderMap.Find(MissShader->GetHash()))
|
|
{
|
|
return *FoundIndex;
|
|
}
|
|
checkf(!bRequired, TEXT("Required miss shader was not found in the ray tracing pipeline."));
|
|
#endif // RHI_RAYTRACING
|
|
|
|
return INDEX_NONE;
|
|
}
|
|
|
|
void SetGraphicsPipelineState(FRHICommandList& RHICmdList, const FGraphicsPipelineStateInitializer& Initializer, uint32 StencilRef, EApplyRendertargetOption ApplyFlags, bool bApplyAdditionalState)
|
|
{
|
|
#if PLATFORM_USE_FALLBACK_PSO
|
|
RHICmdList.SetGraphicsPipelineState(Initializer, StencilRef, bApplyAdditionalState);
|
|
#else
|
|
FGraphicsPipelineState* PipelineState = PipelineStateCache::GetAndOrCreateGraphicsPipelineState(RHICmdList, Initializer, ApplyFlags);
|
|
if (PipelineState && (PipelineState->RHIPipeline || !Initializer.bFromPSOFileCache))
|
|
{
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
int32 Result = PipelineState->InUseCount.Increment();
|
|
check(Result >= 1);
|
|
#endif
|
|
check(IsInRenderingThread() || IsInParallelRenderingThread());
|
|
RHICmdList.SetGraphicsPipelineState(PipelineState, Initializer.BoundShaderState, StencilRef, bApplyAdditionalState);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/* TSharedPipelineStateCache
|
|
* This is a cache of the * pipeline states
|
|
* there is a local thread cache which is consolidated with the global thread cache
|
|
* global thread cache is read only until the end of the frame when the local thread caches are consolidated
|
|
*/
|
|
template<class TMyKey,class TMyValue>
|
|
class TSharedPipelineStateCache
|
|
{
|
|
private:
|
|
|
|
TMap<TMyKey, TMyValue>& GetLocalCache()
|
|
{
|
|
void* TLSValue = FPlatformTLS::GetTlsValue(TLSSlot);
|
|
if (TLSValue == nullptr)
|
|
{
|
|
FPipelineStateCacheType* PipelineStateCache = new FPipelineStateCacheType;
|
|
FPlatformTLS::SetTlsValue(TLSSlot, (void*)(PipelineStateCache) );
|
|
|
|
FScopeLock S(&AllThreadsLock);
|
|
AllThreadsPipelineStateCache.Add(PipelineStateCache);
|
|
return *PipelineStateCache;
|
|
}
|
|
return *((FPipelineStateCacheType*)TLSValue);
|
|
}
|
|
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
struct FScopeVerifyIncrement
|
|
{
|
|
volatile int32 &VerifyMutex;
|
|
FScopeVerifyIncrement(volatile int32& InVerifyMutex) : VerifyMutex(InVerifyMutex)
|
|
{
|
|
int32 Result = FPlatformAtomics::InterlockedIncrement(&VerifyMutex);
|
|
if (Result <= 0)
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Find was hit while Consolidate was running"));
|
|
}
|
|
}
|
|
|
|
~FScopeVerifyIncrement()
|
|
{
|
|
int32 Result = FPlatformAtomics::InterlockedDecrement(&VerifyMutex);
|
|
if (Result < 0)
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Find was hit while Consolidate was running"));
|
|
}
|
|
}
|
|
};
|
|
|
|
struct FScopeVerifyDecrement
|
|
{
|
|
volatile int32 &VerifyMutex;
|
|
FScopeVerifyDecrement(volatile int32& InVerifyMutex) : VerifyMutex(InVerifyMutex)
|
|
{
|
|
int32 Result = FPlatformAtomics::InterlockedDecrement(&VerifyMutex);
|
|
if (Result >= 0)
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Consolidate was hit while Get/SetPSO was running"));
|
|
}
|
|
}
|
|
|
|
~FScopeVerifyDecrement()
|
|
{
|
|
int32 Result = FPlatformAtomics::InterlockedIncrement(&VerifyMutex);
|
|
if (Result != 0)
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Consolidate was hit while Get/SetPSO was running"));
|
|
}
|
|
}
|
|
};
|
|
#endif
|
|
|
|
public:
|
|
typedef TMap<TMyKey,TMyValue> FPipelineStateCacheType;
|
|
|
|
TSharedPipelineStateCache()
|
|
{
|
|
CurrentMap = &Map1;
|
|
BackfillMap = &Map2;
|
|
DuplicateStateGenerated = 0;
|
|
TLSSlot = FPlatformTLS::AllocTlsSlot();
|
|
}
|
|
|
|
bool Find( const TMyKey& InKey, TMyValue& OutResult )
|
|
{
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
FScopeVerifyIncrement S(VerifyMutex);
|
|
#endif
|
|
// safe because we only ever find when we don't add
|
|
TMyValue* Result = CurrentMap->Find(InKey);
|
|
|
|
if ( Result )
|
|
{
|
|
OutResult = *Result;
|
|
return true;
|
|
}
|
|
|
|
// check the local cahce which is safe because only this thread adds to it
|
|
TMap<TMyKey, TMyValue> &LocalCache = GetLocalCache();
|
|
// if it's not in the local cache then it will rebuild
|
|
Result = LocalCache.Find(InKey);
|
|
if (Result)
|
|
{
|
|
OutResult = *Result;
|
|
return true;
|
|
}
|
|
|
|
Result = BackfillMap->Find(InKey);
|
|
|
|
if ( Result )
|
|
{
|
|
LocalCache.Add(InKey, *Result);
|
|
OutResult = *Result;
|
|
return true;
|
|
}
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
// The list of tasks that are still in progress.
|
|
TArray<TTuple<TMyKey,TMyValue>> Uncompleted;
|
|
|
|
bool Add(const TMyKey& InKey, const TMyValue& InValue)
|
|
{
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
FScopeVerifyIncrement S(VerifyMutex);
|
|
#endif
|
|
// everything is added to the local cache then at end of frame we consoldate them all
|
|
TMap<TMyKey, TMyValue> &LocalCache = GetLocalCache();
|
|
|
|
check( LocalCache.Contains(InKey) == false );
|
|
LocalCache.Add(InKey, InValue);
|
|
checkfSlow(LocalCache.Contains(InKey), TEXT("PSO not found immediately after adding. Likely cause is an uninitialized field in a constructor or copy constructor"));
|
|
return true;
|
|
}
|
|
|
|
void ConsolidateThreadedCaches()
|
|
{
|
|
|
|
SCOPE_TIME_GUARD_MS(TEXT("ConsolidatePipelineCache"), 0.1);
|
|
check(IsInRenderingThread());
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
FScopeVerifyDecrement S(VerifyMutex);
|
|
#endif
|
|
|
|
// consolidate all the local threads keys with the current thread
|
|
// No one is allowed to call GetLocalCache while this is running
|
|
// this is verified by the VerifyMutex.
|
|
for ( FPipelineStateCacheType* PipelineStateCache : AllThreadsPipelineStateCache)
|
|
{
|
|
for (auto PipelineStateCacheIterator = PipelineStateCache->CreateIterator(); PipelineStateCacheIterator; ++PipelineStateCacheIterator)
|
|
{
|
|
const TMyKey& ThreadKey = PipelineStateCacheIterator->Key;
|
|
const TMyValue& ThreadValue = PipelineStateCacheIterator->Value;
|
|
|
|
{
|
|
BackfillMap->Remove(ThreadKey);
|
|
|
|
TMyValue* CurrentValue = CurrentMap->Find(ThreadKey);
|
|
if (CurrentValue)
|
|
{
|
|
// if two threads get from the backfill map then we might just be dealing with one pipelinestate, in which case we have already added it to the currentmap and don't need to do anything else
|
|
if ( *CurrentValue != ThreadValue )
|
|
{
|
|
++DuplicateStateGenerated;
|
|
DeleteArray.Add(ThreadValue);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
CurrentMap->Add(ThreadKey, ThreadValue);
|
|
Uncompleted.Add(TTuple<TMyKey, TMyValue>(ThreadKey, ThreadValue));
|
|
}
|
|
PipelineStateCacheIterator.RemoveCurrent();
|
|
}
|
|
}
|
|
}
|
|
|
|
// tick and complete any uncompleted PSO tasks (we free up precompile tasks here).
|
|
for (int32 i = Uncompleted.Num() - 1; i >= 0; i--)
|
|
{
|
|
checkSlow(CurrentMap->Find(Uncompleted[i].Key));
|
|
if (Uncompleted[i].Value->IsComplete())
|
|
{
|
|
Uncompleted[i].Value->WaitCompletion();
|
|
Uncompleted.RemoveAtSwap(i, 1, /*bAllowShrinking=*/ false);
|
|
}
|
|
}
|
|
}
|
|
|
|
void ProcessDelayedCleanup()
|
|
{
|
|
check(IsInRenderingThread());
|
|
|
|
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
|
|
RHICmdList.EnqueueLambda([DeleteArray = MoveTemp(DeleteArray)](FRHICommandListImmediate& RHICmdList) mutable
|
|
{
|
|
for (TMyValue& OldPipelineState : DeleteArray)
|
|
{
|
|
//once in the delayed list this object should not be findable anymore, so the 0 should remain, making this safe
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
check(OldPipelineState->InUseCount.GetValue() == 0);
|
|
#endif
|
|
// Duplicate entries must wait for in progress compiles to complete.
|
|
// inprogress tasks could also remain in this container and deferred for the next tick.
|
|
bool bWaited = OldPipelineState->WaitCompletion();
|
|
UE_CLOG(bWaited, LogRHI, Log, TEXT("Waited on a pipeline compile task while discarding duplicate."));
|
|
delete OldPipelineState;
|
|
}
|
|
DeleteArray.Empty();
|
|
});
|
|
}
|
|
|
|
int32 DiscardAndSwap()
|
|
{
|
|
check(IsInRenderingThread());
|
|
|
|
// the consolidate should always be run before the DiscardAndSwap.
|
|
// there should be no inuse pipeline states in the backfill map (because they should have been moved into the CurrentMap).
|
|
int32 Discarded = BackfillMap->Num();
|
|
|
|
FRHICommandListImmediate& RHICmdList = FRHICommandListExecutor::GetImmediateCommandList();
|
|
RHICmdList.EnqueueLambda([BackfillMap = MoveTemp(*BackfillMap)](FRHICommandListImmediate& RHICmdList) mutable
|
|
{
|
|
for ( const auto& DiscardIterator : BackfillMap)
|
|
{
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
check( DiscardIterator.Value->InUseCount.GetValue() == 0);
|
|
#endif
|
|
// Incomplete tasks should be put back to the current map. There should be no incomplete tasks encountered here.
|
|
bool bWaited = DiscardIterator.Value->WaitCompletion();
|
|
UE_CLOG(bWaited, LogRHI, Error, TEXT("Waited on a pipeline compile task while discarding retired PSOs."));
|
|
delete DiscardIterator.Value;
|
|
}
|
|
});
|
|
|
|
if ( CurrentMap == &Map1 )
|
|
{
|
|
CurrentMap = &Map2;
|
|
BackfillMap = &Map1;
|
|
}
|
|
else
|
|
{
|
|
CurrentMap = &Map1;
|
|
BackfillMap = &Map2;
|
|
}
|
|
|
|
// keep alive incomplete tasks by moving them back to the current map.
|
|
for (int32 i = Uncompleted.Num() - 1; i >= 0; i--)
|
|
{
|
|
int32 Removed = BackfillMap->Remove(Uncompleted[i].Key);
|
|
checkSlow(Removed);
|
|
if (Removed)
|
|
{
|
|
CurrentMap->Add(Uncompleted[i].Key, Uncompleted[i].Value);
|
|
}
|
|
}
|
|
|
|
return Discarded;
|
|
}
|
|
|
|
void WaitTasksComplete()
|
|
{
|
|
FScopeLock S(&AllThreadsLock);
|
|
|
|
for ( FPipelineStateCacheType* PipelineStateCache : AllThreadsPipelineStateCache )
|
|
{
|
|
WaitTasksComplete(PipelineStateCache);
|
|
}
|
|
|
|
WaitTasksComplete(BackfillMap);
|
|
WaitTasksComplete(CurrentMap);
|
|
}
|
|
|
|
private:
|
|
|
|
void WaitTasksComplete(FPipelineStateCacheType* PipelineStateCache)
|
|
{
|
|
FScopeLock S(&AllThreadsLock);
|
|
for (auto PipelineStateCacheIterator = PipelineStateCache->CreateIterator(); PipelineStateCacheIterator; ++PipelineStateCacheIterator)
|
|
{
|
|
FGraphicsPipelineState* pGPipelineState = PipelineStateCacheIterator->Value;
|
|
if(pGPipelineState != nullptr)
|
|
{
|
|
pGPipelineState->WaitCompletion();
|
|
}
|
|
}
|
|
}
|
|
|
|
private:
|
|
uint32 TLSSlot;
|
|
FPipelineStateCacheType *CurrentMap;
|
|
FPipelineStateCacheType *BackfillMap;
|
|
|
|
FPipelineStateCacheType Map1;
|
|
FPipelineStateCacheType Map2;
|
|
|
|
TArray<TMyValue> DeleteArray;
|
|
|
|
FCriticalSection AllThreadsLock;
|
|
TArray<FPipelineStateCacheType*> AllThreadsPipelineStateCache;
|
|
|
|
uint32 DuplicateStateGenerated;
|
|
#if PSO_TRACK_CACHE_STATS
|
|
friend void DumpPipelineCacheStats();
|
|
#endif
|
|
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
volatile int32 VerifyMutex;
|
|
#endif
|
|
|
|
};
|
|
|
|
// Typed caches for compute and graphics
|
|
typedef TDiscardableKeyValueCache< FRHIComputeShader*, FComputePipelineState*> FComputePipelineCache;
|
|
typedef TSharedPipelineStateCache<FGraphicsPipelineStateInitializer, FGraphicsPipelineState*> FGraphicsPipelineCache;
|
|
|
|
// These are the actual caches for both pipelines
|
|
FComputePipelineCache GComputePipelineCache;
|
|
FGraphicsPipelineCache GGraphicsPipelineCache;
|
|
|
|
FAutoConsoleTaskPriority CPrio_FCompilePipelineStateTask(
|
|
TEXT("TaskGraph.TaskPriorities.CompilePipelineStateTask"),
|
|
TEXT("Task and thread priority for FCompilePipelineStateTask."),
|
|
ENamedThreads::HighThreadPriority, // if we have high priority task threads, then use them...
|
|
ENamedThreads::NormalTaskPriority, // .. at normal task priority
|
|
ENamedThreads::HighTaskPriority // if we don't have hi pri threads, then use normal priority threads at high task priority instead
|
|
);
|
|
#if RHI_RAYTRACING
|
|
|
|
// Simple thread-safe pipeline state cache that's designed for low-frequency pipeline creation operations.
|
|
// The expected use case is a very infrequent (i.e. startup / load / streaming time) creation of ray tracing PSOs.
|
|
// This cache uses a single internal lock and therefore is not designed for highly concurrent operations.
|
|
class FRayTracingPipelineCache
|
|
{
|
|
public:
|
|
FRayTracingPipelineCache()
|
|
{}
|
|
|
|
~FRayTracingPipelineCache()
|
|
{}
|
|
|
|
bool FindBase(const FRayTracingPipelineStateInitializer& Initializer, FRayTracingPipelineState*& OutPipeline) const
|
|
{
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
|
|
// Find the most recently used pipeline with compatible configuration
|
|
|
|
FRayTracingPipelineState* BestPipeline = nullptr;
|
|
|
|
for (const auto& It : FullPipelines)
|
|
{
|
|
const FRayTracingPipelineStateSignature& CandidateInitializer = It.Key;
|
|
FRayTracingPipelineState* CandidatePipeline = It.Value;
|
|
|
|
if (!CandidatePipeline->RHIPipeline.IsValid()
|
|
|| CandidateInitializer.bAllowHitGroupIndexing != Initializer.bAllowHitGroupIndexing
|
|
|| CandidateInitializer.MaxPayloadSizeInBytes != Initializer.MaxPayloadSizeInBytes
|
|
|| CandidateInitializer.GetRayGenHash() != Initializer.GetRayGenHash()
|
|
|| CandidateInitializer.GetRayMissHash() != Initializer.GetRayMissHash()
|
|
|| CandidateInitializer.GetCallableHash() != Initializer.GetCallableHash())
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (BestPipeline == nullptr || *BestPipeline < *CandidatePipeline)
|
|
{
|
|
BestPipeline = CandidatePipeline;
|
|
}
|
|
}
|
|
|
|
if (BestPipeline)
|
|
{
|
|
OutPipeline = BestPipeline;
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool FindBySignature(const FRayTracingPipelineStateSignature& Signature, FRayTracingPipelineState*& OutCachedState) const
|
|
{
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
|
|
FRayTracingPipelineState* const* FoundState = FullPipelines.Find(Signature);
|
|
if (FoundState)
|
|
{
|
|
OutCachedState = *FoundState;
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool Find(const FRayTracingPipelineStateInitializer& Initializer, FRayTracingPipelineState*& OutCachedState) const
|
|
{
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
|
|
const FPipelineMap& Cache = Initializer.bPartial ? PartialPipelines : FullPipelines;
|
|
|
|
FRayTracingPipelineState* const* FoundState = Cache.Find(Initializer);
|
|
if (FoundState)
|
|
{
|
|
OutCachedState = *FoundState;
|
|
return true;
|
|
}
|
|
else
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Creates and returns a new pipeline state object, adding it to internal cache.
|
|
// The cache itself owns the object and is responsible for destroying it.
|
|
FRayTracingPipelineState* Add(const FRayTracingPipelineStateInitializer& Initializer)
|
|
{
|
|
FRayTracingPipelineState* Result = new FRayTracingPipelineState(Initializer);
|
|
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
|
|
FPipelineMap& Cache = Initializer.bPartial ? PartialPipelines : FullPipelines;
|
|
|
|
Cache.Add(Initializer, Result);
|
|
Result->AddHit();
|
|
|
|
return Result;
|
|
}
|
|
|
|
void Shutdown()
|
|
{
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
for (auto& It : FullPipelines)
|
|
{
|
|
delete It.Value;
|
|
}
|
|
for (auto& It : PartialPipelines)
|
|
{
|
|
delete It.Value;
|
|
}
|
|
}
|
|
|
|
void Trim(int32 TargetNumEntries)
|
|
{
|
|
FScopeLock ScopeLock(&CriticalSection);
|
|
|
|
// Only full pipeline cache is automatically trimmed.
|
|
FPipelineMap& Cache = FullPipelines;
|
|
|
|
if (Cache.Num() < TargetNumEntries)
|
|
{
|
|
return;
|
|
}
|
|
|
|
struct FEntry
|
|
{
|
|
FRayTracingPipelineStateSignature Key;
|
|
uint64 LastFrameHit;
|
|
uint64 HitsAcrossFrames;
|
|
FRayTracingPipelineState* Pipeline;
|
|
};
|
|
TArray<FEntry, TMemStackAllocator<>> Entries;
|
|
Entries.Reserve(Cache.Num());
|
|
|
|
const uint64 CurrentFrame = GFrameCounter;
|
|
const uint32 NumLatencyFrames = 10;
|
|
|
|
// Find all pipelines that were not used in the last 10 frames
|
|
|
|
for (const auto& It : Cache)
|
|
{
|
|
if (It.Value->LastFrameHit + NumLatencyFrames <= CurrentFrame
|
|
&& It.Value->IsCompilationComplete())
|
|
{
|
|
FEntry Entry;
|
|
Entry.Key = It.Key;
|
|
Entry.HitsAcrossFrames = It.Value->HitsAcrossFrames;
|
|
Entry.LastFrameHit = It.Value->LastFrameHit;
|
|
Entry.Pipeline = It.Value;
|
|
Entries.Add(Entry);
|
|
}
|
|
}
|
|
|
|
Entries.Sort([](const FEntry& A, const FEntry& B)
|
|
{
|
|
if (A.LastFrameHit == B.LastFrameHit)
|
|
{
|
|
return B.HitsAcrossFrames < A.HitsAcrossFrames;
|
|
}
|
|
else
|
|
{
|
|
return B.LastFrameHit < A.LastFrameHit;
|
|
}
|
|
});
|
|
|
|
// Remove least useful pipelines
|
|
|
|
while (Cache.Num() > TargetNumEntries && Entries.Num())
|
|
{
|
|
FEntry& LastEntry = Entries.Last();
|
|
check(LastEntry.Pipeline->RHIPipeline);
|
|
check(LastEntry.Pipeline->IsCompilationComplete());
|
|
delete LastEntry.Pipeline;
|
|
Cache.Remove(LastEntry.Key);
|
|
Entries.Pop(false);
|
|
}
|
|
|
|
LastTrimFrame = CurrentFrame;
|
|
}
|
|
|
|
uint64 GetLastTrimFrame() const { return LastTrimFrame; }
|
|
|
|
private:
|
|
|
|
mutable FCriticalSection CriticalSection;
|
|
using FPipelineMap = TMap<FRayTracingPipelineStateSignature, FRayTracingPipelineState*>;
|
|
FPipelineMap FullPipelines;
|
|
FPipelineMap PartialPipelines;
|
|
uint64 LastTrimFrame = 0;
|
|
};
|
|
|
|
FRayTracingPipelineCache GRayTracingPipelineCache;
|
|
#endif
|
|
|
|
/**
|
|
* Compile task
|
|
*/
|
|
static std::atomic<int32> GPipelinePrecompileTasksInFlight = { 0 };
|
|
|
|
int32 PipelineStateCache::GetNumActivePipelinePrecompileTasks()
|
|
{
|
|
return GPipelinePrecompileTasksInFlight.load();
|
|
}
|
|
|
|
class FCompilePipelineStateTask
|
|
{
|
|
public:
|
|
FPipelineState* Pipeline;
|
|
FGraphicsPipelineStateInitializer Initializer;
|
|
|
|
// InInitializer is only used for non-compute tasks, a default can just be used otherwise
|
|
FCompilePipelineStateTask(FPipelineState* InPipeline, const FGraphicsPipelineStateInitializer& InInitializer)
|
|
: Pipeline(InPipeline)
|
|
, Initializer(InInitializer)
|
|
{
|
|
if(Initializer.bFromPSOFileCache)
|
|
{
|
|
GPipelinePrecompileTasksInFlight++;
|
|
}
|
|
|
|
if (!Pipeline->IsCompute())
|
|
{
|
|
if (Initializer.BoundShaderState.GetMeshShader())
|
|
{
|
|
Initializer.BoundShaderState.GetMeshShader()->AddRef();
|
|
}
|
|
if (Initializer.BoundShaderState.GetAmplificationShader())
|
|
{
|
|
Initializer.BoundShaderState.GetAmplificationShader()->AddRef();
|
|
}
|
|
if (Initializer.BoundShaderState.VertexDeclarationRHI)
|
|
{
|
|
Initializer.BoundShaderState.VertexDeclarationRHI->AddRef();
|
|
}
|
|
if (Initializer.BoundShaderState.VertexShaderRHI)
|
|
{
|
|
Initializer.BoundShaderState.VertexShaderRHI->AddRef();
|
|
}
|
|
if (Initializer.BoundShaderState.PixelShaderRHI)
|
|
{
|
|
Initializer.BoundShaderState.PixelShaderRHI->AddRef();
|
|
}
|
|
if (Initializer.BoundShaderState.GetGeometryShader())
|
|
{
|
|
Initializer.BoundShaderState.GetGeometryShader()->AddRef();
|
|
}
|
|
if (Initializer.BlendState)
|
|
{
|
|
Initializer.BlendState->AddRef();
|
|
}
|
|
if (Initializer.RasterizerState)
|
|
{
|
|
Initializer.RasterizerState->AddRef();
|
|
}
|
|
if (Initializer.DepthStencilState)
|
|
{
|
|
Initializer.DepthStencilState->AddRef();
|
|
}
|
|
|
|
if (Initializer.BlendState)
|
|
{
|
|
Initializer.BlendState->AddRef();
|
|
}
|
|
if (Initializer.RasterizerState)
|
|
{
|
|
Initializer.RasterizerState->AddRef();
|
|
}
|
|
if (Initializer.DepthStencilState)
|
|
{
|
|
Initializer.DepthStencilState->AddRef();
|
|
}
|
|
}
|
|
}
|
|
|
|
~FCompilePipelineStateTask()
|
|
{
|
|
if (Initializer.bFromPSOFileCache)
|
|
{
|
|
GPipelinePrecompileTasksInFlight--;
|
|
}
|
|
}
|
|
|
|
static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
|
|
|
|
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
|
|
{
|
|
CompilePSO();
|
|
}
|
|
|
|
void CompilePSO()
|
|
{
|
|
LLM_SCOPE(ELLMTag::PSO);
|
|
FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
|
|
|
|
if (Pipeline->IsCompute())
|
|
{
|
|
FComputePipelineState* ComputePipeline = static_cast<FComputePipelineState*>(Pipeline);
|
|
ComputePipeline->RHIPipeline = RHICreateComputePipelineState(ComputePipeline->ComputeShader);
|
|
}
|
|
else
|
|
{
|
|
if (GRHISupportsMeshShadersTier0)
|
|
{
|
|
if (!Initializer.BoundShaderState.VertexShaderRHI && !Initializer.BoundShaderState.GetMeshShader())
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Tried to create a Gfx Pipeline State without Vertex or Mesh Shader"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (!Initializer.BoundShaderState.VertexShaderRHI)
|
|
{
|
|
UE_LOG(LogRHI, Fatal, TEXT("Tried to create a Gfx Pipeline State without Vertex Shader"));
|
|
}
|
|
}
|
|
|
|
FGraphicsPipelineState* GfxPipeline = static_cast<FGraphicsPipelineState*>(Pipeline);
|
|
GfxPipeline->RHIPipeline = RHICreateGraphicsPipelineState(Initializer);
|
|
|
|
if (GfxPipeline->RHIPipeline)
|
|
{
|
|
GfxPipeline->SortKey = GfxPipeline->RHIPipeline->GetSortKey();
|
|
}
|
|
else
|
|
{
|
|
HandlePipelineCreationFailure(Initializer);
|
|
}
|
|
|
|
if (Initializer.BoundShaderState.GetMeshShader())
|
|
{
|
|
Initializer.BoundShaderState.GetMeshShader()->Release();
|
|
}
|
|
if (Initializer.BoundShaderState.GetAmplificationShader())
|
|
{
|
|
Initializer.BoundShaderState.GetAmplificationShader()->Release();
|
|
}
|
|
if (Initializer.BoundShaderState.VertexDeclarationRHI)
|
|
{
|
|
Initializer.BoundShaderState.VertexDeclarationRHI->Release();
|
|
}
|
|
if (Initializer.BoundShaderState.VertexShaderRHI)
|
|
{
|
|
Initializer.BoundShaderState.VertexShaderRHI->Release();
|
|
}
|
|
if (Initializer.BoundShaderState.PixelShaderRHI)
|
|
{
|
|
Initializer.BoundShaderState.PixelShaderRHI->Release();
|
|
}
|
|
if (Initializer.BoundShaderState.GetGeometryShader())
|
|
{
|
|
Initializer.BoundShaderState.GetGeometryShader()->Release();
|
|
}
|
|
if (Initializer.BlendState)
|
|
{
|
|
Initializer.BlendState->Release();
|
|
}
|
|
if (Initializer.RasterizerState)
|
|
{
|
|
Initializer.RasterizerState->Release();
|
|
}
|
|
if (Initializer.DepthStencilState)
|
|
{
|
|
Initializer.DepthStencilState->Release();
|
|
}
|
|
|
|
if (Initializer.BlendState)
|
|
{
|
|
Initializer.BlendState->Release();
|
|
}
|
|
if (Initializer.RasterizerState)
|
|
{
|
|
Initializer.RasterizerState->Release();
|
|
}
|
|
if (Initializer.DepthStencilState)
|
|
{
|
|
Initializer.DepthStencilState->Release();
|
|
}
|
|
}
|
|
}
|
|
|
|
FORCEINLINE TStatId GetStatId() const
|
|
{
|
|
RETURN_QUICK_DECLARE_CYCLE_STAT(FCompilePipelineStateTask, STATGROUP_TaskGraphTasks);
|
|
}
|
|
|
|
ENamedThreads::Type GetDesiredThread()
|
|
{
|
|
// On Mac the compilation is handled using external processes, so engine threads have very little work to do
|
|
// and it's better to leave more CPU time to these extrenal processes and other engine threads.
|
|
return PLATFORM_MAC ? ENamedThreads::AnyBackgroundThreadNormalTask : CPrio_FCompilePipelineStateTask.Get();
|
|
}
|
|
};
|
|
|
|
void PipelineStateCache::ReportFrameHitchToCSV()
|
|
{
|
|
ReportFrameHitchThisFrame = true;
|
|
}
|
|
|
|
/**
|
|
* Called at the end of each frame during the RHI . Evicts all items left in the backfill cached based on time
|
|
*/
|
|
void PipelineStateCache::FlushResources()
|
|
{
|
|
check(IsInRenderingThread());
|
|
|
|
GGraphicsPipelineCache.ConsolidateThreadedCaches();
|
|
GGraphicsPipelineCache.ProcessDelayedCleanup();
|
|
|
|
{
|
|
int32 NumMissesThisFrame = GraphicsPipelineCacheMisses.Load(EMemoryOrder::Relaxed);
|
|
int32 NumMissesLastFrame = GraphicsPipelineCacheMissesHistory.Num() >= 2 ? GraphicsPipelineCacheMissesHistory[1] : 0;
|
|
CSV_CUSTOM_STAT(PSO, PSOMisses, NumMissesThisFrame, ECsvCustomStatOp::Set);
|
|
|
|
// Put a negative number in the CSV to report that there was no hitch this frame for the PSO hitch stat.
|
|
if (!ReportFrameHitchThisFrame)
|
|
{
|
|
NumMissesThisFrame = -1;
|
|
NumMissesLastFrame = -1;
|
|
}
|
|
CSV_CUSTOM_STAT(PSO, PSOMissesOnHitch, NumMissesThisFrame, ECsvCustomStatOp::Set);
|
|
CSV_CUSTOM_STAT(PSO, PSOPrevFrameMissesOnHitch, NumMissesLastFrame, ECsvCustomStatOp::Set);
|
|
}
|
|
|
|
{
|
|
int32 NumMissesThisFrame = ComputePipelineCacheMisses.Load(EMemoryOrder::Relaxed);
|
|
int32 NumMissesLastFrame = ComputePipelineCacheMissesHistory.Num() >= 2 ? ComputePipelineCacheMissesHistory[1] : 0;
|
|
CSV_CUSTOM_STAT(PSO, PSOComputeMisses, NumMissesThisFrame, ECsvCustomStatOp::Set);
|
|
|
|
// Put a negative number in the CSV to report that there was no hitch this frame for the PSO hitch stat.
|
|
if (!ReportFrameHitchThisFrame)
|
|
{
|
|
NumMissesThisFrame = -1;
|
|
NumMissesLastFrame = -1;
|
|
}
|
|
CSV_CUSTOM_STAT(PSO, PSOComputeMissesOnHitch, NumMissesThisFrame, ECsvCustomStatOp::Set);
|
|
CSV_CUSTOM_STAT(PSO, PSOComputePrevFrameMissesOnHitch, NumMissesLastFrame, ECsvCustomStatOp::Set);
|
|
}
|
|
ReportFrameHitchThisFrame = false;
|
|
|
|
GraphicsPipelineCacheMissesHistory.Insert(GraphicsPipelineCacheMisses, 0);
|
|
GraphicsPipelineCacheMissesHistory.SetNum(PSO_MISS_FRAME_HISTORY_SIZE);
|
|
ComputePipelineCacheMissesHistory.Insert(ComputePipelineCacheMisses, 0);
|
|
ComputePipelineCacheMissesHistory.SetNum(PSO_MISS_FRAME_HISTORY_SIZE);
|
|
GraphicsPipelineCacheMisses = 0;
|
|
ComputePipelineCacheMisses = 0;
|
|
|
|
static double LastEvictionTime = FPlatformTime::Seconds();
|
|
double CurrentTime = FPlatformTime::Seconds();
|
|
|
|
#if PSO_DO_CACHE_EVICT_EACH_FRAME
|
|
LastEvictionTime = 0;
|
|
#endif
|
|
|
|
// because it takes two cycles for an object to move from main->backfill->gone we check
|
|
// at half the desired eviction time
|
|
int32 EvictionPeriod = CVarPSOEvictionTime.GetValueOnAnyThread();
|
|
|
|
if (EvictionPeriod == 0 || CurrentTime - LastEvictionTime < EvictionPeriod)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// This should be very fast, if not it's likely eviction time is too high and too
|
|
// many items are building up.
|
|
SCOPE_TIME_GUARD_MS(TEXT("TrimPiplelineCache"), 0.1);
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
DumpPipelineCacheStats();
|
|
#endif
|
|
|
|
LastEvictionTime = CurrentTime;
|
|
|
|
int ReleasedComputeEntries = 0;
|
|
int ReleasedGraphicsEntries = 0;
|
|
|
|
ReleasedComputeEntries = GComputePipelineCache.Discard([](FComputePipelineState* CacheItem) {
|
|
delete CacheItem;
|
|
});
|
|
|
|
ReleasedGraphicsEntries = GGraphicsPipelineCache.DiscardAndSwap();
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
UE_LOG(LogRHI, Log, TEXT("Cleared state cache in %.02f ms. %d ComputeEntries, %d Graphics entries")
|
|
, (FPlatformTime::Seconds() - CurrentTime) / 1000
|
|
, ReleasedComputeEntries, ReleasedGraphicsEntries);
|
|
#endif // PSO_TRACK_CACHE_STATS
|
|
|
|
}
|
|
|
|
static bool IsAsyncCompilationAllowed(FRHIComputeCommandList& RHICmdList)
|
|
{
|
|
return GRHISupportsAsyncPipelinePrecompile &&
|
|
FDataDrivenShaderPlatformInfo::GetSupportsAsyncPipelineCompilation(GMaxRHIShaderPlatform) &&
|
|
GCVarAsyncPipelineCompile.GetValueOnAnyThread() && !RHICmdList.Bypass() && (IsRunningRHIInSeparateThread() && !IsInRHIThread()) && RHICmdList.AsyncPSOCompileAllowed();
|
|
}
|
|
|
|
uint64 PipelineStateCache::RetrieveGraphicsPipelineStateSortKey(const FGraphicsPipelineState* GraphicsPipelineState)
|
|
{
|
|
return GraphicsPipelineState != nullptr ? GraphicsPipelineState->SortKey : 0;
|
|
}
|
|
|
|
FComputePipelineState* PipelineStateCache::GetAndOrCreateComputePipelineState(FRHIComputeCommandList& RHICmdList, FRHIComputeShader* ComputeShader, bool bFromFileCache)
|
|
{
|
|
bool DoAsyncCompile = IsAsyncCompilationAllowed(RHICmdList);
|
|
|
|
FComputePipelineState* OutCachedState = nullptr;
|
|
|
|
uint32 LockFlags = GComputePipelineCache.ApplyLock(0, FComputePipelineCache::LockFlags::ReadLock);
|
|
|
|
bool WasFound = GComputePipelineCache.Find(ComputeShader, OutCachedState, LockFlags | FComputePipelineCache::LockFlags::WriteLockOnAddFail, LockFlags);
|
|
|
|
if (WasFound == false)
|
|
{
|
|
FPipelineFileCache::CacheComputePSO(GetTypeHash(ComputeShader), ComputeShader);
|
|
|
|
// create new graphics state
|
|
OutCachedState = new FComputePipelineState(ComputeShader);
|
|
OutCachedState->Stats = FPipelineFileCache::RegisterPSOStats(GetTypeHash(ComputeShader));
|
|
|
|
if (!bFromFileCache)
|
|
{
|
|
++ComputePipelineCacheMisses;
|
|
}
|
|
|
|
// create a compilation task, or just do it now...
|
|
if (DoAsyncCompile)
|
|
{
|
|
OutCachedState->CompletionEvent = TGraphTask<FCompilePipelineStateTask>::CreateTask().ConstructAndDispatchWhenReady(OutCachedState, FGraphicsPipelineStateInitializer());
|
|
RHICmdList.AddDispatchPrerequisite(OutCachedState->CompletionEvent);
|
|
}
|
|
else
|
|
{
|
|
OutCachedState->RHIPipeline = RHICreateComputePipelineState(OutCachedState->ComputeShader);
|
|
}
|
|
|
|
GComputePipelineCache.Add(ComputeShader, OutCachedState, LockFlags);
|
|
}
|
|
else
|
|
{
|
|
if (DoAsyncCompile)
|
|
{
|
|
if(!OutCachedState->IsComplete())
|
|
{
|
|
RHICmdList.AddDispatchPrerequisite(OutCachedState->CompletionEvent);
|
|
}
|
|
}
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
OutCachedState->AddHit();
|
|
#endif
|
|
}
|
|
|
|
GComputePipelineCache.Unlock(LockFlags);
|
|
|
|
#if 0
|
|
bool DoAsyncCompile = IsAsyncCompilationAllowed(RHICmdList);
|
|
|
|
|
|
TSharedPtr<FComputePipelineState> OutCachedState;
|
|
|
|
// Find or add an entry for this initializer
|
|
bool WasFound = GComputePipelineCache.FindOrAdd(ComputeShader, OutCachedState, [&RHICmdList, &ComputeShader, &DoAsyncCompile] {
|
|
// create new graphics state
|
|
TSharedPtr<FComputePipelineState> PipelineState(new FComputePipelineState(ComputeShader));
|
|
PipelineState->Stats = FPipelineFileCache::RegisterPSOStats(GetTypeHash(ComputeShader));
|
|
|
|
// create a compilation task, or just do it now...
|
|
if (DoAsyncCompile)
|
|
{
|
|
PipelineState->CompletionEvent = TGraphTask<FCompilePipelineStateTask>::CreateTask().ConstructAndDispatchWhenReady(PipelineState.Get(), FGraphicsPipelineStateInitializer());
|
|
RHICmdList.QueueAsyncPipelineStateCompile(PipelineState->CompletionEvent);
|
|
}
|
|
else
|
|
{
|
|
PipelineState->RHIPipeline = RHICreateComputePipelineState(PipelineState->ComputeShader);
|
|
}
|
|
|
|
// wrap it and return it
|
|
return PipelineState;
|
|
});
|
|
|
|
check(OutCachedState.IsValid());
|
|
|
|
// if we found an entry the block above wasn't executed
|
|
if (WasFound)
|
|
{
|
|
if (DoAsyncCompile)
|
|
{
|
|
FRWScopeLock ScopeLock(GComputePipelineCache.RWLock(), SLT_ReadOnly);
|
|
FGraphEventRef& CompletionEvent = OutCachedState->CompletionEvent;
|
|
if (CompletionEvent.IsValid() && !CompletionEvent->IsComplete())
|
|
{
|
|
RHICmdList.QueueAsyncPipelineStateCompile(CompletionEvent);
|
|
}
|
|
}
|
|
#if PSO_TRACK_CACHE_STATS
|
|
OutCachedState->AddHit();
|
|
#endif
|
|
}
|
|
#endif
|
|
// return the state pointer
|
|
return OutCachedState;
|
|
}
|
|
|
|
#if RHI_RAYTRACING
|
|
|
|
class FCompileRayTracingPipelineStateTask
|
|
{
|
|
public:
|
|
|
|
UE_NONCOPYABLE(FCompileRayTracingPipelineStateTask)
|
|
|
|
FPipelineState* Pipeline;
|
|
|
|
FRayTracingPipelineStateInitializer Initializer;
|
|
const bool bBackgroundTask;
|
|
|
|
FCompileRayTracingPipelineStateTask(FPipelineState* InPipeline, const FRayTracingPipelineStateInitializer& InInitializer, bool bInBackgroundTask)
|
|
: Pipeline(InPipeline)
|
|
, Initializer(InInitializer)
|
|
, bBackgroundTask(bInBackgroundTask)
|
|
{
|
|
// Copy all referenced shaders and AddRef them while the task is alive
|
|
|
|
RayGenTable = CopyShaderTable(InInitializer.GetRayGenTable());
|
|
MissTable = CopyShaderTable(InInitializer.GetMissTable());
|
|
HitGroupTable = CopyShaderTable(InInitializer.GetHitGroupTable());
|
|
CallableTable = CopyShaderTable(InInitializer.GetCallableTable());
|
|
|
|
// Point initializer to shader tables owned by this task
|
|
|
|
Initializer.SetRayGenShaderTable(RayGenTable, InInitializer.GetRayGenHash());
|
|
Initializer.SetMissShaderTable(MissTable, InInitializer.GetRayMissHash());
|
|
Initializer.SetHitGroupTable(HitGroupTable, InInitializer.GetHitGroupHash());
|
|
Initializer.SetCallableTable(CallableTable, InInitializer.GetCallableHash());
|
|
}
|
|
|
|
static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
|
|
|
|
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
|
|
{
|
|
FTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
|
|
FRayTracingPipelineState* RayTracingPipeline = static_cast<FRayTracingPipelineState*>(Pipeline);
|
|
check(!RayTracingPipeline->RHIPipeline.IsValid());
|
|
RayTracingPipeline->RHIPipeline = RHICreateRayTracingPipelineState(Initializer);
|
|
|
|
// References to shaders no longer need to be held by this task
|
|
|
|
ReleaseShaders(CallableTable);
|
|
ReleaseShaders(HitGroupTable);
|
|
ReleaseShaders(MissTable);
|
|
ReleaseShaders(RayGenTable);
|
|
|
|
Initializer = FRayTracingPipelineStateInitializer();
|
|
}
|
|
|
|
FORCEINLINE TStatId GetStatId() const
|
|
{
|
|
RETURN_QUICK_DECLARE_CYCLE_STAT(FCompileRayTracingPipelineStateTask, STATGROUP_TaskGraphTasks);
|
|
}
|
|
|
|
ENamedThreads::Type GetDesiredThread()
|
|
{
|
|
// NOTE: RT PSO compilation internally spawns high-priority shader compilation tasks and waits on them.
|
|
// FCompileRayTracingPipelineStateTask itself must run at lower priority to prevent deadlocks when
|
|
// there are multiple RTPSO tasks that all wait on compilation via WaitUntilTasksComplete().
|
|
return bBackgroundTask ? ENamedThreads::AnyBackgroundThreadNormalTask : ENamedThreads::AnyNormalThreadNormalTask;
|
|
}
|
|
|
|
private:
|
|
|
|
void AddRefShaders(TArray<FRHIRayTracingShader*>& ShaderTable)
|
|
{
|
|
for (FRHIRayTracingShader* Ptr : ShaderTable)
|
|
{
|
|
Ptr->AddRef();
|
|
}
|
|
}
|
|
|
|
void ReleaseShaders(TArray<FRHIRayTracingShader*>& ShaderTable)
|
|
{
|
|
for (FRHIRayTracingShader* Ptr : ShaderTable)
|
|
{
|
|
Ptr->Release();
|
|
}
|
|
}
|
|
|
|
TArray<FRHIRayTracingShader*> CopyShaderTable(const TArrayView<FRHIRayTracingShader*>& Source)
|
|
{
|
|
TArray<FRHIRayTracingShader*> Result(Source.GetData(), Source.Num());
|
|
AddRefShaders(Result);
|
|
return Result;
|
|
}
|
|
|
|
TArray<FRHIRayTracingShader*> RayGenTable;
|
|
TArray<FRHIRayTracingShader*> MissTable;
|
|
TArray<FRHIRayTracingShader*> HitGroupTable;
|
|
TArray<FRHIRayTracingShader*> CallableTable;
|
|
};
|
|
#endif // RHI_RAYTRACING
|
|
|
|
FRayTracingPipelineState* PipelineStateCache::GetAndOrCreateRayTracingPipelineState(
|
|
FRHICommandList& RHICmdList,
|
|
const FRayTracingPipelineStateInitializer& InInitializer,
|
|
ERayTracingPipelineCacheFlags Flags)
|
|
{
|
|
#if RHI_RAYTRACING
|
|
LLM_SCOPE(ELLMTag::PSO);
|
|
|
|
check(IsInRenderingThread() || IsInParallelRenderingThread());
|
|
|
|
const bool bDoAsyncCompile = IsAsyncCompilationAllowed(RHICmdList);
|
|
const bool bNonBlocking = !!(Flags & ERayTracingPipelineCacheFlags::NonBlocking);
|
|
|
|
FRayTracingPipelineState* Result = nullptr;
|
|
|
|
bool bWasFound = GRayTracingPipelineCache.Find(InInitializer, Result);
|
|
|
|
if (bWasFound)
|
|
{
|
|
if (!Result->IsCompilationComplete())
|
|
{
|
|
if (!bDoAsyncCompile)
|
|
{
|
|
// Pipeline is in cache, but compilation is not finished and async compilation is disallowed, so block here RHI pipeline is created.
|
|
Result->WaitCompletion();
|
|
}
|
|
else if (bNonBlocking)
|
|
{
|
|
// Pipeline is in cache, but compilation has not finished yet, so it can't be used for rendering.
|
|
// Caller must use a fallback pipeline now and try again next frame.
|
|
Result = nullptr;
|
|
}
|
|
else
|
|
{
|
|
// Pipeline is in cache, but compilation is not finished and caller requested blocking mode.
|
|
// RHI command list can't begin translation until this event is complete.
|
|
RHICmdList.AddDispatchPrerequisite(Result->CompletionEvent);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
checkf(Result->RHIPipeline.IsValid(), TEXT("If pipeline is in cache and it doesn't have a completion event, then RHI pipeline is expected to be ready"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
FPipelineFileCache::CacheRayTracingPSO(InInitializer);
|
|
|
|
// Copy the initializer as we may want to patch it below
|
|
FRayTracingPipelineStateInitializer Initializer = InInitializer;
|
|
|
|
// If explicit base pipeline is not provided then find a compatible one from the cache
|
|
if (GRHISupportsRayTracingPSOAdditions && InInitializer.BasePipeline == nullptr)
|
|
{
|
|
FRayTracingPipelineState* BasePipeline = nullptr;
|
|
bool bBasePipelineFound = GRayTracingPipelineCache.FindBase(Initializer, BasePipeline);
|
|
if (bBasePipelineFound)
|
|
{
|
|
Initializer.BasePipeline = BasePipeline->RHIPipeline;
|
|
}
|
|
}
|
|
|
|
// Remove old pipelines once per frame
|
|
const int32 TargetCacheSize = CVarRTPSOCacheSize.GetValueOnAnyThread();
|
|
if (TargetCacheSize > 0 && GRayTracingPipelineCache.GetLastTrimFrame() != GFrameCounter)
|
|
{
|
|
GRayTracingPipelineCache.Trim(TargetCacheSize);
|
|
}
|
|
|
|
Result = GRayTracingPipelineCache.Add(Initializer);
|
|
|
|
if (bDoAsyncCompile)
|
|
{
|
|
Result->CompletionEvent = TGraphTask<FCompileRayTracingPipelineStateTask>::CreateTask().ConstructAndDispatchWhenReady(
|
|
Result,
|
|
Initializer,
|
|
bNonBlocking);
|
|
|
|
// Partial or non-blocking pipelines can't be used for rendering, therefore this command list does not need to depend on them.
|
|
|
|
if (bNonBlocking)
|
|
{
|
|
Result = nullptr;
|
|
}
|
|
else if (!Initializer.bPartial)
|
|
{
|
|
RHICmdList.AddDispatchPrerequisite(Result->CompletionEvent);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
Result->RHIPipeline = RHICreateRayTracingPipelineState(Initializer);
|
|
}
|
|
}
|
|
|
|
if (Result)
|
|
{
|
|
Result->AddHit();
|
|
}
|
|
|
|
return Result;
|
|
|
|
#else // RHI_RAYTRACING
|
|
return nullptr;
|
|
#endif // RHI_RAYTRACING
|
|
}
|
|
|
|
FRayTracingPipelineState* PipelineStateCache::GetRayTracingPipelineState(const FRayTracingPipelineStateSignature& Signature)
|
|
{
|
|
#if RHI_RAYTRACING
|
|
FRayTracingPipelineState* Result = nullptr;
|
|
bool bWasFound = GRayTracingPipelineCache.FindBySignature(Signature, Result);
|
|
if (bWasFound)
|
|
{
|
|
Result->AddHit();
|
|
}
|
|
return Result;
|
|
#else // RHI_RAYTRACING
|
|
return nullptr;
|
|
#endif // RHI_RAYTRACING
|
|
}
|
|
|
|
FRHIComputePipelineState* ExecuteSetComputePipelineState(FComputePipelineState* ComputePipelineState)
|
|
{
|
|
ensure(ComputePipelineState->RHIPipeline);
|
|
FRWScopeLock ScopeLock(GComputePipelineCache.RWLock(), SLT_Write);
|
|
ComputePipelineState->AddUse();
|
|
ComputePipelineState->CompletionEvent = nullptr;
|
|
return ComputePipelineState->RHIPipeline;
|
|
}
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
|
|
static std::atomic_uint64_t TotalPrecompileWaitTime;
|
|
static std::atomic_int64_t TotalNumPrecompileJobs;
|
|
static std::atomic_int64_t TotalNumPrecompileJobsCompleted;
|
|
|
|
void StatsStartPrecompile()
|
|
{
|
|
TotalNumPrecompileJobs++;
|
|
}
|
|
|
|
void StatsEndPrecompile(uint64 TimeToComplete)
|
|
{
|
|
TotalPrecompileWaitTime += TimeToComplete;
|
|
TotalNumPrecompileJobsCompleted++;
|
|
}
|
|
|
|
#endif
|
|
|
|
FGraphicsPipelineState* PipelineStateCache::GetAndOrCreateGraphicsPipelineState(FRHICommandList& RHICmdList, const FGraphicsPipelineStateInitializer& Initializer, EApplyRendertargetOption ApplyFlags)
|
|
{
|
|
LLM_SCOPE(ELLMTag::PSO);
|
|
|
|
if (GRHISupportsMeshShadersTier0)
|
|
{
|
|
checkf(Initializer.BoundShaderState.VertexShaderRHI || Initializer.BoundShaderState.GetMeshShader(), TEXT("GraphicsPipelineState must include a vertex or mesh shader"));
|
|
}
|
|
else
|
|
{
|
|
checkf(Initializer.BoundShaderState.VertexShaderRHI, TEXT("GraphicsPipelineState must include a vertex shader"));
|
|
|
|
}
|
|
|
|
check(Initializer.DepthStencilState && Initializer.BlendState && Initializer.RasterizerState);
|
|
|
|
#if !UE_BUILD_SHIPPING && !UE_BUILD_TEST
|
|
if (ApplyFlags == EApplyRendertargetOption::CheckApply)
|
|
{
|
|
// Catch cases where the state does not match
|
|
FGraphicsPipelineStateInitializer NewInitializer = Initializer;
|
|
RHICmdList.ApplyCachedRenderTargets(NewInitializer);
|
|
|
|
int32 AnyFailed = 0;
|
|
AnyFailed |= (NewInitializer.RenderTargetsEnabled != Initializer.RenderTargetsEnabled) << 0;
|
|
|
|
if (AnyFailed == 0)
|
|
{
|
|
for (int32 i = 0; i < (int32)NewInitializer.RenderTargetsEnabled; i++)
|
|
{
|
|
AnyFailed |= (NewInitializer.RenderTargetFormats[i] != Initializer.RenderTargetFormats[i]) << 1;
|
|
// as long as RT formats match, the flags shouldn't matter. We only store format-influencing flags in the recorded PSOs, so the check would likely fail.
|
|
//AnyFailed |= (NewInitializer.RenderTargetFlags[i] != Initializer.RenderTargetFlags[i]) << 2;
|
|
if (AnyFailed)
|
|
{
|
|
AnyFailed |= i << 24;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
AnyFailed |= (NewInitializer.DepthStencilTargetFormat != Initializer.DepthStencilTargetFormat) << 3;
|
|
AnyFailed |= (NewInitializer.DepthStencilTargetFlag != Initializer.DepthStencilTargetFlag) << 4;
|
|
AnyFailed |= (NewInitializer.DepthTargetLoadAction != Initializer.DepthTargetLoadAction) << 5;
|
|
AnyFailed |= (NewInitializer.DepthTargetStoreAction != Initializer.DepthTargetStoreAction) << 6;
|
|
AnyFailed |= (NewInitializer.StencilTargetLoadAction != Initializer.StencilTargetLoadAction) << 7;
|
|
AnyFailed |= (NewInitializer.StencilTargetStoreAction != Initializer.StencilTargetStoreAction) << 8;
|
|
|
|
checkf(!AnyFailed, TEXT("GetAndOrCreateGraphicsPipelineState RenderTarget check failed with: %i !"), AnyFailed);
|
|
}
|
|
#endif
|
|
|
|
bool DoAsyncCompile = IsAsyncCompilationAllowed(RHICmdList);
|
|
|
|
FGraphicsPipelineState* OutCachedState = nullptr;
|
|
|
|
bool bWasFound = GGraphicsPipelineCache.Find(Initializer, OutCachedState);
|
|
|
|
if (bWasFound == false)
|
|
{
|
|
FPipelineFileCache::CacheGraphicsPSO(GetTypeHash(Initializer), Initializer);
|
|
|
|
// create new graphics state
|
|
OutCachedState = new FGraphicsPipelineState();
|
|
OutCachedState->Stats = FPipelineFileCache::RegisterPSOStats(GetTypeHash(Initializer));
|
|
|
|
if (!Initializer.bFromPSOFileCache)
|
|
{
|
|
GraphicsPipelineCacheMisses++;
|
|
}
|
|
|
|
// create a compilation task, or just do it now...
|
|
if (DoAsyncCompile)
|
|
{
|
|
// Use normal task graph for non-precompile jobs.
|
|
if (!Initializer.bFromPSOFileCache || GPSOPrecompileThreadPoolSize <=0)
|
|
{
|
|
OutCachedState->CompletionEvent = TGraphTask<FCompilePipelineStateTask>::CreateTask().ConstructAndDispatchWhenReady(OutCachedState, Initializer);
|
|
RHICmdList.AddDispatchPrerequisite(OutCachedState->CompletionEvent);
|
|
}
|
|
else
|
|
{
|
|
// Here, PSO precompiles use a separate thread pool.
|
|
// Note that we do not add precompile tasks as cmdlist prerequisites.
|
|
FGraphEventRef CompletedEventGraph = FGraphEvent::CreateGraphEvent();
|
|
TUniquePtr<FCompilePipelineStateTask> ThreadPoolTask = MakeUnique<FCompilePipelineStateTask>(OutCachedState, Initializer);
|
|
OutCachedState->CompletionEvent = CompletedEventGraph;
|
|
uint64 StartTime = FPlatformTime::Cycles64();
|
|
#if PSO_TRACK_CACHE_STATS
|
|
StatsStartPrecompile();
|
|
#endif
|
|
OutCachedState->PrecompileTask = MakeUnique<FAsyncTaskLimitedReschedule<FPSOPrecompileTask>>(
|
|
[ThreadPoolTask = MoveTemp(ThreadPoolTask)]()
|
|
{
|
|
ThreadPoolTask->CompilePSO();
|
|
}
|
|
,
|
|
[CompletedEventGraph, StartTime]()
|
|
{
|
|
CompletedEventGraph->DispatchSubsequents();
|
|
#if PSO_TRACK_CACHE_STATS
|
|
StatsEndPrecompile(FPlatformTime::Cycles64() - StartTime);
|
|
#endif
|
|
}
|
|
);
|
|
OutCachedState->PrecompileTask->StartBackgroundTask(&GPSOThreadPool.Get(), EQueuedWorkPriority::Normal);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
OutCachedState->RHIPipeline = RHICreateGraphicsPipelineState(Initializer);
|
|
if (OutCachedState->RHIPipeline)
|
|
{
|
|
OutCachedState->SortKey = OutCachedState->RHIPipeline->GetSortKey();
|
|
}
|
|
else
|
|
{
|
|
HandlePipelineCreationFailure(Initializer);
|
|
}
|
|
}
|
|
|
|
GGraphicsPipelineCache.Add(Initializer, OutCachedState);
|
|
}
|
|
else
|
|
{
|
|
if (DoAsyncCompile && !Initializer.bFromPSOFileCache && !OutCachedState->IsComplete())
|
|
{
|
|
if (OutCachedState->PrecompileTask)
|
|
{
|
|
// if this is an in-progress threadpool precompile task then it could be seconds away in the queue.
|
|
// Reissue this task so that it jumps the precompile queue.
|
|
OutCachedState->PrecompileTask->Reschedule(&GPSOThreadPool.Get(), EQueuedWorkPriority::Highest);
|
|
#if PSO_TRACK_CACHE_STATS
|
|
UE_LOG(LogRHI, Log, TEXT("An incomplete precompile task was required for rendering!"));
|
|
#endif
|
|
}
|
|
RHICmdList.AddDispatchPrerequisite(OutCachedState->CompletionEvent);
|
|
}
|
|
|
|
#if PSO_TRACK_CACHE_STATS
|
|
OutCachedState->AddHit();
|
|
#endif
|
|
}
|
|
|
|
// return the state pointer
|
|
return OutCachedState;
|
|
}
|
|
|
|
FRHIGraphicsPipelineState* ExecuteSetGraphicsPipelineState(FGraphicsPipelineState* GraphicsPipelineState)
|
|
{
|
|
FRHIGraphicsPipelineState* RHIPipeline = GraphicsPipelineState->RHIPipeline;
|
|
|
|
GraphicsPipelineState->AddUse();
|
|
|
|
#if PIPELINESTATECACHE_VERIFYTHREADSAFE
|
|
int32 Result = GraphicsPipelineState->InUseCount.Decrement();
|
|
check(Result >= 0);
|
|
#endif
|
|
|
|
return RHIPipeline;
|
|
}
|
|
|
|
void DumpPipelineCacheStats()
|
|
{
|
|
#if PSO_TRACK_CACHE_STATS
|
|
double TotalTime = 0.0;
|
|
double MinTime = FLT_MAX;
|
|
double MaxTime = FLT_MIN;
|
|
|
|
int MinFrames = INT_MAX;
|
|
int MaxFrames = INT_MIN;
|
|
int TotalFrames = 0;
|
|
|
|
int NumUsedLastMin = 0;
|
|
int NumHits = 0;
|
|
int NumHitsAcrossFrames = 0;
|
|
int NumItemsMultipleFrameHits = 0;
|
|
|
|
int NumCachedItems = GGraphicsPipelineCache.CurrentMap->Num();
|
|
|
|
if (NumCachedItems == 0)
|
|
{
|
|
return;
|
|
}
|
|
|
|
for (auto GraphicsPipeLine : *GGraphicsPipelineCache.CurrentMap)
|
|
{
|
|
FGraphicsPipelineState* State = GraphicsPipeLine.Value;
|
|
|
|
// calc timestats
|
|
double SinceUse = FPlatformTime::Seconds() - State->FirstUsedTime;
|
|
|
|
TotalTime += SinceUse;
|
|
|
|
if (SinceUse <= 30.0)
|
|
{
|
|
NumUsedLastMin++;
|
|
}
|
|
|
|
MinTime = FMath::Min(MinTime, SinceUse);
|
|
MaxTime = FMath::Max(MaxTime, SinceUse);
|
|
|
|
// calc frame stats
|
|
int FramesUsed = State->LastFrameUsed - State->FirstFrameUsed;
|
|
TotalFrames += FramesUsed;
|
|
MinFrames = FMath::Min(MinFrames, FramesUsed);
|
|
MaxFrames = FMath::Max(MaxFrames, FramesUsed);
|
|
|
|
NumHits += State->Hits;
|
|
|
|
if (State->HitsAcrossFrames > 0)
|
|
{
|
|
NumHitsAcrossFrames += State->HitsAcrossFrames;
|
|
NumItemsMultipleFrameHits++;
|
|
}
|
|
}
|
|
|
|
UE_LOG(LogRHI, Log, TEXT("Have %d GraphicsPipeline entries"), NumCachedItems);
|
|
UE_LOG(LogRHI, Log, TEXT("Threadpool precompile: %d GraphicsPipeline in flight, %d Jobs started, %d completed"), GPipelinePrecompileTasksInFlight.load(), TotalNumPrecompileJobs.load(), TotalNumPrecompileJobsCompleted.load());
|
|
UE_LOG(LogRHI, Log, TEXT("Threadpool precompile: %f s avg precompile time"), FPlatformTime::GetSecondsPerCycle64() * (TotalPrecompileWaitTime.load()/FMath::Max((int64_t)1, TotalNumPrecompileJobsCompleted.load())));
|
|
|
|
UE_LOG(LogRHI, Log, TEXT("Secs Used: Min=%.02f, Max=%.02f, Avg=%.02f. %d used in last 30 secs"), MinTime, MaxTime, TotalTime / NumCachedItems, NumUsedLastMin);
|
|
UE_LOG(LogRHI, Log, TEXT("Frames Used: Min=%d, Max=%d, Avg=%d"), MinFrames, MaxFrames, TotalFrames / NumCachedItems);
|
|
UE_LOG(LogRHI, Log, TEXT("Hits: Avg=%d, Items with hits across frames=%d, Avg Hits across Frames=%d"), NumHits / NumCachedItems, NumItemsMultipleFrameHits, NumHitsAcrossFrames / NumCachedItems);
|
|
|
|
size_t TrackingMem = sizeof(FGraphicsPipelineStateInitializer) * GGraphicsPipelineCache.CurrentMap->Num();
|
|
UE_LOG(LogRHI, Log, TEXT("Tracking Mem: %d kb"), TrackingMem / 1024);
|
|
#else
|
|
UE_LOG(LogRHI, Error, TEXT("DEfine PSO_TRACK_CACHE_STATS for state and stats!"));
|
|
#endif // PSO_VALIDATE_CACHE
|
|
}
|
|
|
|
/** Global cache of vertex declarations. Note we don't store TRefCountPtrs, instead we AddRef() manually. */
|
|
static TMap<uint32, FRHIVertexDeclaration*> GVertexDeclarationCache;
|
|
static FCriticalSection GVertexDeclarationLock;
|
|
|
|
void PipelineStateCache::Shutdown()
|
|
{
|
|
GGraphicsPipelineCache.WaitTasksComplete();
|
|
#if RHI_RAYTRACING
|
|
GRayTracingPipelineCache.Shutdown();
|
|
#endif
|
|
|
|
// call discard twice to clear both the backing and main caches
|
|
for (int i = 0; i < 2; i++)
|
|
{
|
|
GComputePipelineCache.Discard([](FComputePipelineState* CacheItem)
|
|
{
|
|
if(CacheItem != nullptr)
|
|
{
|
|
CacheItem->WaitCompletion();
|
|
delete CacheItem;
|
|
}
|
|
});
|
|
|
|
GGraphicsPipelineCache.DiscardAndSwap();
|
|
}
|
|
FPipelineFileCache::Shutdown();
|
|
|
|
for (auto Pair : GVertexDeclarationCache)
|
|
{
|
|
Pair.Value->Release();
|
|
}
|
|
GVertexDeclarationCache.Empty();
|
|
}
|
|
|
|
FRHIVertexDeclaration* PipelineStateCache::GetOrCreateVertexDeclaration(const FVertexDeclarationElementList& Elements)
|
|
{
|
|
// Actual locking/contention time should be close to unmeasurable
|
|
FScopeLock ScopeLock(&GVertexDeclarationLock);
|
|
uint32 Key = FCrc::MemCrc_DEPRECATED(Elements.GetData(), Elements.Num() * sizeof(FVertexElement));
|
|
FRHIVertexDeclaration** Found = GVertexDeclarationCache.Find(Key);
|
|
if (Found)
|
|
{
|
|
return *Found;
|
|
}
|
|
|
|
FVertexDeclarationRHIRef NewDeclaration = RHICreateVertexDeclaration(Elements);
|
|
|
|
// Add an extra reference so we don't have TRefCountPtr in the maps
|
|
NewDeclaration->AddRef();
|
|
GVertexDeclarationCache.Add(Key, NewDeclaration);
|
|
return NewDeclaration;
|
|
}
|