// Copyright Epic Games, Inc. All Rights Reserved. /*============================================================================= MeshDrawCommandSetup.cpp: Mesh draw command setup. =============================================================================*/ #include "MeshDrawCommands.h" #include "RendererModule.h" #include "ScenePrivate.h" #include "TranslucentRendering.h" #include "InstanceCulling/InstanceCullingManager.h" TGlobalResource GPrimitiveIdVertexBufferPool; static TAutoConsoleVariable CVarMeshDrawCommandsParallelPassSetup( TEXT("r.MeshDrawCommands.ParallelPassSetup"), 1, TEXT("Whether to setup mesh draw command pass in parallel."), ECVF_RenderThreadSafe); static TAutoConsoleVariable CVarMobileMeshSortingMethod( TEXT("r.Mobile.MeshSortingMethod"), 0, TEXT("How to sort mesh commands on mobile:\n") TEXT("\t0: Sort by state, roughly front to back (Default).\n") TEXT("\t1: Strict front to back sorting.\n"), ECVF_RenderThreadSafe); static int32 GAllowOnDemandShaderCreation = 1; static FAutoConsoleVariableRef CVarAllowOnDemandShaderCreation( TEXT("r.MeshDrawCommands.AllowOnDemandShaderCreation"), GAllowOnDemandShaderCreation, TEXT("How to create RHI shaders:\n") TEXT("\t0: Always create them on a Rendering Thread, before executing other MDC tasks.\n") TEXT("\t1: If RHI supports multi-threaded shader creation, create them on demand on tasks threads, at the time of submitting the draws.\n"), ECVF_RenderThreadSafe); FPrimitiveIdVertexBufferPool::FPrimitiveIdVertexBufferPool() : DiscardId(0) { } FPrimitiveIdVertexBufferPool::~FPrimitiveIdVertexBufferPool() { check(!Entries.Num()); } FPrimitiveIdVertexBufferPoolEntry FPrimitiveIdVertexBufferPool::Allocate(int32 BufferSize) { check(IsInRenderingThread()); FScopeLock Lock(&AllocationCS); BufferSize = Align(BufferSize, 1024); // First look for a smallest unused one. int32 BestFitBufferIndex = -1; for (int32 Index = 0; Index < Entries.Num(); ++Index) { // Unused and fits? 
if (Entries[Index].LastDiscardId != DiscardId && Entries[Index].BufferSize >= BufferSize) { // Is it a bet fit than current BestFitBufferIndex? if (BestFitBufferIndex == -1 || Entries[Index].BufferSize < Entries[BestFitBufferIndex].BufferSize) { BestFitBufferIndex = Index; if (Entries[BestFitBufferIndex].BufferSize == BufferSize) { break; } } } } if (BestFitBufferIndex >= 0) { // Reuse existing buffer. FPrimitiveIdVertexBufferPoolEntry ReusedEntry = MoveTemp(Entries[BestFitBufferIndex]); ReusedEntry.LastDiscardId = DiscardId; Entries.RemoveAt(BestFitBufferIndex); return ReusedEntry; } else { // Allocate new one. FPrimitiveIdVertexBufferPoolEntry NewEntry; NewEntry.LastDiscardId = DiscardId; NewEntry.BufferSize = BufferSize; FRHIResourceCreateInfo CreateInfo(TEXT("FPrimitiveIdVertexBufferPool")); NewEntry.BufferRHI = RHICreateVertexBuffer(NewEntry.BufferSize, BUF_Volatile, CreateInfo); return NewEntry; } } void FPrimitiveIdVertexBufferPool::ReturnToFreeList(FPrimitiveIdVertexBufferPoolEntry Entry) { // Entries can be returned from RHIT or RT, depending on if FParallelMeshDrawCommandPass::DispatchDraw() takes the parallel path FScopeLock Lock(&AllocationCS); Entries.Add(MoveTemp(Entry)); } void FPrimitiveIdVertexBufferPool::DiscardAll() { FScopeLock Lock(&AllocationCS); ++DiscardId; // Remove old unused pool entries. for (int32 Index = 0; Index < Entries.Num();) { if (DiscardId - Entries[Index].LastDiscardId > 1000u) { Entries.RemoveAtSwap(Index); } else { ++Index; } } } void FPrimitiveIdVertexBufferPool::ReleaseDynamicRHI() { Entries.Empty(); } uint32 BitInvertIfNegativeFloat(uint32 f) { unsigned mask = -int32(f >> 31) | 0x80000000; return f ^ mask; } /** * Update mesh sort keys with view dependent data. 
*/ void UpdateTranslucentMeshSortKeys( ETranslucentSortPolicy::Type TranslucentSortPolicy, const FVector& TranslucentSortAxis, const FVector& ViewOrigin, const FMatrix& ViewMatrix, const TArray& PrimitiveBounds, ETranslucencyPass::Type TranslucencyPass, bool bInverseSorting, FMeshCommandOneFrameArray& VisibleMeshCommands ) { QUICK_SCOPE_CYCLE_COUNTER(STAT_UpdateTranslucentMeshSortKeys); for (int32 CommandIndex = 0; CommandIndex < VisibleMeshCommands.Num(); ++CommandIndex) { FVisibleMeshDrawCommand& VisibleCommand = VisibleMeshCommands[CommandIndex]; const int32 PrimitiveIndex = VisibleCommand.PrimitiveIdInfo.ScenePrimitiveId; const FVector BoundsOrigin = PrimitiveIndex >= 0 ? PrimitiveBounds[PrimitiveIndex].BoxSphereBounds.Origin : FVector::ZeroVector; float Distance = 0.0f; if (TranslucentSortPolicy == ETranslucentSortPolicy::SortByDistance) { //sort based on distance to the view position, view rotation is not a factor Distance = (BoundsOrigin - ViewOrigin).Size(); } else if (TranslucentSortPolicy == ETranslucentSortPolicy::SortAlongAxis) { // Sort based on enforced orthogonal distance const FVector CameraToObject = BoundsOrigin - ViewOrigin; Distance = FVector::DotProduct(CameraToObject, TranslucentSortAxis); } else { // Sort based on projected Z distance check(TranslucentSortPolicy == ETranslucentSortPolicy::SortByProjectedZ); Distance = ViewMatrix.TransformPosition(BoundsOrigin).Z; } // Apply distance offset from the primitive const uint32 PackedOffset = VisibleCommand.SortKey.Translucent.Distance; const float DistanceOffset = *((float*)&PackedOffset); Distance += DistanceOffset; // Sort front-to-back instead of back-to-front if (bInverseSorting) { const float MaxSortingDistance = 1000000.f; // 100km, arbitrary Distance = MaxSortingDistance - Distance; } // Patch distance inside translucent mesh sort key. 
FMeshDrawCommandSortKey SortKey; SortKey.PackedData = VisibleCommand.SortKey.PackedData; SortKey.Translucent.Distance = (uint32)~BitInvertIfNegativeFloat(*((uint32*)&Distance)); VisibleCommand.SortKey.PackedData = SortKey.PackedData; } } /** * Merge mobile BasePass with BasePassCSM based on CSM visibility in order to select appropriate shader for given command. */ void MergeMobileBasePassMeshDrawCommands( const FMobileCSMVisibilityInfo& MobileCSMVisibilityInfo, int32 ScenePrimitiveNum, FMeshCommandOneFrameArray& MeshCommands, FMeshCommandOneFrameArray& MeshCommandsCSM ) { if (MobileCSMVisibilityInfo.bMobileDynamicCSMInUse) { // determine per view CSM visibility if (MobileCSMVisibilityInfo.bAlwaysUseCSM) { #if DO_CHECK check(!MeshCommandsCSM.Num() || MeshCommands.Num() == MeshCommandsCSM.Num()); for (int32 Index = 0; Index < MeshCommandsCSM.Num(); ++Index) { check(MeshCommands[Index].PrimitiveIdInfo.ScenePrimitiveId == MeshCommandsCSM[Index].PrimitiveIdInfo.ScenePrimitiveId); } #endif if (MeshCommandsCSM.Num() > 0) { MeshCommands = MoveTemp(MeshCommandsCSM); } } else { checkf(MeshCommands.Num() == MeshCommandsCSM.Num(), TEXT("VisibleMeshDrawCommands of BasePass and MobileBasePassCSM are expected to match.")); for (int32 i = MeshCommands.Num() - 1; i >= 0; --i) { FVisibleMeshDrawCommand& MeshCommand = MeshCommands[i]; FVisibleMeshDrawCommand& MeshCommandCSM = MeshCommandsCSM[i]; if (MeshCommand.PrimitiveIdInfo.ScenePrimitiveId < ScenePrimitiveNum && MobileCSMVisibilityInfo.MobilePrimitiveCSMReceiverVisibilityMap[MeshCommand.PrimitiveIdInfo.ScenePrimitiveId]) { checkf(MeshCommand.PrimitiveIdInfo.ScenePrimitiveId == MeshCommandCSM.PrimitiveIdInfo.ScenePrimitiveId, TEXT("VisibleMeshDrawCommands of BasePass and MobileBasePassCSM are expected to match.")); // Use CSM's VisibleMeshDrawCommand. 
MeshCommand = MeshCommandCSM; } } MeshCommandsCSM.Reset(); } } } static uint64 GetMobileBasePassSortKey_FrontToBack(bool bMasked, bool bBackground, uint32 PipelineId, int32 StateBucketId, float PrimitiveDistance) { union { uint64 PackedInt; struct { uint64 StateBucketId : 25; // Order by state bucket uint64 PipelineId : 22; // Order by PSO uint64 DepthBits : 15; // Order by primitive depth uint64 Background : 1; // Non-background meshes first uint64 Masked : 1; // Non-masked first } Fields; } Key; union FFloatToInt { float F; uint32 I; }; FFloatToInt F2I; Key.Fields.Masked = bMasked; Key.Fields.Background = bBackground; F2I.F = PrimitiveDistance; Key.Fields.DepthBits = ((-int32(F2I.I >> 31) | 0x80000000) ^ F2I.I) >> 17; Key.Fields.PipelineId = PipelineId; Key.Fields.StateBucketId = StateBucketId; return Key.PackedInt; } static uint32 DepthBuckets_4(float Distance) { union FFloatToInt { float F; uint32 I; }; FFloatToInt F2I; F2I.F = Distance; // discard distance shorter than 64 int32 Exp = ((F2I.I >> 23u) & 0xff) - (127 + 6); // 16 buckets, ranging from 64 to 2km return (uint32)FMath::Clamp(Exp, 0, 15); } static uint64 GetMobileBasePassSortKey_ByState(bool bMasked, bool bBackground, uint32 PipelineId, uint32 StateBucketId, float PipelineDistance, float StateBucketDistance, float PrimitiveDistance) { // maximum primitive distance for bucketing, aprox 10.5km constexpr float PrimitiveMaxDepth = (1 << 20); constexpr float PrimitiveDepthQuantization = ((1 << 12) - 1); union { uint64 PackedInt; struct { uint64 DepthBits : 12; // Order by primitive depth uint64 StateBucketId : 20; // Order by state bucket uint64 StateBucketDepthBits : 4;// Order state buckets front to back uint64 PipelineId : 22; // Order by PSO uint64 PipelineDepthBits : 4; // Order PSOs front to back uint64 Background : 1; // Non-background meshes first uint64 Masked : 1; // Non-masked first } Fields; } Key; Key.PackedInt = 0; Key.Fields.Masked = bMasked; Key.Fields.Background = bBackground; 
Key.Fields.PipelineDepthBits = DepthBuckets_4(PipelineDistance); Key.Fields.PipelineId = PipelineId; Key.Fields.StateBucketDepthBits = DepthBuckets_4(StateBucketDistance); Key.Fields.StateBucketId = StateBucketId; Key.Fields.DepthBits = uint32((FMath::Min(PrimitiveDistance, PrimitiveMaxDepth) / PrimitiveMaxDepth) * PrimitiveDepthQuantization); return Key.PackedInt; } /** * Compute mesh sort keys for the mobile base pass */ void UpdateMobileBasePassMeshSortKeys( const FVector& ViewOrigin, const TArray& ScenePrimitiveBounds, FMeshCommandOneFrameArray& VisibleMeshCommands ) { QUICK_SCOPE_CYCLE_COUNTER(STAT_UpdateMobileBasePassMeshSortKeys); // Object radius past which we treat object as part of 'background' constexpr float MIN_BACKGROUND_OBJECT_RADIUS = 500000; int32 NumCmds = VisibleMeshCommands.Num(); int32 MeshSortingMethod = CVarMobileMeshSortingMethod.GetValueOnAnyThread(); if (MeshSortingMethod == 1) //strict front to back sorting { // compute sort key for each mesh command for (int32 CmdIdx = 0; CmdIdx < NumCmds; ++CmdIdx) { FVisibleMeshDrawCommand& Cmd = VisibleMeshCommands[CmdIdx]; // Set in MobileBasePass.cpp - GetBasePassStaticSortKey; bool bMasked = Cmd.SortKey.PackedData & 0x1 ? true : false; bool bBackground = Cmd.SortKey.PackedData & 0x2 ? true : false; float PrimitiveDistance = 0; if (Cmd.PrimitiveIdInfo.ScenePrimitiveId < ScenePrimitiveBounds.Num()) { const FPrimitiveBounds& PrimitiveBounds = ScenePrimitiveBounds[Cmd.PrimitiveIdInfo.ScenePrimitiveId]; PrimitiveDistance = (PrimitiveBounds.BoxSphereBounds.Origin - ViewOrigin).Size(); bBackground |= (PrimitiveBounds.BoxSphereBounds.SphereRadius > MIN_BACKGROUND_OBJECT_RADIUS); } uint32 PipelineId = Cmd.MeshDrawCommand->CachedPipelineId.GetId(); // use state bucket if dynamic instancing is enabled, otherwise identify same meshes by index buffer resource uint32 StateBucketId = Cmd.StateBucketId >= 0 ? 
Cmd.StateBucketId : PointerHash(Cmd.MeshDrawCommand->IndexBuffer); Cmd.SortKey.PackedData = GetMobileBasePassSortKey_FrontToBack(bMasked, bBackground, PipelineId, StateBucketId, PrimitiveDistance); } } else { struct FPipelineDistance { double DistanceSum = 0.0; int32 Num = 0; }; TMap PipelineDistances; PipelineDistances.Reserve(256); TMap StateBucketDistances; StateBucketDistances.Reserve(512); // pre-compute a distance to a group of meshes that share same PSO and state for (int32 CmdIdx = 0; CmdIdx < NumCmds; ++CmdIdx) { const FVisibleMeshDrawCommand& Cmd = VisibleMeshCommands[CmdIdx]; float PrimitiveDistance = 0.f; if (Cmd.PrimitiveIdInfo.ScenePrimitiveId < ScenePrimitiveBounds.Num()) { const FPrimitiveBounds& PrimitiveBounds = ScenePrimitiveBounds[Cmd.PrimitiveIdInfo.ScenePrimitiveId]; PrimitiveDistance = (PrimitiveBounds.BoxSphereBounds.Origin - ViewOrigin).Size(); } // group meshes by PSO and find distance to each group uint32 PipelineId = Cmd.MeshDrawCommand->CachedPipelineId.GetId(); FPipelineDistance& PipelineDistance = PipelineDistances.FindOrAdd(PipelineId); PipelineDistance.DistanceSum += PrimitiveDistance; PipelineDistance.Num += 1; // group meshes by StateBucketId or index buffer resource and find minimum distance to each group uint32 StateBucketId = Cmd.StateBucketId >= 0 ? Cmd.StateBucketId : PointerHash(Cmd.MeshDrawCommand->IndexBuffer); uint64 PipelineAndStateBucketId = ((uint64)PipelineId << 32u) | StateBucketId; float& StateBucketDistance = StateBucketDistances.FindOrAdd(PipelineAndStateBucketId, BIG_NUMBER); StateBucketDistance = FMath::Min(StateBucketDistance, PrimitiveDistance); } // compute sort key for each mesh command for (int32 CmdIdx = 0; CmdIdx < NumCmds; ++CmdIdx) { FVisibleMeshDrawCommand& Cmd = VisibleMeshCommands[CmdIdx]; // Set in MobileBasePass.cpp - GetBasePassStaticSortKey; bool bMasked = Cmd.SortKey.PackedData & 0x1 ? true : false; bool bBackground = Cmd.SortKey.PackedData & 0x2 ? 
true : false; float PrimitiveDistance = 0; if (Cmd.PrimitiveIdInfo.ScenePrimitiveId < ScenePrimitiveBounds.Num()) { const FPrimitiveBounds& PrimitiveBounds = ScenePrimitiveBounds[Cmd.PrimitiveIdInfo.ScenePrimitiveId]; PrimitiveDistance = (PrimitiveBounds.BoxSphereBounds.Origin - ViewOrigin).Size(); bBackground |= (PrimitiveBounds.BoxSphereBounds.SphereRadius > MIN_BACKGROUND_OBJECT_RADIUS); } uint32 PipelineId = Cmd.MeshDrawCommand->CachedPipelineId.GetId(); FPipelineDistance PipelineDistance = PipelineDistances.FindRef(PipelineId); float MeanPipelineDistance = PipelineDistance.DistanceSum / PipelineDistance.Num; uint32 StateBucketId = Cmd.StateBucketId >= 0 ? Cmd.StateBucketId : PointerHash(Cmd.MeshDrawCommand->IndexBuffer); uint64 PipelineAndStateBucketId = ((uint64)PipelineId << 32u) | StateBucketId; float StateBucketDistance = StateBucketDistances.FindRef(PipelineAndStateBucketId); Cmd.SortKey.PackedData = GetMobileBasePassSortKey_ByState(bMasked, bBackground, PipelineId, StateBucketId, MeanPipelineDistance, StateBucketDistance, PrimitiveDistance); } } } FORCEINLINE int32 TranslatePrimitiveId(int32 DrawPrimitiveIdIn, int32 DynamicPrimitiveIdOffset, int32 DynamicPrimitiveIdMax) { // INDEX_NONE means we defer the translation to later if (DynamicPrimitiveIdOffset == INDEX_NONE) { return DrawPrimitiveIdIn; } int32 DrawPrimitiveId = DrawPrimitiveIdIn; if ((DrawPrimitiveIdIn & GPrimIDDynamicFlag) != 0) { int32 DynamicPrimitiveIndex = DrawPrimitiveIdIn & (~GPrimIDDynamicFlag); DrawPrimitiveId = DynamicPrimitiveIdOffset + DynamicPrimitiveIndex; checkSlow(DrawPrimitiveId < DynamicPrimitiveIdMax); } // Append flag to mark this as a non-instance data index. // This value is treated as a primitive ID in the SceneData.ush loading. const uint32 VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG = 1U << 31U; return DrawPrimitiveId |= VF_TREAT_INSTANCE_ID_OFFSET_AS_PRIMITIVE_ID_FLAG; } /** * Build mesh draw command primitive Id buffer for instancing. 
* TempVisibleMeshDrawCommands must be presized for NewPassVisibleMeshDrawCommands. */ static void BuildMeshDrawCommandPrimitiveIdBuffer( bool bDynamicInstancing, FMeshCommandOneFrameArray& VisibleMeshDrawCommands, FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage, FMeshCommandOneFrameArray& TempVisibleMeshDrawCommands, int32& MaxInstances, int32& VisibleMeshDrawCommandsNum, int32& NewPassVisibleMeshDrawCommandsNum, EShaderPlatform ShaderPlatform, uint32 InstanceFactor, TFunctionRef WritePrimitiveDataFn) { QUICK_SCOPE_CYCLE_COUNTER(STAT_BuildMeshDrawCommandPrimitiveIdBuffer); const FVisibleMeshDrawCommand* RESTRICT PassVisibleMeshDrawCommands = VisibleMeshDrawCommands.GetData(); const int32 NumDrawCommands = VisibleMeshDrawCommands.Num(); uint32 PrimitiveIdIndex = 0; if (bDynamicInstancing) { QUICK_SCOPE_CYCLE_COUNTER(STAT_DynamicInstancingOfVisibleMeshDrawCommands); check(VisibleMeshDrawCommands.Num() <= TempVisibleMeshDrawCommands.Max() && TempVisibleMeshDrawCommands.Num() == 0); int32 CurrentStateBucketId = -1; uint32* RESTRICT CurrentDynamicallyInstancedMeshCommandNumInstances = nullptr; MaxInstances = 1; for (int32 DrawCommandIndex = 0; DrawCommandIndex < NumDrawCommands; DrawCommandIndex++) { const FVisibleMeshDrawCommand& RESTRICT VisibleMeshDrawCommand = PassVisibleMeshDrawCommands[DrawCommandIndex]; if (VisibleMeshDrawCommand.StateBucketId == CurrentStateBucketId && VisibleMeshDrawCommand.StateBucketId != -1) { if (CurrentDynamicallyInstancedMeshCommandNumInstances) { const int32 CurrentNumInstances = *CurrentDynamicallyInstancedMeshCommandNumInstances; *CurrentDynamicallyInstancedMeshCommandNumInstances = CurrentNumInstances + 1; MaxInstances = FMath::Max(MaxInstances, CurrentNumInstances + 1); } else { FVisibleMeshDrawCommand NewVisibleMeshDrawCommand = VisibleMeshDrawCommand; NewVisibleMeshDrawCommand.PrimitiveIdBufferOffset = PrimitiveIdIndex; TempVisibleMeshDrawCommands.Emplace(MoveTemp(NewVisibleMeshDrawCommand)); } } else { // First time state 
bucket setup CurrentStateBucketId = VisibleMeshDrawCommand.StateBucketId; if (VisibleMeshDrawCommand.StateBucketId != INDEX_NONE && VisibleMeshDrawCommand.MeshDrawCommand->PrimitiveIdStreamIndex >= 0 && VisibleMeshDrawCommand.MeshDrawCommand->NumInstances == 1 // Don't create a new FMeshDrawCommand for the last command and make it safe for us to look at the next command && DrawCommandIndex + 1 < NumDrawCommands // Only create a new FMeshDrawCommand if more than one draw in the state bucket && CurrentStateBucketId == PassVisibleMeshDrawCommands[DrawCommandIndex + 1].StateBucketId) { const int32 Index = MeshDrawCommandStorage.MeshDrawCommands.AddElement(*VisibleMeshDrawCommand.MeshDrawCommand); FMeshDrawCommand& NewCommand = MeshDrawCommandStorage.MeshDrawCommands[Index]; FVisibleMeshDrawCommand NewVisibleMeshDrawCommand; NewVisibleMeshDrawCommand.Setup( &NewCommand, VisibleMeshDrawCommand.PrimitiveIdInfo, VisibleMeshDrawCommand.StateBucketId, VisibleMeshDrawCommand.MeshFillMode, VisibleMeshDrawCommand.MeshCullMode, VisibleMeshDrawCommand.Flags, VisibleMeshDrawCommand.SortKey, VisibleMeshDrawCommand.RunArray, VisibleMeshDrawCommand.NumRuns); NewVisibleMeshDrawCommand.PrimitiveIdBufferOffset = PrimitiveIdIndex; TempVisibleMeshDrawCommands.Emplace(MoveTemp(NewVisibleMeshDrawCommand)); CurrentDynamicallyInstancedMeshCommandNumInstances = &NewCommand.NumInstances; } else { CurrentDynamicallyInstancedMeshCommandNumInstances = nullptr; FVisibleMeshDrawCommand NewVisibleMeshDrawCommand = VisibleMeshDrawCommand; NewVisibleMeshDrawCommand.PrimitiveIdBufferOffset = PrimitiveIdIndex; TempVisibleMeshDrawCommands.Emplace(MoveTemp(NewVisibleMeshDrawCommand)); } } //@todo - refactor into instance step rate in the RHI for (uint32 InstanceFactorIndex = 0; InstanceFactorIndex < InstanceFactor; InstanceFactorIndex++, PrimitiveIdIndex++) { WritePrimitiveDataFn(PrimitiveIdIndex, VisibleMeshDrawCommand.PrimitiveIdInfo.DrawPrimitiveId); } } // Setup instancing stats for logging. 
VisibleMeshDrawCommandsNum = VisibleMeshDrawCommands.Num(); NewPassVisibleMeshDrawCommandsNum = TempVisibleMeshDrawCommands.Num(); // Replace VisibleMeshDrawCommands FMemory::Memswap(&VisibleMeshDrawCommands, &TempVisibleMeshDrawCommands, sizeof(TempVisibleMeshDrawCommands)); TempVisibleMeshDrawCommands.Reset(); } else { QUICK_SCOPE_CYCLE_COUNTER(STAT_BuildVisibleMeshDrawCommandPrimitiveIdBuffers); for (int32 DrawCommandIndex = 0; DrawCommandIndex < NumDrawCommands; DrawCommandIndex++) { const FVisibleMeshDrawCommand& VisibleMeshDrawCommand = VisibleMeshDrawCommands[DrawCommandIndex]; for (uint32 InstanceFactorIndex = 0; InstanceFactorIndex < InstanceFactor; InstanceFactorIndex++, PrimitiveIdIndex++) { WritePrimitiveDataFn(PrimitiveIdIndex, VisibleMeshDrawCommand.PrimitiveIdInfo.DrawPrimitiveId); } } } } /** * Converts each FMeshBatch into a set of FMeshDrawCommands for a specific mesh pass type. */ void GenerateDynamicMeshDrawCommands( const FViewInfo& View, EShadingPath ShadingPath, EMeshPass::Type PassType, FMeshPassProcessor* PassMeshProcessor, const TArray& DynamicMeshElements, const TArray* DynamicMeshElementsPassRelevance, int32 MaxNumDynamicMeshElements, const TArray& DynamicMeshCommandBuildRequests, int32 MaxNumBuildRequestElements, FMeshCommandOneFrameArray& VisibleCommands, FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage, FGraphicsMinimalPipelineStateSet& MinimalPipelineStatePassSet, bool& NeedsShaderInitialisation ) { QUICK_SCOPE_CYCLE_COUNTER(STAT_GenerateDynamicMeshDrawCommands); check(PassMeshProcessor); check((PassType == EMeshPass::Num) == (DynamicMeshElementsPassRelevance == nullptr)); FDynamicPassMeshDrawListContext DynamicPassMeshDrawListContext( MeshDrawCommandStorage, VisibleCommands, MinimalPipelineStatePassSet, NeedsShaderInitialisation ); PassMeshProcessor->SetDrawListContext(&DynamicPassMeshDrawListContext); { const int32 NumCommandsBefore = VisibleCommands.Num(); const int32 NumDynamicMeshBatches = DynamicMeshElements.Num(); for 
(int32 MeshIndex = 0; MeshIndex < NumDynamicMeshBatches; MeshIndex++) { if (!DynamicMeshElementsPassRelevance || (*DynamicMeshElementsPassRelevance)[MeshIndex].Get(PassType)) { const FMeshBatchAndRelevance& MeshAndRelevance = DynamicMeshElements[MeshIndex]; const uint64 BatchElementMask = ~0ull; PassMeshProcessor->AddMeshBatch(*MeshAndRelevance.Mesh, BatchElementMask, MeshAndRelevance.PrimitiveSceneProxy); } } const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore; checkf(NumCommandsGenerated <= MaxNumDynamicMeshElements, TEXT("Generated %d mesh draw commands for DynamicMeshElements, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumDynamicMeshElements); } { const int32 NumCommandsBefore = VisibleCommands.Num(); const int32 NumStaticMeshBatches = DynamicMeshCommandBuildRequests.Num(); for (int32 MeshIndex = 0; MeshIndex < NumStaticMeshBatches; MeshIndex++) { const FStaticMeshBatch* StaticMeshBatch = DynamicMeshCommandBuildRequests[MeshIndex]; const uint64 DefaultBatchElementMask = ~0ul; PassMeshProcessor->AddMeshBatch(*StaticMeshBatch, DefaultBatchElementMask, StaticMeshBatch->PrimitiveSceneInfo->Proxy, StaticMeshBatch->Id); } const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore; checkf(NumCommandsGenerated <= MaxNumBuildRequestElements, TEXT("Generated %d mesh draw commands for DynamicMeshCommandBuildRequests, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumBuildRequestElements); } } /** * Special version of GenerateDynamicMeshDrawCommands for the mobile base pass. * Based on CSM visibility it will generate mesh draw commands using either normal base pass processor or CSM base pass processor. 
*/
// A mesh goes through MobilePassCSMPassMeshProcessor when dynamic CSM is in use and either
// bAlwaysUseCSM is set or the primitive is flagged in MobilePrimitiveCSMReceiverVisibilityMap;
// otherwise it goes through PassMeshProcessor. Both processors write into the same draw list context.
// ShadingPath is not referenced in the body.
void GenerateMobileBasePassDynamicMeshDrawCommands(
	const FViewInfo& View,
	EShadingPath ShadingPath,
	EMeshPass::Type PassType,
	FMeshPassProcessor* PassMeshProcessor,
	FMeshPassProcessor* MobilePassCSMPassMeshProcessor,
	const TArray& DynamicMeshElements,
	const TArray* DynamicMeshElementsPassRelevance,
	int32 MaxNumDynamicMeshElements,
	const TArray& DynamicMeshCommandBuildRequests,
	int32 MaxNumBuildRequestElements,
	FMeshCommandOneFrameArray& VisibleCommands,
	FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage,
	FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet,
	bool& NeedsShaderInitialisation
	)
{
	QUICK_SCOPE_CYCLE_COUNTER(STAT_GenerateMobileBasePassDynamicMeshDrawCommands);
	check(PassMeshProcessor && MobilePassCSMPassMeshProcessor);
	check((PassType == EMeshPass::Num) == (DynamicMeshElementsPassRelevance == nullptr));

	FDynamicPassMeshDrawListContext DynamicPassMeshDrawListContext(
		MeshDrawCommandStorage,
		VisibleCommands,
		GraphicsMinimalPipelineStateSet,
		NeedsShaderInitialisation
	);
	PassMeshProcessor->SetDrawListContext(&DynamicPassMeshDrawListContext);
	MobilePassCSMPassMeshProcessor->SetDrawListContext(&DynamicPassMeshDrawListContext);

	const FMobileCSMVisibilityInfo& MobileCSMVisibilityInfo = View.MobileCSMVisibilityInfo;

	// Every batch element is enabled. The literal must be 64 bits wide: ~0ul is only 32 bits where
	// unsigned long is 32-bit, which would leave the upper half of the mask clear.
	const uint64 AllBatchElementsMask = ~0ull;

	// Picks the CSM processor for primitives that receive CSM, the regular one otherwise.
	auto SelectProcessor = [&](int32 PrimitiveIndex) -> FMeshPassProcessor*
	{
		const bool bUseCSMProcessor = MobileCSMVisibilityInfo.bMobileDynamicCSMInUse
			&& (MobileCSMVisibilityInfo.bAlwaysUseCSM || MobileCSMVisibilityInfo.MobilePrimitiveCSMReceiverVisibilityMap[PrimitiveIndex]);
		return bUseCSMProcessor ? MobilePassCSMPassMeshProcessor : PassMeshProcessor;
	};

	{
		const int32 NumCommandsBefore = VisibleCommands.Num();
		const int32 NumDynamicMeshBatches = DynamicMeshElements.Num();

		for (int32 MeshIndex = 0; MeshIndex < NumDynamicMeshBatches; MeshIndex++)
		{
			if (!DynamicMeshElementsPassRelevance || (*DynamicMeshElementsPassRelevance)[MeshIndex].Get(PassType))
			{
				const FMeshBatchAndRelevance& MeshAndRelevance = DynamicMeshElements[MeshIndex];

				const int32 PrimitiveIndex = MeshAndRelevance.PrimitiveSceneProxy->GetPrimitiveSceneInfo()->GetIndex();
				SelectProcessor(PrimitiveIndex)->AddMeshBatch(*MeshAndRelevance.Mesh, AllBatchElementsMask, MeshAndRelevance.PrimitiveSceneProxy);
			}
		}

		const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore;
		checkf(NumCommandsGenerated <= MaxNumDynamicMeshElements,
			TEXT("Generated %d mesh draw commands for DynamicMeshElements, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumDynamicMeshElements);
	}

	{
		const int32 NumCommandsBefore = VisibleCommands.Num();
		const int32 NumStaticMeshBatches = DynamicMeshCommandBuildRequests.Num();

		for (int32 MeshIndex = 0; MeshIndex < NumStaticMeshBatches; MeshIndex++)
		{
			const FStaticMeshBatch* StaticMeshBatch = DynamicMeshCommandBuildRequests[MeshIndex];

			const int32 PrimitiveIndex = StaticMeshBatch->PrimitiveSceneInfo->Proxy->GetPrimitiveSceneInfo()->GetIndex();
			SelectProcessor(PrimitiveIndex)->AddMeshBatch(*StaticMeshBatch, AllBatchElementsMask, StaticMeshBatch->PrimitiveSceneInfo->Proxy, StaticMeshBatch->Id);
		}

		const int32 NumCommandsGenerated = VisibleCommands.Num() - NumCommandsBefore;
		checkf(NumCommandsGenerated <= MaxNumBuildRequestElements,
			TEXT("Generated %d mesh draw commands for DynamicMeshCommandBuildRequests, while preallocating resources only for %d of them."), NumCommandsGenerated, MaxNumBuildRequestElements);
	}
}

/**
 * Apply view overrides to existing mesh draw commands (e.g. reverse culling mode for rendering planar reflections).
* TempVisibleMeshDrawCommands must be presized for NewPassVisibleMeshDrawCommands. */ void ApplyViewOverridesToMeshDrawCommands( EShadingPath ShadingPath, EMeshPass::Type PassType, bool bReverseCulling, bool bRenderSceneTwoSided, FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FExclusiveDepthStencil::Type DefaultBasePassDepthStencilAccess, FMeshCommandOneFrameArray& VisibleMeshDrawCommands, FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage, FGraphicsMinimalPipelineStateSet& MinimalPipelineStatePassSet, bool& NeedsShaderInitialisation, FMeshCommandOneFrameArray& TempVisibleMeshDrawCommands ) { QUICK_SCOPE_CYCLE_COUNTER(STAT_ApplyViewOverridesToMeshDrawCommands); check(VisibleMeshDrawCommands.Num() <= TempVisibleMeshDrawCommands.Max() && TempVisibleMeshDrawCommands.Num() == 0 && PassType != EMeshPass::Num); if ((FPassProcessorManager::GetPassFlags(ShadingPath, PassType) & EMeshPassFlags::MainView) != EMeshPassFlags::None) { if (bReverseCulling || bRenderSceneTwoSided || (BasePassDepthStencilAccess != DefaultBasePassDepthStencilAccess && PassType == EMeshPass::BasePass)) { for (int32 MeshCommandIndex = 0; MeshCommandIndex < VisibleMeshDrawCommands.Num(); MeshCommandIndex++) { MeshDrawCommandStorage.MeshDrawCommands.Add(1); FMeshDrawCommand& NewMeshCommand = MeshDrawCommandStorage.MeshDrawCommands[MeshDrawCommandStorage.MeshDrawCommands.Num() - 1]; const FVisibleMeshDrawCommand& VisibleMeshDrawCommand = VisibleMeshDrawCommands[MeshCommandIndex]; const FMeshDrawCommand& MeshCommand = *VisibleMeshDrawCommand.MeshDrawCommand; NewMeshCommand = MeshCommand; const ERasterizerCullMode LocalCullMode = bRenderSceneTwoSided ? CM_None : bReverseCulling ? 
FMeshPassProcessor::InverseCullMode(VisibleMeshDrawCommand.MeshCullMode) : VisibleMeshDrawCommand.MeshCullMode; FGraphicsMinimalPipelineStateInitializer PipelineState = MeshCommand.CachedPipelineId.GetPipelineState(MinimalPipelineStatePassSet); PipelineState.RasterizerState = GetStaticRasterizerState(VisibleMeshDrawCommand.MeshFillMode, LocalCullMode); if (BasePassDepthStencilAccess != DefaultBasePassDepthStencilAccess && PassType == EMeshPass::BasePass) { FMeshPassProcessorRenderState PassDrawRenderState; SetupBasePassState(BasePassDepthStencilAccess, false, PassDrawRenderState); PipelineState.DepthStencilState = PassDrawRenderState.GetDepthStencilState(); } const FGraphicsMinimalPipelineStateId PipelineId = FGraphicsMinimalPipelineStateId::GetPipelineStateId(PipelineState, MinimalPipelineStatePassSet, NeedsShaderInitialisation); NewMeshCommand.Finalize(PipelineId, nullptr); FVisibleMeshDrawCommand NewVisibleMeshDrawCommand; NewVisibleMeshDrawCommand.Setup( &NewMeshCommand, VisibleMeshDrawCommand.PrimitiveIdInfo, VisibleMeshDrawCommand.StateBucketId, VisibleMeshDrawCommand.MeshFillMode, VisibleMeshDrawCommand.MeshCullMode, VisibleMeshDrawCommand.Flags, VisibleMeshDrawCommand.SortKey, VisibleMeshDrawCommand.RunArray, VisibleMeshDrawCommand.NumRuns); TempVisibleMeshDrawCommands.Add(NewVisibleMeshDrawCommand); } // Replace VisibleMeshDrawCommands FMemory::Memswap(&VisibleMeshDrawCommands, &TempVisibleMeshDrawCommands, sizeof(TempVisibleMeshDrawCommands)); TempVisibleMeshDrawCommands.Reset(); } } } FAutoConsoleTaskPriority CPrio_FMeshDrawCommandPassSetupTask( TEXT("TaskGraph.TaskPriorities.FMeshDrawCommandPassSetupTask"), TEXT("Task and thread priority for FMeshDrawCommandPassSetupTask."), ENamedThreads::NormalThreadPriority, ENamedThreads::HighTaskPriority ); /** * Task for a parallel setup of mesh draw commands. Includes generation of dynamic mesh draw commands, sorting, merging etc. 
*/
class FMeshDrawCommandPassSetupTask
{
public:
	/** Keeps a reference to the pass setup context; the task neither copies nor owns it. */
	FMeshDrawCommandPassSetupTask(FMeshDrawCommandPassSetupTaskContext& InContext)
		: Context(InContext)
	{
	}

	FORCEINLINE TStatId GetStatId() const
	{
		RETURN_QUICK_DECLARE_CYCLE_STAT(FMeshDrawCommandPassSetupTask, STATGROUP_TaskGraphTasks);
	}

	/** Thread/priority comes from the TaskGraph.TaskPriorities.FMeshDrawCommandPassSetupTask setting. */
	ENamedThreads::Type GetDesiredThread()
	{
		return CPrio_FMeshDrawCommandPassSetupTask.Get();
	}

	static ESubsequentsMode::Type GetSubsequentsMode()
	{
		return ESubsequentsMode::TrackSubsequents;
	}

	/**
	 * Runs the whole pass setup: generates dynamic commands, applies view overrides,
	 * refreshes sort keys, sorts, and finally hands the list to the instance culling context
	 * when GPU scene is in use.
	 */
	void AnyThreadTask()
	{
		FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread);
		SCOPED_NAMED_EVENT(MeshDrawCommandPassSetupTask, FColor::Magenta);

		const bool bIsBasePass = Context.PassType == EMeshPass::BasePass;
		// Mobile base pass is a special case, as final lists is created from two mesh passes based on CSM visibility.
		const bool bMobileShadingBasePass = Context.ShadingPath == EShadingPath::Mobile && bIsBasePass;
		// On SM5 Mobile platform, still want the same sorting
		const bool bMobileVulkanSM5BasePass = IsVulkanMobileSM5Platform(Context.ShaderPlatform) && bIsBasePass;

		if (!bMobileShadingBasePass)
		{
			GenerateDynamicMeshDrawCommands(
				*Context.View,
				Context.ShadingPath,
				Context.PassType,
				Context.MeshPassProcessor,
				*Context.DynamicMeshElements,
				Context.DynamicMeshElementsPassRelevance,
				Context.NumDynamicMeshElements,
				Context.DynamicMeshCommandBuildRequests,
				Context.NumDynamicMeshCommandBuildRequestElements,
				Context.MeshDrawCommands,
				Context.MeshDrawCommandStorage,
				Context.MinimalPipelineStatePassSet,
				Context.NeedsShaderInitialisation
			);
		}
		else
		{
			// Fold the cached CSM list into the regular one first, then generate the dynamic commands.
			MergeMobileBasePassMeshDrawCommands(
				Context.View->MobileCSMVisibilityInfo,
				Context.PrimitiveBounds->Num(),
				Context.MeshDrawCommands,
				Context.MobileBasePassCSMMeshDrawCommands
			);

			GenerateMobileBasePassDynamicMeshDrawCommands(
				*Context.View,
				Context.ShadingPath,
				Context.PassType,
				Context.MeshPassProcessor,
				Context.MobileBasePassCSMMeshPassProcessor,
				*Context.DynamicMeshElements,
				Context.DynamicMeshElementsPassRelevance,
				Context.NumDynamicMeshElements,
				Context.DynamicMeshCommandBuildRequests,
				Context.NumDynamicMeshCommandBuildRequestElements,
				Context.MeshDrawCommands,
				Context.MeshDrawCommandStorage,
				Context.MinimalPipelineStatePassSet,
				Context.NeedsShaderInitialisation
			);
		}

		// Nothing to override, sort or cull when the pass produced no commands.
		if (Context.MeshDrawCommands.Num() <= 0)
		{
			return;
		}

		if (Context.PassType != EMeshPass::Num)
		{
			ApplyViewOverridesToMeshDrawCommands(
				Context.ShadingPath,
				Context.PassType,
				Context.bReverseCulling,
				Context.bRenderSceneTwoSided,
				Context.BasePassDepthStencilAccess,
				Context.DefaultBasePassDepthStencilAccess,
				Context.MeshDrawCommands,
				Context.MeshDrawCommandStorage,
				Context.MinimalPipelineStatePassSet,
				Context.NeedsShaderInitialisation,
				Context.TempVisibleMeshDrawCommands
			);
		}

		// Update sort keys.
		if (bMobileShadingBasePass || bMobileVulkanSM5BasePass)
		{
			UpdateMobileBasePassMeshSortKeys(
				Context.ViewOrigin,
				*Context.PrimitiveBounds,
				Context.MeshDrawCommands
			);
		}
		else if (Context.TranslucencyPass != ETranslucencyPass::TPT_MAX)
		{
			// When per-pixel OIT is enabled, sort primitives front to back to avoid
			// constantly re-sorting the front-to-back sample lists.
			const bool bInverseSorting = OIT::IsEnabled(EOITSortingType::SortedPixels, Context.ShaderPlatform) && Context.View->AntiAliasingMethod != EAntiAliasingMethod::AAM_MSAA;

			UpdateTranslucentMeshSortKeys(
				Context.TranslucentSortPolicy,
				Context.TranslucentSortAxis,
				Context.ViewOrigin,
				Context.ViewMatrix,
				*Context.PrimitiveBounds,
				Context.TranslucencyPass,
				bInverseSorting,
				Context.MeshDrawCommands
			);
		}

		{
			QUICK_SCOPE_CYCLE_COUNTER(STAT_SortVisibleMeshDrawCommands);
			Context.MeshDrawCommands.Sort(FCompareFMeshDrawCommands());
		}

		if (Context.bUseGPUScene)
		{
			Context.InstanceCullingContext.SetupDrawCommands(Context.MeshDrawCommands, true, Context.MaxInstances, Context.VisibleMeshDrawCommandsNum, Context.NewPassVisibleMeshDrawCommandsNum);
		}
	}

	/** Task graph entry point; forwards to AnyThreadTask(). */
	void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
	{
		AnyThreadTask();
	}

private:
	FMeshDrawCommandPassSetupTaskContext& Context;
};

/**
 * Task for shader initialization. This will run on the RenderThread after Commands have been generated.
*/ class FMeshDrawCommandInitResourcesTask { public: FMeshDrawCommandInitResourcesTask(FMeshDrawCommandPassSetupTaskContext& InContext) : Context(InContext) { } FORCEINLINE TStatId GetStatId() const { RETURN_QUICK_DECLARE_CYCLE_STAT(FMeshDrawCommandInitResourcesTask, STATGROUP_TaskGraphTasks); } ENamedThreads::Type GetDesiredThread() { return ENamedThreads::GetRenderThread_Local(); } static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; } void AnyThreadTask() { TRACE_CPUPROFILER_EVENT_SCOPE(MeshDrawCommandInitResourcesTask); if (Context.NeedsShaderInitialisation) { for (const FGraphicsMinimalPipelineStateInitializer& Initializer : Context.MinimalPipelineStatePassSet) { Initializer.BoundShaderState.LazilyInitShaders(); } } } void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { AnyThreadTask(); } private: FMeshDrawCommandPassSetupTaskContext& Context; }; /* * Used by various dynamic passes to sort/merge mesh draw commands immediately on a rendering thread. */ void SortAndMergeDynamicPassMeshDrawCommands( const FSceneView& SceneView, FMeshCommandOneFrameArray& VisibleMeshDrawCommands, FDynamicMeshDrawCommandStorage& MeshDrawCommandStorage, FRHIBuffer*& OutPrimitiveIdVertexBuffer, uint32 InstanceFactor, const FGPUScenePrimitiveCollector* DynamicPrimitiveCollector) { const ERHIFeatureLevel::Type FeatureLevel = SceneView.GetFeatureLevel(); const bool bUseGPUScene = UseGPUScene(GMaxRHIShaderPlatform, FeatureLevel); const int32 NumDrawCommands = VisibleMeshDrawCommands.Num(); if (NumDrawCommands > 0) { FMeshCommandOneFrameArray NewPassVisibleMeshDrawCommands; int32 MaxInstances = 1; int32 VisibleMeshDrawCommandsNum = 0; int32 NewPassVisibleMeshDrawCommandsNum = 0; VisibleMeshDrawCommands.Sort(FCompareFMeshDrawCommands()); if (bUseGPUScene) { // GPUCULL_TODO: workaround for the fact that DrawDynamicMeshPassPrivate et al. 
don't work with GPU-Scene instancing // we don't support dynamic instancing for this path since we require one primitive per draw command // This is because the stride on the instance data buffer is set to 0 so only the first will ever be fetched. const bool bDynamicInstancing = false; if (bDynamicInstancing) { NewPassVisibleMeshDrawCommands.Empty(NumDrawCommands); } // INDEX_NONE used in TranslatePrimitiveId to defer id translation int32 DynamicPrimitiveIdOffset = INDEX_NONE; int32 DynamicPrimitiveIdMax = 0; if (DynamicPrimitiveCollector) { const TRange DynamicPrimitiveIdRange = DynamicPrimitiveCollector->GetPrimitiveIdRange(); DynamicPrimitiveIdOffset = DynamicPrimitiveIdRange.GetLowerBoundValue(); DynamicPrimitiveIdMax = DynamicPrimitiveIdRange.GetUpperBoundValue(); } const uint32 PrimitiveIdBufferStride = FInstanceCullingContext::GetInstanceIdBufferStride(FeatureLevel); const int32 MaxNumPrimitives = InstanceFactor * NumDrawCommands; const int32 PrimitiveIdBufferDataSize = MaxNumPrimitives * PrimitiveIdBufferStride; FPrimitiveIdVertexBufferPoolEntry Entry = GPrimitiveIdVertexBufferPool.Allocate(PrimitiveIdBufferDataSize); OutPrimitiveIdVertexBuffer = Entry.BufferRHI; void* PrimitiveIdBufferData = RHILockBuffer(OutPrimitiveIdVertexBuffer, 0, PrimitiveIdBufferDataSize, RLM_WriteOnly); if (FeatureLevel == ERHIFeatureLevel::ES3_1) { check(sizeof(FGPUSceneCompactInstanceData) == PrimitiveIdBufferStride); auto WritePrimitiveDataFn = [&](int32 PrimitiveIndex, int32 PrimitiveId) { checkSlow(PrimitiveIndex < MaxNumPrimitives); FGPUSceneCompactInstanceData* PrimitiveData = reinterpret_cast(PrimitiveIdBufferData) + PrimitiveIndex; if ((PrimitiveId & GPrimIDDynamicFlag) != 0) { PrimitiveId = PrimitiveId & (~GPrimIDDynamicFlag); PrimitiveData->Init(DynamicPrimitiveCollector, PrimitiveId); } else { const FScene* Scene = SceneView.Family->Scene->GetRenderScene(); PrimitiveData->Init(Scene, PrimitiveId); } }; BuildMeshDrawCommandPrimitiveIdBuffer( bDynamicInstancing, 
VisibleMeshDrawCommands, MeshDrawCommandStorage, NewPassVisibleMeshDrawCommands, MaxInstances, VisibleMeshDrawCommandsNum, NewPassVisibleMeshDrawCommandsNum, GShaderPlatformForFeatureLevel[FeatureLevel], InstanceFactor, WritePrimitiveDataFn); } else { int32* RESTRICT PrimitiveIds = reinterpret_cast(PrimitiveIdBufferData); auto WritePrimitiveDataFn = [&](int32 PrimitiveIndex, int32 DrawPrimitiveId) { checkSlow(PrimitiveIndex < MaxNumPrimitives); PrimitiveIds[PrimitiveIndex] = TranslatePrimitiveId(DrawPrimitiveId, DynamicPrimitiveIdOffset, DynamicPrimitiveIdMax); }; BuildMeshDrawCommandPrimitiveIdBuffer( bDynamicInstancing, VisibleMeshDrawCommands, MeshDrawCommandStorage, NewPassVisibleMeshDrawCommands, MaxInstances, VisibleMeshDrawCommandsNum, NewPassVisibleMeshDrawCommandsNum, GShaderPlatformForFeatureLevel[FeatureLevel], InstanceFactor, WritePrimitiveDataFn); } RHIUnlockBuffer(OutPrimitiveIdVertexBuffer); GPrimitiveIdVertexBufferPool.ReturnToFreeList(Entry); } } } void FParallelMeshDrawCommandPass::DispatchPassSetup( FScene* Scene, const FViewInfo& View, FInstanceCullingContext&& InstanceCullingContext, EMeshPass::Type PassType, FExclusiveDepthStencil::Type BasePassDepthStencilAccess, FMeshPassProcessor* MeshPassProcessor, const TArray& DynamicMeshElements, const TArray* DynamicMeshElementsPassRelevance, int32 NumDynamicMeshElements, TArray& InOutDynamicMeshCommandBuildRequests, int32 NumDynamicMeshCommandBuildRequestElements, FMeshCommandOneFrameArray& InOutMeshDrawCommands, FMeshPassProcessor* MobileBasePassCSMMeshPassProcessor, FMeshCommandOneFrameArray* InOutMobileBasePassCSMMeshDrawCommands ) { TRACE_CPUPROFILER_EVENT_SCOPE(ParallelMdcDispatchPassSetup); check(!TaskEventRef.IsValid() && MeshPassProcessor != nullptr && TaskContext.PrimitiveIdBufferData == nullptr); check((PassType == EMeshPass::Num) == (DynamicMeshElementsPassRelevance == nullptr)); MaxNumDraws = InOutMeshDrawCommands.Num() + NumDynamicMeshElements + NumDynamicMeshCommandBuildRequestElements; 
TaskContext.MeshPassProcessor = MeshPassProcessor; TaskContext.MobileBasePassCSMMeshPassProcessor = MobileBasePassCSMMeshPassProcessor; TaskContext.DynamicMeshElements = &DynamicMeshElements; TaskContext.DynamicMeshElementsPassRelevance = DynamicMeshElementsPassRelevance; TaskContext.View = &View; TaskContext.Scene = Scene; TaskContext.ShadingPath = Scene->GetShadingPath(); TaskContext.ShaderPlatform = Scene->GetShaderPlatform(); TaskContext.PassType = PassType; TaskContext.bUseGPUScene = UseGPUScene(GMaxRHIShaderPlatform, View.GetFeatureLevel()); TaskContext.bDynamicInstancing = IsDynamicInstancingEnabled(View.GetFeatureLevel()); TaskContext.bReverseCulling = View.bReverseCulling; TaskContext.bRenderSceneTwoSided = View.bRenderSceneTwoSided; TaskContext.BasePassDepthStencilAccess = BasePassDepthStencilAccess; TaskContext.DefaultBasePassDepthStencilAccess = Scene->DefaultBasePassDepthStencilAccess; TaskContext.NumDynamicMeshElements = NumDynamicMeshElements; TaskContext.NumDynamicMeshCommandBuildRequestElements = NumDynamicMeshCommandBuildRequestElements; // Only apply instancing for ISR to main view passes const bool bIsMainViewPass = PassType != EMeshPass::Num && (FPassProcessorManager::GetPassFlags(TaskContext.ShadingPath, TaskContext.PassType) & EMeshPassFlags::MainView) != EMeshPassFlags::None; // GPUCULL_TODO: Note the InstanceFactor is ignored by the GPU-Scene supported instances, but is used for legacy primitives. TaskContext.InstanceFactor = (bIsMainViewPass && View.IsInstancedStereoPass()) ? 2 : 1; TaskContext.InstanceCullingContext = MoveTemp(InstanceCullingContext); // Setup translucency sort key update pass based on view. 
TaskContext.TranslucencyPass = ETranslucencyPass::TPT_MAX; TaskContext.TranslucentSortPolicy = View.TranslucentSortPolicy; TaskContext.TranslucentSortAxis = View.TranslucentSortAxis; TaskContext.ViewOrigin = View.ViewMatrices.GetViewOrigin(); TaskContext.ViewMatrix = View.ViewMatrices.GetViewMatrix(); TaskContext.PrimitiveBounds = &Scene->PrimitiveBounds; switch (PassType) { case EMeshPass::TranslucencyStandard: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_StandardTranslucency; break; case EMeshPass::TranslucencyAfterDOF: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterDOF; break; case EMeshPass::TranslucencyAfterDOFModulate: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterDOFModulate; break; case EMeshPass::TranslucencyAfterMotionBlur: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_TranslucencyAfterMotionBlur; break; case EMeshPass::TranslucencyAll: TaskContext.TranslucencyPass = ETranslucencyPass::TPT_AllTranslucency; break; } FMemory::Memswap(&TaskContext.MeshDrawCommands, &InOutMeshDrawCommands, sizeof(InOutMeshDrawCommands)); FMemory::Memswap(&TaskContext.DynamicMeshCommandBuildRequests, &InOutDynamicMeshCommandBuildRequests, sizeof(InOutDynamicMeshCommandBuildRequests)); if (TaskContext.ShadingPath == EShadingPath::Mobile && TaskContext.PassType == EMeshPass::BasePass) { FMemory::Memswap(&TaskContext.MobileBasePassCSMMeshDrawCommands, InOutMobileBasePassCSMMeshDrawCommands, sizeof(*InOutMobileBasePassCSMMeshDrawCommands)); } else { check(MobileBasePassCSMMeshPassProcessor == nullptr && InOutMobileBasePassCSMMeshDrawCommands == nullptr); } if (MaxNumDraws > 0) { // Preallocate resources on rendering thread based on MaxNumDraws. 
TaskContext.PrimitiveIdBufferDataSize = TaskContext.InstanceFactor * MaxNumDraws * sizeof(int32); TaskContext.PrimitiveIdBufferData = FMemory::Malloc(TaskContext.PrimitiveIdBufferDataSize); #if DO_GUARD_SLOW FMemory::Memzero(TaskContext.PrimitiveIdBufferData, TaskContext.PrimitiveIdBufferDataSize); #endif // DO_GUARD_SLOW PrimitiveIdVertexBufferPoolEntry = GPrimitiveIdVertexBufferPool.Allocate(TaskContext.PrimitiveIdBufferDataSize); TaskContext.MeshDrawCommands.Reserve(MaxNumDraws); TaskContext.TempVisibleMeshDrawCommands.Reserve(MaxNumDraws); const bool bExecuteInParallel = FApp::ShouldUseThreadingForPerformance() && CVarMeshDrawCommandsParallelPassSetup.GetValueOnRenderThread() > 0 && GIsThreadedRendering; // Rendering thread is required to safely use rendering resources in parallel. if (bExecuteInParallel) { if (IsOnDemandShaderCreationEnabled()) { TaskEventRef = TGraphTask::CreateTask(nullptr, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(TaskContext); } else { FGraphEventArray DependentGraphEvents; DependentGraphEvents.Add(TGraphTask::CreateTask(nullptr, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(TaskContext)); TaskEventRef = TGraphTask::CreateTask(&DependentGraphEvents, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(TaskContext); } } else { QUICK_SCOPE_CYCLE_COUNTER(STAT_MeshPassSetupImmediate); FMeshDrawCommandPassSetupTask Task(TaskContext); Task.AnyThreadTask(); if (!IsOnDemandShaderCreationEnabled()) { FMeshDrawCommandInitResourcesTask DependentTask(TaskContext); DependentTask.AnyThreadTask(); } } } } bool FParallelMeshDrawCommandPass::IsOnDemandShaderCreationEnabled() { // GL rhi does not support multithreaded shader creation, however the engine can be configured to not run mesh drawing tasks in threads other than the RT // (see FRHICommandListExecutor::UseParallelAlgorithms()): if this condition is true, on demand shader creation can be enabled. 
const bool bIsMobileRenderer = FSceneInterface::GetShadingPath(GMaxRHIFeatureLevel) == EShadingPath::Mobile; return GAllowOnDemandShaderCreation && (GRHISupportsMultithreadedShaderCreation || (bIsMobileRenderer && (!GSupportsParallelRenderingTasksWithSeparateRHIThread && IsRunningRHIInSeparateThread()))); } void FParallelMeshDrawCommandPass::WaitForMeshPassSetupTask(EWaitThread WaitThread) const { if (TaskEventRef.IsValid() && WaitThread != EWaitThread::TaskAlreadyWaited) { // Need to wait on GetRenderThread_Local, as mesh pass setup task can wait on rendering thread inside InitResourceFromPossiblyParallelRendering(). QUICK_SCOPE_CYCLE_COUNTER(STAT_WaitForMeshPassSetupTask); FTaskGraphInterface::Get().WaitUntilTaskCompletes(TaskEventRef, WaitThread == EWaitThread::Render ? ENamedThreads::GetRenderThread_Local() : ENamedThreads::AnyThread); } } void FParallelMeshDrawCommandPass::WaitForTasksAndEmpty(EWaitThread WaitThread) { // Need to wait in case if someone dispatched sort and draw merge task, but didn't draw it. 
WaitForMeshPassSetupTask(WaitThread); TaskEventRef = nullptr; DumpInstancingStats(); if (TaskContext.MeshPassProcessor) { TaskContext.MeshPassProcessor->~FMeshPassProcessor(); TaskContext.MeshPassProcessor = nullptr; } if (TaskContext.MobileBasePassCSMMeshPassProcessor) { TaskContext.MobileBasePassCSMMeshPassProcessor->~FMeshPassProcessor(); TaskContext.MobileBasePassCSMMeshPassProcessor = nullptr; } if (MaxNumDraws > 0) { GPrimitiveIdVertexBufferPool.ReturnToFreeList(PrimitiveIdVertexBufferPoolEntry); } FMemory::Free(TaskContext.PrimitiveIdBufferData); MaxNumDraws = 0; PassNameForStats.Empty(); TaskContext.DynamicMeshElements = nullptr; TaskContext.DynamicMeshElementsPassRelevance = nullptr; TaskContext.MeshDrawCommands.Empty(); TaskContext.MeshDrawCommandStorage.MeshDrawCommands.Empty(); FGraphicsMinimalPipelineStateId::AddSizeToLocalPipelineIdTableSize(TaskContext.MinimalPipelineStatePassSet.GetAllocatedSize()); TaskContext.MinimalPipelineStatePassSet.Empty(); TaskContext.MobileBasePassCSMMeshDrawCommands.Empty(); TaskContext.DynamicMeshCommandBuildRequests.Empty(); TaskContext.TempVisibleMeshDrawCommands.Empty(); TaskContext.PrimitiveIdBufferData = nullptr; TaskContext.PrimitiveIdBufferDataSize = 0; } FParallelMeshDrawCommandPass::~FParallelMeshDrawCommandPass() { check(TaskEventRef == nullptr); } class FDrawVisibleMeshCommandsAnyThreadTask : public FRenderTask { FRHICommandList& RHICmdList; const FInstanceCullingContext& InstanceCullingContext; const FMeshCommandOneFrameArray& VisibleMeshDrawCommands; const FGraphicsMinimalPipelineStateSet& GraphicsMinimalPipelineStateSet; const FMeshDrawCommandOverrideArgs OverrideArgs; uint32 InstanceFactor; int32 TaskIndex; int32 TaskNum; public: FDrawVisibleMeshCommandsAnyThreadTask( FRHICommandList& InRHICmdList, const FInstanceCullingContext& InInstanceCullingContext, const FMeshCommandOneFrameArray& InVisibleMeshDrawCommands, const FGraphicsMinimalPipelineStateSet& InGraphicsMinimalPipelineStateSet, const 
FMeshDrawCommandOverrideArgs& InOverrideArgs, uint32 InInstanceFactor, int32 InTaskIndex, int32 InTaskNum ) : RHICmdList(InRHICmdList) , InstanceCullingContext(InInstanceCullingContext) , VisibleMeshDrawCommands(InVisibleMeshDrawCommands) , GraphicsMinimalPipelineStateSet(InGraphicsMinimalPipelineStateSet) , OverrideArgs(InOverrideArgs) , InstanceFactor(InInstanceFactor) , TaskIndex(InTaskIndex) , TaskNum(InTaskNum) { } FORCEINLINE TStatId GetStatId() const { RETURN_QUICK_DECLARE_CYCLE_STAT(FDrawVisibleMeshCommandsAnyThreadTask, STATGROUP_TaskGraphTasks); } static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; } void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent) { FOptionalTaskTagScope Scope(ETaskTag::EParallelRenderingThread); SCOPED_NAMED_EVENT_TEXT("DrawVisibleMeshCommandsAnyThreadTask", FColor::Magenta); checkSlow(RHICmdList.IsInsideRenderPass()); // check for the multithreaded shader creation has been moved to FShaderCodeArchive::CreateShader() // Recompute draw range. const int32 DrawNum = VisibleMeshDrawCommands.Num(); const int32 NumDrawsPerTask = TaskIndex < DrawNum ? 
FMath::DivideAndRoundUp(DrawNum, TaskNum) : 0; const int32 StartIndex = TaskIndex * NumDrawsPerTask; const int32 NumDraws = FMath::Min(NumDrawsPerTask, DrawNum - StartIndex); InstanceCullingContext.SubmitDrawCommands( VisibleMeshDrawCommands, GraphicsMinimalPipelineStateSet, OverrideArgs, StartIndex, NumDraws, InstanceFactor, RHICmdList); RHICmdList.EndRenderPass(); RHICmdList.HandleRTThreadTaskCompletion(MyCompletionGraphEvent); } }; void FParallelMeshDrawCommandPass::BuildRenderingCommands( FRDGBuilder& GraphBuilder, const FGPUScene& GPUScene, FInstanceCullingDrawParams& OutInstanceCullingDrawParams) { if (TaskContext.InstanceCullingContext.IsEnabled()) { check(!bHasInstanceCullingDrawParameters); WaitForMeshPassSetupTask(); #if DO_CHECK for (const FVisibleMeshDrawCommand& VisibleMeshDrawCommand : TaskContext.MeshDrawCommands) { if (VisibleMeshDrawCommand.PrimitiveIdInfo.bIsDynamicPrimitive) { uint32 PrimitiveIndex = VisibleMeshDrawCommand.PrimitiveIdInfo.DrawPrimitiveId & ~GPrimIDDynamicFlag; checkf(TaskContext.View->DynamicPrimitiveCollector.IsPrimitiveProcessed(PrimitiveIndex, GPUScene), TEXT("Dynamic Primitive index %u has not been fully processed. It may be an invalid index for the collector or it has a pending GPU write."), PrimitiveIndex); } } #endif // 2. 
Run or queue finalize culling commands pass TaskContext.InstanceCullingContext.BuildRenderingCommands(GraphBuilder, GPUScene, TaskContext.View->DynamicPrimitiveCollector.GetInstanceSceneDataOffset(), TaskContext.View->DynamicPrimitiveCollector.NumInstances(), TaskContext.InstanceCullingResult, &OutInstanceCullingDrawParams); TaskContext.InstanceCullingResult.GetDrawParameters(OutInstanceCullingDrawParams); bHasInstanceCullingDrawParameters = true; check(!TaskContext.InstanceCullingContext.HasCullingCommands() || OutInstanceCullingDrawParams.DrawIndirectArgsBuffer && OutInstanceCullingDrawParams.InstanceIdOffsetBuffer); return; } OutInstanceCullingDrawParams.DrawIndirectArgsBuffer = nullptr; OutInstanceCullingDrawParams.InstanceIdOffsetBuffer = nullptr; OutInstanceCullingDrawParams.InstanceDataByteOffset = 0U; OutInstanceCullingDrawParams.IndirectArgsByteOffset = 0U; } void FParallelMeshDrawCommandPass::WaitForSetupTask() { WaitForMeshPassSetupTask(); } void FParallelMeshDrawCommandPass::DispatchDraw(FParallelCommandListSet* ParallelCommandListSet, FRHICommandList& RHICmdList, const FInstanceCullingDrawParams* InstanceCullingDrawParams) const { TRACE_CPUPROFILER_EVENT_SCOPE(ParallelMdcDispatchDraw); if (MaxNumDraws <= 0) { return; } FMeshDrawCommandOverrideArgs OverrideArgs; if (InstanceCullingDrawParams) { OverrideArgs = GetMeshDrawCommandOverrideArgs(*InstanceCullingDrawParams); } if (ParallelCommandListSet) { const ENamedThreads::Type RenderThread = ENamedThreads::GetRenderThread(); FGraphEventArray Prereqs; if (ParallelCommandListSet->GetPrereqs()) { Prereqs.Append(*ParallelCommandListSet->GetPrereqs()); } if (TaskEventRef.IsValid()) { Prereqs.Add(TaskEventRef); } // Distribute work evenly to the available task graph workers based on NumEstimatedDraws. // Every task will then adjust it's working range based on FVisibleMeshDrawCommandProcessTask results. 
const int32 NumThreads = FMath::Min(FTaskGraphInterface::Get().GetNumWorkerThreads(), ParallelCommandListSet->Width); const int32 NumTasks = FMath::Min(NumThreads, FMath::DivideAndRoundUp(MaxNumDraws, ParallelCommandListSet->MinDrawsPerCommandList)); const int32 NumDrawsPerTask = FMath::DivideAndRoundUp(MaxNumDraws, NumTasks); for (int32 TaskIndex = 0; TaskIndex < NumTasks; TaskIndex++) { const int32 StartIndex = TaskIndex * NumDrawsPerTask; const int32 NumDraws = FMath::Min(NumDrawsPerTask, MaxNumDraws - StartIndex); checkSlow(NumDraws > 0); FRHICommandList* CmdList = ParallelCommandListSet->NewParallelCommandList(); FGraphEventRef AnyThreadCompletionEvent = TGraphTask::CreateTask(&Prereqs, RenderThread) .ConstructAndDispatchWhenReady(*CmdList, TaskContext.InstanceCullingContext, TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, OverrideArgs, TaskContext.InstanceFactor, TaskIndex, NumTasks); ParallelCommandListSet->AddParallelCommandList(CmdList, AnyThreadCompletionEvent, NumDraws); } } else { QUICK_SCOPE_CYCLE_COUNTER(STAT_MeshPassDrawImmediate); WaitForMeshPassSetupTask(IsInActualRenderingThread() ? 
EWaitThread::Render : EWaitThread::Task); if (TaskContext.bUseGPUScene) { if (TaskContext.MeshDrawCommands.Num() > 0) { TaskContext.InstanceCullingContext.SubmitDrawCommands( TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, OverrideArgs, 0, TaskContext.MeshDrawCommands.Num(), TaskContext.InstanceFactor, RHICmdList); } } else { SubmitMeshDrawCommandsRange(TaskContext.MeshDrawCommands, TaskContext.MinimalPipelineStatePassSet, nullptr, 0, 0, TaskContext.bDynamicInstancing, 0, TaskContext.MeshDrawCommands.Num(), TaskContext.InstanceFactor, RHICmdList); } } } void FParallelMeshDrawCommandPass::DumpInstancingStats() const { if (!PassNameForStats.IsEmpty() && TaskContext.VisibleMeshDrawCommandsNum > 0) { UE_LOG(LogRenderer, Log, TEXT("Instancing stats for %s"), *PassNameForStats); UE_LOG(LogRenderer, Log, TEXT(" %i Mesh Draw Commands in %i instancing state buckets"), TaskContext.VisibleMeshDrawCommandsNum, TaskContext.NewPassVisibleMeshDrawCommandsNum); UE_LOG(LogRenderer, Log, TEXT(" Largest %i"), TaskContext.MaxInstances); UE_LOG(LogRenderer, Log, TEXT(" %.1f Dynamic Instancing draw call reduction factor"), TaskContext.VisibleMeshDrawCommandsNum / (float)TaskContext.NewPassVisibleMeshDrawCommandsNum); } } void FParallelMeshDrawCommandPass::SetDumpInstancingStats(const FString& InPassNameForStats) { PassNameForStats = InPassNameForStats; }