// Copyright Epic Games, Inc. All Rights Reserved. #include "MuT/DataPacker.h" #include "Containers/Array.h" #include "HAL/PlatformCrt.h" #include "HAL/UnrealMemory.h" #include "MuR/CodeVisitor.h" #include "MuR/Image.h" #include "MuR/Layout.h" #include "MuR/Mesh.h" #include "MuR/MeshBufferSet.h" #include "MuR/ModelPrivate.h" #include "MuR/MutableTrace.h" #include "MuR/Operations.h" #include "MuR/Ptr.h" #include "MuR/RefCounted.h" #include "MuT/ASTOpConstantResource.h" #include "MuT/ASTOpImageCompose.h" #include "MuT/ASTOpImageLayer.h" #include "MuT/ASTOpImageLayerColor.h" #include "MuT/ASTOpImageMultiLayer.h" #include "MuT/ASTOpInstanceAdd.h" #include "MuT/ASTOpMeshExtractLayoutBlocks.h" #include "MuT/ASTOpMeshRemoveMask.h" #include "MuT/ASTOpMeshDifference.h" #include "MuT/ASTOpMeshMorph.h" #include "MuT/ASTOpLayoutFromMesh.h" #include "MuT/ASTOpImageRasterMesh.h" #include "MuT/CompilerPrivate.h" namespace mu { //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- SubtreeSearchConstantVisitor::SubtreeSearchConstantVisitor( FProgram& program, OP::ADDRESS constant, OP_TYPE optype ) : m_program( program ), m_constant( constant ), m_opType( optype ) { m_visited.SetNum( program.m_opAddress.Num() ); if ( m_visited.Num() ) { FMemory::Memset( &m_visited[0], 0, m_visited.Num() ); } } //--------------------------------------------------------------------------------------------- bool SubtreeSearchConstantVisitor::Run( OP::ADDRESS root ) { // too spammy // \todo: called too many times! 
// MUTABLE_CPUPROFILER_SCOPE(SubtreeSearchConstantVisitor); bool found = false; TArray< TPair > pending; pending.Reserve(m_program.m_opAddress.Num()/4); pending.Add({ false,root }); while( pending.Num() ) { TPair item = pending.Pop(); if (item.Key) { // Item indicating we finished with all the children of a parent. // Propagate the "found" state upwards m_visited[item.Value]=1; //OP cop = m_program.m_code[item.Value]; ForEachReference( m_program, (OP::ADDRESS)item.Value, [&](OP::ADDRESS ref) { if (ref && m_visited[ref]==2) { m_visited[item.Value]=2; } }); } else { if ( !m_visited[item.Value] ) { OP_TYPE thisOpType = m_program.GetOpType(item.Value); if ( m_opType == thisOpType ) { OP::ResourceConstantArgs args = m_program.GetOpArgs(item.Value); if ( m_constant == args.value ) { found = true; m_visited[item.Value] = 2; } } if (!found) { pending.Add({ true,item.Value }); //OP cop = m_program.m_code[item.Value]; ForEachReference( m_program, item.Value, [&](OP::ADDRESS ref) { if (ref && !m_visited[ref]) { pending.Add({ false,ref }); } }); } } else { found = (m_visited[item.Value]==2); } } } return found; } //--------------------------------------------------------------------------------------------- //! Get all the parameters that affect the constant. //! "Affect" means that the constant may be used or not depending on the parameter. 
// NOTE(review): everything from here to the end of the file appears corrupted by an
// angle-bracket/markup stripping pass: template argument lists (TArray<...>,
// static_cast<...>, GetOpArgs<...>) are missing, in several places whole spans of code
// between a '<' and the following '>' were deleted (e.g. the end of the
// GatherParametersVisitor constructor and its Visit() signature), and original line breaks
// were collapsed so '//' comments now swallow the code that followed them on the same line.
// Do NOT attempt to compile or hand-patch this region; restore it from version control.
// The annotations below describe each unit only as far as the surviving text shows.
//
// GatherParametersVisitor: code visitor that gathers the set of program parameters that
// affect whether a given constant is used. It searches conditional branches with
// m_constSearch (SubtreeSearchConstantVisitor); where the constant appears in only one of
// the two branches (foundYes != foundNo) it runs m_conditionVisitor on the condition and
// accumulates those parameters. Results end up in m_allParams / m_sortedParams.
//--------------------------------------------------------------------------------------------- class GatherParametersVisitor : public UniqueConstCodeVisitorIterative< TArray > { public: GatherParametersVisitor( FProgram& program, OP::ADDRESS constant, OP_TYPE opType ) : m_constSearch( program, constant, opType ) { MUTABLE_CPUPROFILER_SCOPE(GatherParametersVisitor); TArray currentParams; currentParams.SetNumZeroed( program.m_parameters.Num() ); SetDefaultState(currentParams); // TODO: we can optimize by precalculating what ops have the required optype below m_opType = opType; m_constant = constant; m_allParams.SetNumZeroed( program.m_parameters.Num() ); FullTraverse( program ); for (int32 i=0; i(at); // If the constant is present in only one of the 2 branches bool foundYes = m_constSearch.Run( args.yes ); bool foundNo = m_constSearch.Run( args.no ); if ( foundYes!=foundNo ) { m_conditionVisitor.Run( args.condition, program ); TArray currentParams = GetCurrentState(); for (int32 p=0; p currentParams = GetCurrentState(); for (int32 p=0; p(at); if ( args.value == m_constant ) { // Accumulate the currently relevant parameters const TArray& currentParams = GetCurrentState(); for (int32 i=0; i(at); if ( args.value == m_constant ) { // Accumulate the currently relevant parameters const TArray& currentParams = GetCurrentState(); for (int32 i=0; i m_sortedParams; private: OP::ADDRESS m_constant; OP_TYPE m_opType; TArray m_allParams; SubtreeParametersVisitor m_conditionVisitor; SubtreeSearchConstantVisitor m_constSearch; }; //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- class AccumulateImageFormatsAST : public Visitor_TopDown_Unique_Const< TArray > { public: void Run( const ASTOpList& roots ) {
// AccumulateImageFormatsAST (continued from the class header above): top-down AST visitor
// whose per-branch state is a bitset of EImageFormat values allowed at the current point.
// IM_CONSTANT nodes intersect that state into m_supportedFormats; IM_SWITCH/IM_CONDITIONAL
// pass the state through unchanged; compose/layer ops recurse with a cleared state and
// additionally enable RLE formats for their mask children (see cases below).
MUTABLE_CPUPROFILER_SCOPE(AccumulateImageFormatsAST); // TODO: Use statically sized vectors (std::array) //vector initial( IF_COUNT, true ); //std::fill( m_supportedFormats.begin(), m_supportedFormats.end(), initial ); TArray defaultState; defaultState.SetNumZeroed(int32(EImageFormat::IF_COUNT)); Traverse( roots, defaultState ); } bool Visit( const Ptr& node ) override { bool recurse = true; const TArray& currentFormats = GetCurrentState(); TArray defaultState; defaultState.SetNumZeroed(int32(EImageFormat::IF_COUNT)); bool allFalse = currentFormats == defaultState; // Can we use the cache? if (allFalse) { if (m_visited.count(node)) { return false; } m_visited.insert(node); } switch ( node->GetOpType() ) { case OP_TYPE::IM_CONSTANT: { // Remove unsupported formats const ASTOpConstantResource* op = static_cast(node.get()); if (!m_supportedFormats.count(op)) { TArray initial; initial.Init( true, int32(EImageFormat::IF_COUNT) ); m_supportedFormats.insert( std::make_pair(op, std::move(initial)) ); } for ( unsigned f=0; f< unsigned(EImageFormat::IF_COUNT); ++f ) { if ( !currentFormats[f] ) { m_supportedFormats[op][f] = false; } } recurse = false; break; } case OP_TYPE::IM_SWITCH: case OP_TYPE::IM_CONDITIONAL: // Switches and conditionals don't change the supported formats break; case OP_TYPE::IM_COMPOSE: { recurse = false; const ASTOpImageCompose* op = static_cast(node.get()); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); RecurseWithState( op->Layout.child(), newState ); RecurseWithState( op->Base.child(), newState ); RecurseWithState( op->BlockImage.child(), newState ); if ( op->Mask ) { newState[(int32)EImageFormat::IF_L_UBIT_RLE] = true; RecurseWithState( op->Mask.child(), newState ); } break; } case OP_TYPE::IM_LAYERCOLOUR: { recurse = false; const ASTOpImageLayerColor* op = static_cast(node.get()); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); RecurseWithState( op->base.child(), newState ); RecurseWithState(
// (continuation of the IM_LAYERCOLOUR case, then IM_LAYER / IM_MULTILAYER / IM_DISPLACE:
// each recurses with a cleared format set and marks mask children as additionally
// representable in IF_L_UBYTE / IF_L_UBYTE_RLE; IM_DISPLACE does the same for its
// displacement map. The default case resets the state before recursing.)
op->color.child(), newState ); if ( op->mask ) { newState[(int32)EImageFormat::IF_L_UBYTE] = true; newState[(int32)EImageFormat::IF_L_UBYTE_RLE] = true; RecurseWithState( op->mask.child(), newState ); } break; } case OP_TYPE::IM_LAYER: { recurse = false; const ASTOpImageLayer* op = static_cast(node.get()); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); RecurseWithState( op->base.child(), newState ); RecurseWithState( op->blend.child(), newState ); if (op->mask) { newState[(int32)EImageFormat::IF_L_UBYTE] = true; newState[(int32)EImageFormat::IF_L_UBYTE_RLE] = true; RecurseWithState( op->mask.child(), newState ); } break; } case OP_TYPE::IM_MULTILAYER: { recurse = false; const ASTOpImageMultiLayer* op = static_cast(node.get()); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); RecurseWithState( op->base.child(), newState ); RecurseWithState( op->blend.child(), newState ); if (op->mask) { newState[(size_t)EImageFormat::IF_L_UBYTE] = true; newState[(size_t)EImageFormat::IF_L_UBYTE_RLE] = true; RecurseWithState( op->mask.child(), newState ); } break; } case OP_TYPE::IM_DISPLACE: { recurse = false; const ASTOpFixed* op = static_cast(node.get()); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); RecurseWithState( op->children[op->op.args.ImageDisplace.source].child(), newState ); newState[(int32)EImageFormat::IF_L_UBYTE ] = true; newState[(int32)EImageFormat::IF_L_UBYTE_RLE ] = true; RecurseWithState( op->children[op->op.args.ImageDisplace.displacementMap].child(), newState ); break; } default: { //m_currentFormats.Add(vector(IF_COUNT, false)); //Recurse(at, program); //m_currentFormats.pop_back(); TArray newState; newState.Init(false, int32(EImageFormat::IF_COUNT)); if (currentFormats != newState) { RecurseWithState(node, newState); recurse = false; } else { recurse = true; } break; } } return recurse; } public: //! Result of this visitor: //! Formats known to be supported by every constant image.
// Results/cache members of AccumulateImageFormatsAST, then:
// AccumulateMeshChannelUsageAST: top-down AST visitor whose state is a uint64 bitmask of
// EMeshBufferSemantic (MBS_*) values required at the current point. ME_CONSTANT nodes
// OR the current mask into m_requiredSemantics; mesh ops that need specific channels
// (difference/remove-mask/morph/apply-layout/project/raster-mesh/extract-layout-block)
// add the corresponding semantic bits before recursing into their mesh children.
std::unordered_map< Ptr, TArray > m_supportedFormats; private: //! Cache. Only valid is current formats are all false. std::unordered_set> m_visited; }; //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- class AccumulateMeshChannelUsageAST : public Visitor_TopDown_Unique_Const< uint64_t > { public: void Run( const ASTOpList& roots ) { MUTABLE_CPUPROFILER_SCOPE(AccumulateMeshChannelUsageAST); // Sanity check in case we add more semantics static_assert(MBS_COUNT& node ) override { bool recurse = true; uint64 currentSemantics = GetCurrentState(); switch ( node->GetOpType() ) { case OP_TYPE::ME_CONSTANT: { // Accumulate necessary semantics const ASTOpConstantResource* op = static_cast(node.get()); uint64 currentFlags = 0; if (m_requiredSemantics.count(op)) { currentFlags = m_requiredSemantics[op]; } else { m_requiredSemantics.insert(std::make_pair(op,currentFlags)); } currentFlags |= currentSemantics; m_requiredSemantics[op] = currentFlags; recurse = false; break; } // TODO: These could probably optimise something //case OP_TYPE::IM_RASTERMESH: break; case OP_TYPE::ME_DIFFERENCE: { recurse = false; const ASTOpMeshDifference* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1)<Base.child(), newState ); RecurseWithState( op->Target.child(), currentSemantics ); break; } case OP_TYPE::ME_REMOVEMASK: { recurse = false; const ASTOpMeshRemoveMask* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1)<source.child(), newState ); for( const TPair& r: op->removes ) { RecurseWithState( r.Value.child(), newState ); } break; } case OP_TYPE::ME_MORPH: { recurse = false; const ASTOpMeshMorph* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |=
// (continuation of AccumulateMeshChannelUsageAST::Visit: ME_MORPH, ME_APPLYLAYOUT,
// ME_PROJECT, IM_RASTERMESH, ME_EXTRACTLAYOUTBLOCK, LA_FROMMESH all add semantic bits —
// MBS_LAYOUTBLOCK where visible — for their mesh children, while non-mesh children keep
// the incoming mask; IN_ADDMESH resets the mask to the default state for its value child.)
(UINT64_C(1)<Base.child(), newState ); RecurseWithState( op->Target.child(), newState ); break; } case OP_TYPE::ME_APPLYLAYOUT: { recurse = false; const ASTOpFixed* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1)<children[op->op.args.MeshApplyLayout.mesh].child(), newState ); RecurseWithState( op->children[op->op.args.MeshApplyLayout.layout].child(), currentSemantics ); break; } case OP_TYPE::ME_PROJECT: { recurse = false; const ASTOpFixed* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1) << MBS_LAYOUTBLOCK); RecurseWithState(op->children[op->op.args.MeshProject.mesh].child(), newState); RecurseWithState(op->children[op->op.args.MeshProject.projector].child(), currentSemantics); break; } case OP_TYPE::IM_RASTERMESH: { recurse = false; const ASTOpImageRasterMesh* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1) << MBS_LAYOUTBLOCK); RecurseWithState(op->mesh.child(), newState); RecurseWithState(op->image.child(), currentSemantics); RecurseWithState(op->angleFadeProperties.child(), currentSemantics); RecurseWithState(op->mask.child(), currentSemantics); RecurseWithState(op->projector.child(), currentSemantics); break; } case OP_TYPE::ME_EXTRACTLAYOUTBLOCK: { recurse = false; const ASTOpMeshExtractLayoutBlocks* op = static_cast(node.get()); // todo: check if we really need all of them uint64 newState = currentSemantics; newState |= (UINT64_C(1)<Source.child(), newState ); break; } case OP_TYPE::LA_FROMMESH: { recurse = false; const ASTOpLayoutFromMesh* op = static_cast(node.get()); uint64 newState = currentSemantics; newState |= (UINT64_C(1) << MBS_LAYOUTBLOCK); RecurseWithState(op->Mesh.child(), newState); break; } case OP_TYPE::IN_ADDMESH: { recurse = false; const ASTOpInstanceAdd* op = static_cast(node.get()); RecurseWithState( op->instance.child(), currentSemantics ); uint64 newState = GetDefaultState(); RecurseWithState( op->value.child(),
// (end of AccumulateMeshChannelUsageAST, then MeshRemoveUnusedBufferSemantics: strips from
// a mesh every vertex buffer none of whose channel semantics appear in the usedSemantics
// bitmask — whole buffers only, per the surviving TODO about per-channel removal.)
newState ); break; } default: // Unhandled op, we may need everything? Recurse with current state? //uint64 newState = 0xffffffffffffffff; break; } return recurse; } public: //! Result of this visitor: //! Used mesh channel semantics for each constant mesh std::unordered_map< Ptr, uint64 > m_requiredSemantics; }; //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- //--------------------------------------------------------------------------------------------- // Todo: move to its own file inline void MeshRemoveUnusedBufferSemantics( Mesh* pMesh, uint64 usedSemantics ) { // right now we only remove entire buffers if no channel is used // TODO: remove from inside the buffer? for (int v=0; vGetVertexBuffers().GetBufferCount(); ) { bool used = false; for (int c=0; !used && cGetVertexBuffers().GetBufferChannelCount(v); ++c) { EMeshBufferSemantic semantic; pMesh->GetVertexBuffers().GetChannel(v,c,&semantic, nullptr, nullptr, nullptr, nullptr); used = (( (UINT64_C(1)<& buffers = pMesh->GetVertexBuffers().Buffers; buffers.RemoveAt(v); } else { ++v; } } // TODO: hack, if we don't need layouts, remove them.
// (tail of MeshRemoveUnusedBufferSemantics: drops mesh Layouts when layout semantics are
// unused. Then DataOptimise, the entry point of this file: runs AccumulateImageFormatsAST
// over the roots and, per constant image, either replaces a plain-colour image with a
// CO_CONSTANT + IM_PLAINCOLOUR op pair, or re-encodes it as IF_L_UBIT_RLE / IF_L_UBYTE_RLE
// when every use site supports it and the size gain beats MinRLECompressionGain.)
{ uint64 layoutSemantics = 0; layoutSemantics |= (UINT64_C(1)<Layouts.Empty(); } } } //--------------------------------------------------------------------------------------------- void DataOptimise( const CompilerOptions* Options, ASTOpList& roots ) { int32 ImageCompressionQuality = Options->GetPrivate()->ImageCompressionQuality; const FModelOptimizationOptions& OptimizeOptions = Options->GetPrivate()->OptimisationOptions; // Images AccumulateImageFormatsAST accFormat; accFormat.Run( roots ); // See if we can convert some constants to more efficient formats ASTOp::Traverse_BottomUp_Unique_NonReentrant( roots, [&](Ptr& n) { if (n->GetOpType()==OP_TYPE::IM_CONSTANT) { ASTOpConstantResource* typed = static_cast(n.get()); Ptr pOld = static_cast(typed->GetValue().get()); FImageOperator ImOp = FImageOperator::GetDefault( Options->GetPrivate()->ImageFormatFunc ); // See if there is a better format for this image FVector4f PlainColor; if ( pOld->IsPlainColour(PlainColor) ) { // It is more efficient to just have an instruction for it instead, to avoid the overhead // of data loading. // Warning This eliminates the mips. \TODO: Add support for mips in plaincolour instruction?
// (DataOptimise continued: builds the replacement CO_CONSTANT colour op and the
// IM_PLAINCOLOUR op that consumes it, copying format and size from the old image, then
// the two RLE re-encoding branches guarded by accFormat.m_supportedFormats.)
Ptr NewColor = new ASTOpFixed; NewColor->op.type = OP_TYPE::CO_CONSTANT; NewColor->op.args.ColourConstant.value[0] = PlainColor[0]; NewColor->op.args.ColourConstant.value[1] = PlainColor[1]; NewColor->op.args.ColourConstant.value[2] = PlainColor[2]; NewColor->op.args.ColourConstant.value[3] = PlainColor[3]; Ptr NewPlain = new ASTOpFixed; NewPlain->op.type = OP_TYPE::IM_PLAINCOLOUR; NewPlain->SetChild( NewPlain->op.args.ImagePlainColour.colour, NewColor ); NewPlain->op.args.ImagePlainColour.format = pOld->GetFormat(); NewPlain->op.args.ImagePlainColour.size[0] = pOld->GetSizeX(); NewPlain->op.args.ImagePlainColour.size[1] = pOld->GetSizeY(); NewPlain->op.args.ImagePlainColour.LODs = 1; ASTOp::Replace(n, NewPlain); } else if ( accFormat.m_supportedFormats[typed][(int32)EImageFormat::IF_L_UBIT_RLE] ) { ImagePtr pNew = ImOp.ImagePixelFormat( ImageCompressionQuality, pOld.get(), EImageFormat::IF_L_UBIT_RLE ); // Only replace if the compression was worth! int32 oldSize = pOld->GetDataSize(); int32 newSize = pNew->GetDataSize(); if (float(oldSize) > float(newSize) * OptimizeOptions.MinRLECompressionGain) { typed->SetValue(pNew, OptimizeOptions.DiskCacheContext); } } else if ( accFormat.m_supportedFormats[typed][(int32)EImageFormat::IF_L_UBYTE_RLE] ) { ImagePtr pNew = ImOp.ImagePixelFormat( ImageCompressionQuality, pOld.get(), EImageFormat::IF_L_UBYTE_RLE ); // Only replace if the compression was worth!
// (DataOptimise tail: meshes — runs AccumulateMeshChannelUsageAST, then clones each
// constant mesh, strips unused buffer semantics via MeshRemoveUnusedBufferSemantics, and
// stores the trimmed mesh back with SetValue. Final '}' closes namespace mu.)
int32 oldSize = pOld->GetDataSize(); int32 newSize = pNew->GetDataSize(); if (float(oldSize) > float(newSize) * OptimizeOptions.MinRLECompressionGain) { typed->SetValue(pNew, OptimizeOptions.DiskCacheContext); } } } }); // Meshes AccumulateMeshChannelUsageAST meshSemanticsVisitor; meshSemanticsVisitor.Run( roots ); // See if we can remove some buffers from the constants ASTOp::Traverse_BottomUp_Unique_NonReentrant( roots, [&](Ptr& n) { if (n->GetOpType()==OP_TYPE::ME_CONSTANT) { ASTOpConstantResource* typed = static_cast(n.get()); Ptr pMesh = static_cast(typed->GetValue().get())->Clone(); MeshRemoveUnusedBufferSemantics( pMesh.get(), meshSemanticsVisitor.m_requiredSemantics[typed]); typed->SetValue(pMesh, OptimizeOptions.DiskCacheContext); } }); } }