Files
UnrealEngineUWP/Engine/Source/Developer/MaterialBaking/Private/MaterialBakingModule.cpp

1055 lines
38 KiB
C++
Raw Normal View History

// Copyright Epic Games, Inc. All Rights Reserved.
#include "MaterialBakingModule.h"
#include "MaterialRenderItem.h"
#include "Engine/TextureRenderTarget2D.h"
#include "ExportMaterialProxy.h"
#include "Interfaces/IMainFrameModule.h"
#include "MaterialOptionsWindow.h"
#include "MaterialOptions.h"
#include "PropertyEditorModule.h"
#include "MaterialOptionsCustomization.h"
#include "UObject/UObjectGlobals.h"
#include "MaterialBakingStructures.h"
#include "Framework/Application/SlateApplication.h"
#include "MaterialBakingHelpers.h"
#include "Async/Async.h"
#include "Async/ParallelFor.h"
#include "Materials/MaterialInstance.h"
#include "Materials/MaterialInstanceConstant.h"
#include "MaterialEditor/MaterialEditorInstanceConstant.h"
#include "RenderingThread.h"
#include "RHISurfaceDataConversion.h"
#include "Misc/ScopedSlowTask.h"
#include "MeshDescription.h"
Async Texture Compilation - Feature can be activated in the Experimental section of the Editor Settings - Replace Texture2D/TextureCube resources by placeholders until their PlatformData is ready - Add a utility class allowing to encapsulate raw field pointers without breaking compatibility - Protect PlatformData from unsafe access through encapsulation. - Protect texture's resource from race conditions between game and render threads through encapsulation. - This allows to get rid of FlushRenderingCommands and long game-thread stutters when Updating a texture's resource. - UpdateResource was never safe to call without a FlushRenderingCommands and multiple call sites are doing exactly that, this will fix those cases. - Those were probably undetected due to their low occurrence rate under normal conditions but can easily be reproed during async texture compilation on 32 cores. - Force wait on required texture compilations for MaterialBaking, ProxyMesh, Thumbnail generation for disk usage - Wait on all textures compilation whenever a wait for all shaders compilation is requested for safety (i.e. screenshot) - Compile UI and heightmap textures with higher priority to reduce visual artefacts - Increase priority of textures that have been rendered to improve time-to-usefulness of the editor under low core count - Async compilation is disabled for -game / non-editor mode as there is currently no support for async bulk data loading from external files - Properly cancel async tasks when UTexture is garbage collected before the compilation is finished - Show progress when explicitly waiting on compilation - Changing the mip settings in the texture editor (or any settings requiring the running platform data to be recomputed) will now be processed asynchronously. 
DEBUGGING - Can be forcibly enabled/disabled through command-line via -asynctexturecompilation=[off, on, paused] - Can pause texture compilation using Editor.AsyncTextureCompilation = 2 or -asynctexturecompilation=paused - Can manually resume a specified amount of paused compilation using Editor.AsyncTextureCompilationResume [Num] - Can forcibly wait on all compilation using Editor.AsyncTextureCompilationFlushAll BENCHMARKS - 3m15s to 1m20s when loading Apollo_Terrain with no textures in DDC (AMD TR 3970X) - 6m45s to 1m11s when loading Apollo_Terrain with no textures in DDC (-corelimit=8) - 3m10s to 1m54s when launching PIE on Apollo_Terrain with no textures in DDC (AMD TR 3970X) - 7m43s to 1m36s when launching PIE on Apollo_Terrain with no textures in DDC (-corelimit=8) - 0m57s to 0m42s when importing Attic_NVIDIA.usd with no textures in DDC (AMD TR 3970X) - 2m14s to 0m35s when importing Attic_NVIDIA.usd with no textures in DDC (-corelimit=4) TESTS - Success on all material baking tests from EngineTests with -asynctexturecompilation=paused - Ran with -corelimit=1 all the way to unlimited - Cooking worked - Opening the texture editor/material editor will force the compilation to finish as expected. - Changing a setting in the texture editor will recompile async, even allowing to close the editor and continue doing other changes. - Unpausing the compilation will update the texture thumbnails properly. - Started with -asynctexturecompilation=paused, and then unpaused after a map loading, and then into a PIE session to stresstest UpdateResources. - Tested both dx11/dx12 - Vulkan fails on Fortnite even with -asynctexturecompilation=off because of Landscape weightmap, not this CL. - Compiled and tested FortniteGame / UE4 / ShooterGame projects #rb Uriel.Doyon, Francis.Hurteau [CL 13694814 by danny couture in ue5-main branch]
2020-06-16 22:16:25 -04:00
#include "TextureCompiler.h"
#include "RenderCaptureInterface.h"
#if WITH_EDITOR
Copying //UE4/Dev-Core to //UE4/Dev-Main (Source: //UE4/Dev-Core @ 3847469) #lockdown Nick.Penwarden #rb none ============================ MAJOR FEATURES & CHANGES ============================ Change 3805828 by Gil.Gribb UE4 - Fixed a bug in the lock free stalling task queue and adjusted a comment. The code is not current used, so this is not actually change the way the code works. Change 3806784 by Ben.Marsh UAT: Remove code to compile UBT when using UE4Build. It should already be compiled as a dependency of UAT. Change 3807549 by Graeme.Thornton Add a cook timer around VerifyCanCookPackage. A licensee reports this taking a lot of time so it'll be good to account for it. Change 3807727 by Graeme.Thornton Unhide the text asset format experimental editor option Change 3807746 by Josh.Engebretson Remove WER from iOS platform Change 3807928 by Robert.Manuszewski When async loading, GC Clusters will be created after packages have been processed to avoid situations where some of the objects that are being added to a cluster haven't been fully loaded yet Change 3808221 by Steve.Robb GitHub #4307 - Made GetModulePtr() thread safe by not using GetModule() ^ I'm not convinced by how much thread-safer this is really, but it's tidier anyway. Change 3809233 by Graeme.Thornton TBA: Misc changes to text asset commandlet - Rename mode to "loadsave" - Add -outputFormat option which can be assigned "text" or "binary" - When saving binary, use a differentiated filename so that source assets aren't overwritten Change 3809518 by Ben.Marsh Remove the outdated UnrealSync automation script. 
Change 3809643 by Steve.Robb GitHub #4277 : fix bug; FMath::FormatIntToHumanReadable 3rd comma and negative value #jira UE-53037 Change 3809862 by Steve.Robb GitHub #3342 : [FRotator.h] Fix to DecompressAxisFromByte to be more efficient and reflect its intent accurately #jira UE-42593 Change 3811190 by Graeme.Thornton Add support for writing specific log channels to their own files Change 3811197 by Graeme.Thornton Minor updates to output formatting and timing for the text asset commandlet Change 3811257 by Robert.Manuszewski Cluster creation will now be time-sliced Change 3811565 by Steve.Robb Define out non-monolithic module functions. Change 3812561 by Steve.Robb GitHub #3886 : Enable Brace-Initialization for Declaring Variables Incorrect semi-colon search removed after discussion with author. Test added. #jira UE-48242 Change 3812864 by Steve.Robb Removal of some unproven code which was supposed to fix hot reloading BP class functions in plugins. See: https://udn.unrealengine.com/questions/376978/aitask-blueprint-nodes-disappear-when-their-module.html #jira UE-53089 Change 3820358 by Ben.Marsh PR #4358: Incredibuild use ShowAgent by default (Contributed by projectgheist) Change 3822594 by Ben.Marsh UAT: Improvements to log file handling. - Always create log files in the final location, rather than writing to a temp directory and copying in later. - Now supports -Verbose and -VeryVerbose for increasing log verbosity, rather than -Verbose=XXX. - Keep a backlog of log output before the log system is initialized, and flush it to the log file once it is. - Allow buildmachines to specify the uebp_FinalLogFolder environment variable, which is used to form paths for display. When build machines copy log files elsewhere after UAT finishes (eg. a network share), this allows error messages to display the right location. 
Change 3823695 by Ben.Marsh UGS: Fix issue where precompiled binaries would not be shown as available for a change until scrolling the last submitted code change into the buffer (other symptoms, like de-focussing the main window would cause it to go back to an unavailable state, since the changes buffer was shrunk). Now always queries changes up to the last change for which zipped binaries are available. Change 3823845 by Ben.Marsh UBT: Exclude C# projects for unsupported platforms when generating project files. Change 3824180 by Ben.Marsh UGS: Add an option to show changes by build machines, and move the "only show reviewed" option in there too (Options > Show Changes). #jira Change 3825777 by Steve.Robb Fix to return value of StringToBytes. Change 3825810 by Ben.Marsh UBT: Reduce length of include paths for MSVC toolchain. Change 3825822 by Robert.Manuszewski Optimized PIE lazy pointer fixup. Should be up to 8x faster now. Change 3826734 by Ben.Marsh Remove code to disable TextureFormatAndroid on Linux. It seems to be an editor dependency. Change 3827730 by Steve.Robb Try to avoid decltype(auto) if it's not supported. See: https://udn.unrealengine.com/questions/395644/build-417-with-c11-on-linux-ttuple-errors.html Change 3827745 by Steve.Robb Initializer list support for TMap. Change 3827770 by Steve.Robb GitHub #4399 : Added a CONSTEXPR qualifiers to FVariant::GetType() #jira UE-53813 Change 3829189 by Ben.Marsh UBT: Now always writes a minimal log file. By default, just contains the regular console output and any reasons why actions are outdated and needed to be executed. UAT directs child UBT instances to output logs into its own log folder, so that build machines can save them off. Change 3830444 by Steve.Robb BuildVersion and ModuleManifest moved to Core, and parsing of these files reimplemented to avoid a JSON library. This should be revisited when Core has its own JSON library. 
Change 3830718 by Ben.Marsh Fix incorrect group name being returned by FStatNameAndInfo::GetGroupName() for stat groups. The editor populates the viewport stats list by calling this for every registered stat and stat group (via FLevelViewportCommands::HandleNewStatGroup). The menu entry attempts to show the stat name with STAT_XXX stripped from the start as the menu item label, with the free-form text description as a tooltip. For stat groups, the it would previously just return the stat group name as "Groups" (due to the raw naming convention of "//Groups//STATGROUP_Foo//..."). Since this didn't match the expected naming convention in FLevelViewportCommands::HandleNewStat (ie. STAT_XXX or STATGROUP_XXX), it would fail to add it. When the first actual stat belonging to that group is added, it would add a menu entry for the group based on that, but the stat description no longer makes sense as a tooltip for the group. As a result, all the editor tooltips were junk. #jira UE-53845 Change 3831064 by Ben.Marsh Fix log file contention when spawning UBT recursively. Change 3832654 by Ben.Marsh UGS: Fix error panel not being selected when opened, and weird alignment/color issues on it. Change 3832680 by Ben.Marsh UGS: Fix failing to detect workspace if synced to a different stream. Seems to be a regression caused by recent P4D upgrade. Change 3832695 by Ben.Marsh UGS: Invert the options in the 'Show Changes' submenu for simplicity. Change 3833528 by Ben.Marsh UAT: Script to rewrite source files with public include paths relative to the 'Public' folder. Usage is: RebasePublicIncludePaths -UpdateDir=<Dir> [-Project=<Dir>] [-Write]. Change 3833543 by Ben.Marsh UBT: Allow targets to opt-out of having public include paths added for every dependent module. This reduces the command line length when building a target, which has recently become a problem with larger games (due to Microsoft's compiler embedding the command line into each object file, with a maximum length of 64kb). 
All engine modules are compiled with this enabled; games may opt into it by setting bLegacyPublicIncludePaths = false; from their .target.cs, as may individual modules. Change 3834354 by Robert.Manuszewski Archetype pointer will now be cached to avoid locking the object tables when acquiring its info. It should also be faster this way regardless of any locks. #jira UE-52035 Change 3834400 by Robert.Manuszewski Fixing crash on exit caused by cached archetypes not being cleaned up before static exit cleanup. #jira UE-52035 Change 3834947 by Steve.Robb USE_FORMAT_STRING_TYPE_CHECKING removed from FMsg::Logf and FMsg::Logf_Internal. Change 3835004 by Ben.Marsh Fix code that relies on dubious behavior of requiring referenced "include path only" modules having their _API macros set to be empty, even if the module is actually implemented in a separate DLL. Change 3835340 by Ben.Marsh Fix errors making installed build from directories with spaces in the name. Change 3835972 by Ben.Marsh UBT: Improved diagnostic message for targets which don't need a version file. Change 3836019 by Ben.Marsh UBT: Fix warnings caused by defining linkage macros for third party libraries. Change 3836269 by Ben.Marsh Fix message box larger than the screen height being created when a large number of modules are incompatible on startup. Change 3836543 by Ben.Marsh Enable SoundMod plugin on Linux, since it's already supported through the editor. Change 3836546 by Ben.Marsh PR #4412: fix type mismatch (Contributed by nakapon) Change 3836805 by Ben.Marsh Fix commandlet to compile marketplace plugins. Change 3836829 by Ben.Marsh UBT: Fix ability to precompile plugins from installed engine builds. Change 3837036 by Ben.Marsh UBT: Write the previous and new contents of intermediate files to the log if they change. Makes it easier to debug unexpected rebuilds. 
Change 3837037 by Ben.Marsh UBT: Fix engine modules having inconsistent definitions depending on whether modules are only referenced for their include paths vs being linked into a binary (due to different _API macro). Change 3837040 by Ben.Marsh UBT: Remove code that initializes members in ModuleRules and TargetRules objects before the constructor is run. This is no longer necessary, now that the backwards-compatible default constructors have been removed. Change 3837247 by Ben.Marsh UBT: Remove UELinkerFixups module, now that plugins and precompiled modules do not require hacks to force initialization (since they're linked in as object files). Encryption and signing keys are now set via macros expanded from the IMPLEMENT_PRIMARY_GAME_MODULE macro, via project-specific macros added in the TargetRules constructor. Change 3837262 by Ben.Marsh UBT: Set whether a module is an engine module or not via a default value for the rules assembly. All non-program engine and enterprise modules are created with this flag set to true; program targets and modules are now created from a different assembly that sets it to false. This removes hacks from UEBuildModule needed to adjust behavior for different module types based on the directory containing the module. Also add a bUseBackwardsCompatibleDefaults flag to the TargetRules class, also initialized to a default value from a setting passed to the RulesAssembly constructor. This controls whether modules created for the target should be configured to allow breaking changes to default settings, and is set to false for all engine targets, and true for all project targets. Change 3837343 by Ben.Marsh UBT: Remove the OverrideExecutableFileExtension target property. Change the only current use for this (the MayaLiveLinkPlugin target) to use a post build step to copy the file instead. Change 3837356 by Ben.Marsh Fix invalid character encodings. 
Change 3837727 by Graeme.Thornton UnrealPak: KeyGenerator: Only generate prime table when required, not all the time Change 3837823 by Ben.Marsh UBT: Output warnings and errors when compiling module rules assembly in a way that allows them to be double-clicked in the Visual Studio output window. Change 3837831 by Graeme.Thornton UBT: When parsing crypto settings, always load legacy data first, then allow the new system to override it. Provides the same key backwards compatibility that the editor settings class gives Change 3837857 by Robert.Manuszewski PR #4404: Make FGCArrayPool singleton global instead of per-CU (Contributed by mhutch) Change 3837943 by Robert.Manuszewski PR #4405: Fix FGarbageCollectionTracer (Contributed by mhutch) Change 3838451 by Ben.Marsh UBT: Fix exceptions thrown on a background thread while caching C++ includes not being caught and logged correctly. Now captures exceptions and re-throws on the main thread. #jira UE-53996 Change 3839519 by Ben.Marsh UBT: Simplify configuring bPrecompile and bUsePrecompile settings for modules. Each rules assembly can now be configured as installed, which defaults the module rules it creates to use precompiled data. Change 3843790 by Graeme.Thornton UnrealPak: Log the size of all encrypted data Change 3844258 by Ben.Marsh Fix plugin compile failure when created via new plugin wizard. Passing -plugin on the command line is unnecessary, and is now reserved for packaging external plugins for the marketplace. Also extend the length of time that the error toast stays visible, and don't delete the plugin on failure. #jira UE-54157 Change 3845796 by Ben.Marsh Workaround for slow performance of String.EndsWith() on Mono. Change 3845823 by Ben.Marsh Fix case sensitive matching of platform names in -TargetPlatform=X argument to BuildCookRun. #jira UE-54123 Change 3845901 by Arciel.Rekman Linux: fix crash due to lambda lifetime issues (UE-54040). 
- The lambda goes out of scope in FBufferVisualizationMenuCommands::CreateVisualizationCommands, crashing the editor if compiled with a recent clang (5.0+). (Edigrating 3819174 to Dev-Core) Change 3846439 by Ben.Marsh Revert CL 3822742 to always call Process.WaitForExit(). The Android target platform module in the editor spawns ADB.EXE, which inherits the editor's stdout/stderr handles and forks itself. Process.WaitForExit() waits for EOF on those pipes, which never occurs because the forked process never terminates. Proper fix is probably to have the engine explicitly duplicate stdout/stderr handles for new pipes to output process, but too risky before copying up to Main. Change 3816608 by Ben.Marsh UBT: Use DirectoryReference objects for all include paths. Change 3816954 by Ben.Marsh UBT: Remove bIncludeDependentLibrariesInLibrary option. This is not widely supported by platform toolchains, and is not used anywhere. Change 3816986 by Ben.Marsh UBT: Remove UEBuildBinaryConfig; UEBuildBinary objects are now just created directly. Change 3816991 by Ben.Marsh UBT: Deprecate PlatformSpecificDynamicallyLoadedModules. We no longer have any special behavior for these modules. Change 3823090 by Ben.Marsh UAT: Improve logging for child UAT instances. - Calling RunUAT now requires an identifier for prefixing into the parent log, which is also used to determine the name of the log folder. - Stdout is no longer written to its own output file, since it's written to the parent stdout, the parent log file, and the child log file anyway. - Log folders for child UAT instances are left intact, rather than being copied to the parent folder. The derived names for the copied names were confusing and hard to read. - Output from UAT is no longer returned as a string. It should not be parsed anyway (but may be huge!). ProcessResult now supports running without capturing output. 
Change 3826082 by Ben.Marsh UBT: Add a check to make sure that all modules that are precompiled are correctly marked to enable it, even if they are part of the build target. Change 3827025 by Ben.Marsh UBT: Move the compile output directory into a property on the module, and explicitly pass it to the toolchain when compiling. Change 3829927 by James.Hopkin Made HTTP interface const correct Change 3833533 by Ben.Marsh Rewrite engine source files to base include paths relative to the "Public" directory. This allows reducing the number of public include paths that have to be added for engine modules. Change 3835826 by Ben.Marsh UBT: Precompiled targets now generate a separate manifest for each precompiled module, rather than adding object files to a library. This fixes issues where object files from static libraries would not be linked into a target if a symbol in them was not referenced. Change 3835969 by Ben.Marsh UBT: Fix cases where text is being written directly to the console rather than via logging functions. Change 3837777 by Steve.Robb Format string type checking added to FOutputDevice::Logf. Fixes for those. Change 3838569 by Steve.Robb Algo moved up a folder. [CL 3847482 by Ben Marsh in Main branch]
2018-01-20 11:19:29 -05:00
#include "Misc/FileHelper.h"
#endif
// Associates the FMaterialBakingModule class with the module name "MaterialBaking".
// NOTE(review): the macro's expansion is not visible in this file; presumably it emits
// the module's registration/entry-point boilerplate — confirm against the engine headers.
IMPLEMENT_MODULE(FMaterialBakingModule, MaterialBaking);
// Defines the LogMaterialBaking log category for this file; both verbosity arguments are set to All.
// NOTE(review): the _STATIC variant presumably limits the category to this translation unit — confirm.
DEFINE_LOG_CATEGORY_STATIC(LogMaterialBaking, All, All);
// Text namespace string used for this file.
// NOTE(review): presumably consumed by LOCTEXT(...) usages and #undef'd later in the file;
// neither is visible in this chunk.
#define LOCTEXT_NAMESPACE "MaterialBakingModule"
/** Cvars for advanced features */
// Console variable "MaterialBaking.UseMaterialProxyCaching" (int32, default value 1).
// Per its help text: 1 caches Material Proxies to speed up material baking, 0 turns caching off.
// The adjacent TEXT(...) literals below form the single help-string argument.
static TAutoConsoleVariable<int32> CVarUseMaterialProxyCaching(
TEXT("MaterialBaking.UseMaterialProxyCaching"),
1,
TEXT("Determines whether or not Material Proxies should be cached to speed up material baking.\n")
TEXT("0: Turned Off\n")
TEXT("1: Turned On"),
ECVF_Default);
// Console variable "MaterialBaking.SaveIntermediateTextures" (int32, default value 0).
// Per its help text: 1 saves out an intermediate BMP image for each flattened material
// property, 0 (the default) turns this off.
// NOTE(review): the code that reads this variable and writes the images is outside this chunk.
static TAutoConsoleVariable<int32> CVarSaveIntermediateTextures(
TEXT("MaterialBaking.SaveIntermediateTextures"),
0,
TEXT("Determines whether or not to save out intermediate BMP images for each flattened material property.\n")
TEXT("0: Turned Off\n")
TEXT("1: Turned On"),
ECVF_Default);
// Console variable "MaterialBaking.RenderDocCapture" (int32, default value 0).
// Per its help text: 1 triggers a RenderDoc capture, 0 (the default) turns this off.
// NOTE(review): where the capture is started is not visible in this chunk; presumably it
// relates to the RenderCaptureInterface.h include above — confirm.
static TAutoConsoleVariable<int32> CVarMaterialBakingRDOCCapture(
TEXT("MaterialBaking.RenderDocCapture"),
0,
TEXT("Determines whether or not to trigger a RenderDoc capture.\n")
TEXT("0: Turned Off\n")
TEXT("1: Turned On"),
ECVF_Default);
namespace FMaterialBakingModuleImpl
{
// Custom dynamic mesh allocator specifically tailored for Material Baking.
// This will always reuse the same couple buffers, so searching linearly is not a problem.
class FMaterialBakingDynamicMeshBufferAllocator : public FDynamicMeshBufferAllocator
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// This must be smaller than the large allocation blocks on Windows 10 which is currently ~508K.
// Large allocations uses VirtualAlloc directly without any kind of buffering before
// releasing pages to the kernel, so it causes lots of soft page fault when
// memory is first initialized.
const uint32 SmallestPooledBufferSize = 256*1024;
TArray<FBufferRHIRef> IndexBuffers;
TArray<FBufferRHIRef> VertexBuffers;
template <typename RefType>
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
RefType GetSmallestFit(uint32 SizeInBytes, TArray<RefType>& Array)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
uint32 SmallestFitIndex = UINT32_MAX;
uint32 SmallestFitSize = UINT32_MAX;
for (int32 Index = 0; Index < Array.Num(); ++Index)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
uint32 Size = Array[Index]->GetSize();
if (Size >= SizeInBytes && (SmallestFitIndex == UINT32_MAX || Size < SmallestFitSize))
{
SmallestFitIndex = Index;
SmallestFitSize = Size;
}
}
RefType Ref;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Do not reuse the smallest fit if it's a lot bigger than what we requested
if (SmallestFitIndex != UINT32_MAX && SmallestFitSize < SizeInBytes*2)
{
Ref = Array[SmallestFitIndex];
Array.RemoveAtSwap(SmallestFitIndex);
}
return Ref;
}
virtual FBufferRHIRef AllocIndexBuffer(uint32 NumElements) override
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
uint32 BufferSize = GetIndexBufferSize(NumElements);
if (BufferSize > SmallestPooledBufferSize)
{
FBufferRHIRef Ref = GetSmallestFit(GetIndexBufferSize(NumElements), IndexBuffers);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (Ref.IsValid())
{
return Ref;
}
}
return FDynamicMeshBufferAllocator::AllocIndexBuffer(NumElements);
}
virtual void ReleaseIndexBuffer(FBufferRHIRef& IndexBufferRHI) override
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (IndexBufferRHI->GetSize() > SmallestPooledBufferSize)
{
IndexBuffers.Add(MoveTemp(IndexBufferRHI));
}
IndexBufferRHI = nullptr;
}
virtual FBufferRHIRef AllocVertexBuffer(uint32 Stride, uint32 NumElements) override
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
uint32 BufferSize = GetVertexBufferSize(Stride, NumElements);
if (BufferSize > SmallestPooledBufferSize)
{
FBufferRHIRef Ref = GetSmallestFit(BufferSize, VertexBuffers);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (Ref.IsValid())
{
return Ref;
}
}
return FDynamicMeshBufferAllocator::AllocVertexBuffer(Stride, NumElements);
}
virtual void ReleaseVertexBuffer(FBufferRHIRef& VertexBufferRHI) override
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (VertexBufferRHI->GetSize() > SmallestPooledBufferSize)
{
VertexBuffers.Add(MoveTemp(VertexBufferRHI));
}
VertexBufferRHI = nullptr;
}
};
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
class FStagingBufferPool
{
public:
FTexture2DRHIRef CreateStagingBuffer_RenderThread(FRHICommandListImmediate& RHICmdList, int32 Width, int32 Height, EPixelFormat Format, bool bIsSRGB)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
TRACE_CPUPROFILER_EVENT_SCOPE(CreateStagingBuffer_RenderThread)
auto StagingBufferPredicate =
[Width, Height, Format, bIsSRGB](const FTexture2DRHIRef& Texture2DRHIRef)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
return Texture2DRHIRef->GetSizeX() == Width && Texture2DRHIRef->GetSizeY() == Height && Texture2DRHIRef->GetFormat() == Format && bool(Texture2DRHIRef->GetFlags() & TexCreate_SRGB) == bIsSRGB;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
};
// Process any staging buffers available for unmapping
{
TArray<FTexture2DRHIRef> ToUnmapLocal;
{
FScopeLock Lock(&ToUnmapLock);
ToUnmapLocal = MoveTemp(ToUnmap);
}
for (int32 Index = 0, Num = ToUnmapLocal.Num(); Index < Num; ++Index)
{
RHICmdList.UnmapStagingSurface(ToUnmapLocal[Index]);
Pool.Add(MoveTemp(ToUnmapLocal[Index]));
}
}
// Find any pooled staging buffer with suitable properties.
int32 Index = Pool.IndexOfByPredicate(StagingBufferPredicate);
if (Index != -1)
{
FTexture2DRHIRef StagingBuffer = MoveTemp(Pool[Index]);
Pool.RemoveAtSwap(Index);
return StagingBuffer;
}
TRACE_CPUPROFILER_EVENT_SCOPE(RHICreateTexture2D)
FRHIResourceCreateInfo CreateInfo(TEXT("FStagingBufferPool_StagingBuffer"));
ETextureCreateFlags TextureCreateFlags = TexCreate_CPUReadback;
if (bIsSRGB)
{
TextureCreateFlags |= TexCreate_SRGB;
}
return RHICreateTexture2D(Width, Height, Format, 1, 1, TextureCreateFlags, CreateInfo);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
/**
 * Hands a staging buffer back to the pool so that it can be unmapped later.
 * The reference is moved into the pending-unmap list, leaving the caller's reference released.
 *
 * @param Texture2DRHIRef	Staging buffer to queue for unmapping; moved from.
 */
void ReleaseStagingBufferForUnmap_AnyThread(FTexture2DRHIRef& Texture2DRHIRef)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(ReleaseStagingBufferForUnmap_AnyThread)

	// The pending list is appended to under ToUnmapLock; the lock is held until the function returns.
	FScopeLock Lock(&ToUnmapLock);
	ToUnmap.Add(MoveTemp(Texture2DRHIRef));
}
/**
 * Unmaps every staging buffer still waiting in the pending-unmap list and releases all pooled buffers.
 *
 * @param RHICmdList	Immediate command list used to unmap the staging surfaces.
 */
void Clear_RenderThread(FRHICommandListImmediate& RHICmdList)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(Clear_RenderThread)

	// ToUnmap is appended to under ToUnmapLock by ReleaseStagingBufferForUnmap_AnyThread,
	// so take ownership of its content under the same lock instead of iterating it unguarded.
	// The lock is only held for the move, never while talking to the RHI.
	TArray<FTexture2DRHIRef> ToUnmapLocal;
	{
		FScopeLock Lock(&ToUnmapLock);
		ToUnmapLocal = MoveTemp(ToUnmap);
	}

	for (FTexture2DRHIRef& StagingSurface : ToUnmapLocal)
	{
		RHICmdList.UnmapStagingSurface(StagingSurface);
	}

	// Drop every pooled buffer; the destructor asserts that the pool is empty.
	Pool.Empty();
}
// Destructor: asserts that no pooled staging buffer is left.
// Clear_RenderThread empties Pool, so it is expected to have run before the pool is destroyed.
~FStagingBufferPool()
{
check(Pool.Num() == 0);
}
private:
// Unmapped staging buffers that are available for reuse.
TArray<FTexture2DRHIRef> Pool;
// Not contended enough to warrant the use of lockless structures.
FCriticalSection ToUnmapLock;
// Staging buffers waiting to be unmapped; appended to under ToUnmapLock by ReleaseStagingBufferForUnmap_AnyThread.
TArray<FTexture2DRHIRef> ToUnmap;
};
/**
 * Lookup key for prepared render items: the mesh data pointer paired with the render size.
 * Two keys are equal when they point at the same mesh data and have the same size.
 */
struct FRenderItemKey
{
	/** Mesh data the render item is built for (compared by address). */
	const FMeshData* RenderData;
	/** Size the render item is built for. */
	const FIntPoint RenderSize;

	FRenderItemKey(const FMeshData* InRenderData, const FIntPoint& InRenderSize)
		: RenderData(InRenderData)
		, RenderSize(InRenderSize)
	{
	}

	bool operator == (const FRenderItemKey& Other) const
	{
		// Different mesh data can never match, whatever the size.
		if (RenderData != Other.RenderData)
		{
			return false;
		}
		return RenderSize == Other.RenderSize;
	}
};
/** Hash of a FRenderItemKey, combining the hash of the mesh data pointer with the hash of the render size. */
uint32 GetTypeHash(const FRenderItemKey& Key)
{
	const uint32 RenderDataHash = GetTypeHash(Key.RenderData);
	const uint32 RenderSizeHash = GetTypeHash(Key.RenderSize);
	return HashCombine(RenderDataHash, RenderSizeHash);
}
}
void FMaterialBakingModule::StartupModule()
{
bEmissiveHDR = false;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Set which properties should enforce gamma correction
SetLinearBake(true);
// Set which pixel format should be used for the possible baked out material properties
PerPropertyFormat.Add(MP_EmissiveColor, PF_FloatRGBA);
PerPropertyFormat.Add(MP_Opacity, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_OpacityMask, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_BaseColor, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Metallic, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Specular, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Roughness, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Anisotropy, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Normal, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_Tangent, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_AmbientOcclusion, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_SubsurfaceColor, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_CustomData0, PF_B8G8R8A8);
PerPropertyFormat.Add(MP_CustomData1, PF_B8G8R8A8);
PerPropertyFormat.Add(TEXT("ClearCoatBottomNormal"), PF_B8G8R8A8);
// Register property customization
FPropertyEditorModule& Module = FModuleManager::Get().LoadModuleChecked<FPropertyEditorModule>("PropertyEditor");
Module.RegisterCustomPropertyTypeLayout(TEXT("PropertyEntry"), FOnGetPropertyTypeCustomizationInstance::CreateStatic(&FPropertyEntryCustomization::MakeInstance));
// Register callback for modified objects
FCoreUObjectDelegates::OnObjectModified.AddRaw(this, &FMaterialBakingModule::OnObjectModified);
// Register callback on garbage collection
FCoreUObjectDelegates::GetPreGarbageCollectDelegate().AddRaw(this, &FMaterialBakingModule::OnPreGarbageCollect);
}
void FMaterialBakingModule::ShutdownModule()
{
// Unregister customization and callback
FPropertyEditorModule* PropertyEditorModule = FModuleManager::GetModulePtr<FPropertyEditorModule>("PropertyEditor");
if (PropertyEditorModule)
{
PropertyEditorModule->UnregisterCustomPropertyTypeLayout(TEXT("PropertyEntry"));
}
FCoreUObjectDelegates::OnObjectModified.RemoveAll(this);
FCoreUObjectDelegates::GetPreGarbageCollectDelegate().RemoveAll(this);
CleanupMaterialProxies();
}
/**
 * Legacy entry point: bakes materials described with the non-extended types.
 * The settings are translated to their extended counterparts, baked through the extended
 * overload, and the results are translated back and appended to Output.
 *
 * @param MaterialSettings	Material settings to bake, one entry per mesh settings entry.
 * @param MeshSettings		Mesh settings forwarded unchanged to the extended overload.
 * @param Output			Receives one appended FBakeOutput per extended bake result.
 */
void FMaterialBakingModule::BakeMaterials(const TArray<FMaterialData*>& MaterialSettings, const TArray<FMeshData*>& MeshSettings, TArray<FBakeOutput>& Output)
{
	// Translate old material data to extended types
	TArray<FMaterialDataEx> MaterialDataExs;
	MaterialDataExs.Reserve(MaterialSettings.Num());
	for (const FMaterialData* MaterialData : MaterialSettings)
	{
		FMaterialDataEx& MaterialDataEx = MaterialDataExs.AddDefaulted_GetRef();
		MaterialDataEx.Material = MaterialData->Material;
		MaterialDataEx.bPerformBorderSmear = MaterialData->bPerformBorderSmear;

		MaterialDataEx.PropertySizes.Reserve(MaterialData->PropertySizes.Num());
		for (const TPair<EMaterialProperty, FIntPoint>& PropertySizePair : MaterialData->PropertySizes)
		{
			MaterialDataEx.PropertySizes.Add(PropertySizePair.Key, PropertySizePair.Value);
		}
	}

	// Build an array of pointers to the extended type.
	// This is done only once MaterialDataExs is fully populated so the addresses remain stable.
	TArray<FMaterialDataEx*> MaterialSettingsEx;
	MaterialSettingsEx.Reserve(MaterialDataExs.Num());
	for (FMaterialDataEx& MaterialDataEx : MaterialDataExs)
	{
		MaterialSettingsEx.Add(&MaterialDataEx);
	}

	TArray<FBakeOutputEx> BakeOutputExs;
	BakeMaterials(MaterialSettingsEx, MeshSettings, BakeOutputExs);

	// Translate extended bake output to old types.
	// Results are appended, and Reserve() takes the total capacity, so account for what Output already holds.
	Output.Reserve(Output.Num() + BakeOutputExs.Num());
	for (FBakeOutputEx& BakeOutputEx : BakeOutputExs)
	{
		FBakeOutput& BakeOutput = Output.AddDefaulted_GetRef();
		BakeOutput.EmissiveScale = BakeOutputEx.EmissiveScale;

		BakeOutput.PropertySizes.Reserve(BakeOutputEx.PropertySizes.Num());
		for (TPair<FMaterialPropertyEx, FIntPoint>& PropertySizePair : BakeOutputEx.PropertySizes)
		{
			BakeOutput.PropertySizes.Add(PropertySizePair.Key.Type, PropertySizePair.Value);
		}

		// Pixel data is moved rather than copied; BakeOutputExs is discarded when this function returns.
		BakeOutput.PropertyData.Reserve(BakeOutputEx.PropertyData.Num());
		for (TPair<FMaterialPropertyEx, TArray<FColor>>& PropertyDataPair : BakeOutputEx.PropertyData)
		{
			BakeOutput.PropertyData.Add(PropertyDataPair.Key.Type, MoveTemp(PropertyDataPair.Value));
		}

		BakeOutput.HDRPropertyData.Reserve(BakeOutputEx.HDRPropertyData.Num());
		for (TPair<FMaterialPropertyEx, TArray<FFloat16Color>>& PropertyDataPair : BakeOutputEx.HDRPropertyData)
		{
			BakeOutput.HDRPropertyData.Add(PropertyDataPair.Key.Type, MoveTemp(PropertyDataPair.Value));
		}
	}
}
void FMaterialBakingModule::BakeMaterials(const TArray<FMaterialDataEx*>& MaterialSettings, const TArray<FMeshData*>& MeshSettings, TArray<FBakeOutputEx>& Output)
{
UE_LOG(LogMaterialBaking, Verbose, TEXT("Performing material baking for %d materials"), MaterialSettings.Num());
for (int32 i = 0; i < MaterialSettings.Num(); i++)
{
if (MaterialSettings[i]->Material && MeshSettings[i]->MeshDescription)
{
UE_LOG(LogMaterialBaking, Verbose, TEXT(" [%5d] Material: %-50s Vertices: %8d Triangles: %8d"), i, *MaterialSettings[i]->Material->GetName(), MeshSettings[i]->MeshDescription->Vertices().Num(), MeshSettings[i]->MeshDescription->Triangles().Num());
}
}
RenderCaptureInterface::FScopedCapture RenderCapture(CVarMaterialBakingRDOCCapture.GetValueOnAnyThread() == 1, TEXT("MaterialBaking"));
TRACE_CPUPROFILER_EVENT_SCOPE(FMaterialBakingModule::BakeMaterials)
checkf(MaterialSettings.Num() == MeshSettings.Num(), TEXT("Number of material settings does not match that of MeshSettings"));
const int32 NumMaterials = MaterialSettings.Num();
const bool bSaveIntermediateTextures = CVarSaveIntermediateTextures.GetValueOnAnyThread() == 1;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
using namespace FMaterialBakingModuleImpl;
FMaterialBakingDynamicMeshBufferAllocator MaterialBakingDynamicMeshBufferAllocator;
FScopedSlowTask Progress(NumMaterials, LOCTEXT("BakeMaterials", "Baking Materials..."), true );
Progress.MakeDialog(true);
TArray<uint32> ProcessingOrder;
ProcessingOrder.Reserve(MeshSettings.Num());
for (int32 Index = 0; Index < MeshSettings.Num(); ++Index)
{
ProcessingOrder.Add(Index);
}
// Start with the biggest mesh first so we can always reuse the same vertex/index buffers.
// This will decrease the number of allocations backed by newly allocated memory from the OS,
// which will reduce soft page faults while copying into that memory.
// Soft page faults are now incredibly expensive on Windows 10.
Algo::SortBy(
ProcessingOrder,
[&MeshSettings](const uint32 Index){ return MeshSettings[Index]->MeshDescription ? MeshSettings[Index]->MeshDescription->Vertices().Num() : 0; },
TGreater<>()
);
Output.SetNum(NumMaterials);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
struct FPipelineContext
{
typedef TFunction<void (FRHICommandListImmediate& RHICmdList)> FReadCommand;
FReadCommand ReadCommand;
};
// Distance between the command sent to rendering and the GPU read-back of the result
// to minimize sync time waiting on GPU.
const int32 PipelineDepth = 16;
int32 PipelineIndex = 0;
FPipelineContext PipelineContext[PipelineDepth];
// This will create and prepare FMeshMaterialRenderItem for each property sizes we're going to need
auto PrepareRenderItems_AnyThread =
[&](int32 MaterialIndex)
{
TRACE_CPUPROFILER_EVENT_SCOPE(PrepareRenderItems);
TMap<FMaterialBakingModuleImpl::FRenderItemKey, FMeshMaterialRenderItem*>* RenderItems = new TMap<FRenderItemKey, FMeshMaterialRenderItem *>();
const FMaterialDataEx* CurrentMaterialSettings = MaterialSettings[MaterialIndex];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const FMeshData* CurrentMeshSettings = MeshSettings[MaterialIndex];
for (TMap<FMaterialPropertyEx, FIntPoint>::TConstIterator PropertySizeIterator = CurrentMaterialSettings->PropertySizes.CreateConstIterator(); PropertySizeIterator; ++PropertySizeIterator)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
FRenderItemKey RenderItemKey(CurrentMeshSettings, PropertySizeIterator.Value());
if (RenderItems->Find(RenderItemKey) == nullptr)
{
RenderItems->Add(RenderItemKey, new FMeshMaterialRenderItem(PropertySizeIterator.Value(), CurrentMeshSettings, &MaterialBakingDynamicMeshBufferAllocator));
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
}
return RenderItems;
};
// We reuse the pipeline depth to prepare render items in advance to avoid stalling the game thread
int NextRenderItem = 0;
TFuture<TMap<FRenderItemKey, FMeshMaterialRenderItem*>*> PreparedRenderItems[PipelineDepth];
for (; NextRenderItem < NumMaterials && NextRenderItem < PipelineDepth; ++NextRenderItem)
{
PreparedRenderItems[NextRenderItem] =
Async(
EAsyncExecution::ThreadPool,
[&PrepareRenderItems_AnyThread, &ProcessingOrder, NextRenderItem]()
{
return PrepareRenderItems_AnyThread(ProcessingOrder[NextRenderItem]);
}
);
}
// Create all material proxies right away to start compiling shaders asynchronously and avoid stalling the baking process as much as possible
{
TRACE_CPUPROFILER_EVENT_SCOPE(CreateMaterialProxies)
for (int32 Index = 0; Index < NumMaterials; ++Index)
{
int32 MaterialIndex = ProcessingOrder[Index];
const FMaterialDataEx* CurrentMaterialSettings = MaterialSettings[MaterialIndex];
TArray<UTexture*> MaterialTextures;
CurrentMaterialSettings->Material->GetUsedTextures(MaterialTextures, EMaterialQualityLevel::Num, true, GMaxRHIFeatureLevel, true);
// Force load textures used by the current material
{
TRACE_CPUPROFILER_EVENT_SCOPE(LoadTexturesForMaterial)
Async Texture Compilation - Feature can be activated in the Experimental section of the Editor Settings - Replace Texture2D/TextureCube resources by placeholders until their PlatformData is ready - Add a utility class allowing to encapsulate raw field pointers without breaking compatibility - Protect PlatformData from unsafe access through encapsulation. - Protect texture's resource from race conditions between game and render threads through encapsulation. - This allows to get rid of FlushRenderingCommands and long game-thread stutters when Updating a texture's resource. - UpdateResource was never safe to call without a FlushRenderingCommands and multiple call sites are doing exactly that, this will fix those cases. - Those were probably undetected due to their low occurrence rate under normal conditions but can easily be reproduced during async texture compilation on 32 cores. - Force wait on required texture compilations for MaterialBaking, ProxyMesh, Thumbnail generation for disk usage - Wait on all textures compilation whenever a wait for all shaders compilation is requested for safety (e.g. screenshot) - Compile UI and heightmap textures with higher priority to reduce visual artefacts - Increase priority of textures that have been rendered to improve time-to-usefulness of the editor under low core count - Async compilation is disabled for -game / non-editor mode as there is currently no support for async bulk data loading from external files - Properly cancel async tasks when UTexture is garbage collected before the compilation is finished - Show progress when explicitly waiting on compilation - Changing the mip settings in the texture editor (or any settings requiring the running platform data to be recomputed) will now be processed asynchronously. 
DEBUGGING - Can be forcibly enabled/disabled through command-line via -asynctexturecompilation=[off, on, paused] - Can pause texture compilation using Editor.AsyncTextureCompilation = 2 or -asynctexturecompilation=paused - Can manually resume a specified amount of paused compilation using Editor.AsyncTextureCompilationResume [Num] - Can forcibly wait on all compilation using Editor.AsyncTextureCompilationFlushAll BENCHMARKS - 3m15s to 1m20s when loading Apollo_Terrain with no textures in DDC (AMD TR 3970X) - 6m45s to 1m11s when loading Apollo_Terrain with no textures in DDC (-corelimit=8) - 3m10s to 1m54s when launching PIE on Apollo_Terrain with no textures in DDC (AMD TR 3970X) - 7m43s to 1m36s when launching PIE on Apollo_Terrain with no textures in DDC (-corelimit=8) - 0m57s to 0m42s when importing Attic_NVIDIA.usd with no textures in DDC (AMD TR 3970X) - 2m14s to 0m35s when importing Attic_NVIDIA.usd with no textures in DDC (-corelimit=4) TESTS - Success on all material baking tests from EngineTests with -asynctexturecompilation=paused - Ran with -corelimit=1 all the way to unlimited - Cooking worked - Opening the texture editor/material editor will force the compilation to finish as expected. - Changing a setting in the texture editor will recompile async, even allowing to close the editor and continue doing other changes. - Unpausing the compilation will update the texture thumbnails properly. - Started with -asynctexturecompilation=paused, and then unpaused after a map loading, and then into a PIE session to stress-test UpdateResources. - Tested both dx11/dx12 - Vulkan fails on Fortnite even with -asynctexturecompilation=off because of Landscape weightmap, not this CL. - Compiled and tested FortniteGame / UE4 / ShooterGame projects #rb Uriel.Doyon, Francis.Hurteau [CL 13694814 by danny couture in ue5-main branch]
2020-06-16 22:16:25 -04:00
FTextureCompilingManager::Get().FinishCompilation(MaterialTextures);
for (UTexture* Texture : MaterialTextures)
{
if (Texture != NULL)
{
UTexture2D* Texture2D = Cast<UTexture2D>(Texture);
if (Texture2D)
{
Texture2D->SetForceMipLevelsToBeResident(30.0f);
Texture2D->WaitForStreaming();
}
}
}
}
for (TMap<FMaterialPropertyEx, FIntPoint>::TConstIterator PropertySizeIterator = CurrentMaterialSettings->PropertySizes.CreateConstIterator(); PropertySizeIterator; ++PropertySizeIterator)
{
// They will be stored in the pool and compiled asynchronously
CreateMaterialProxy(CurrentMaterialSettings->Material, PropertySizeIterator.Key());
}
}
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
TAtomic<uint32> NumTasks(0);
FStagingBufferPool StagingBufferPool;
for (int32 Index = 0; Index < NumMaterials; ++Index)
{
TRACE_CPUPROFILER_EVENT_SCOPE(BakeOneMaterial)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
Progress.EnterProgressFrame(1.0f, FText::Format(LOCTEXT("BakingMaterial", "Baking Material {0}/{1}"), Index, NumMaterials));
int32 MaterialIndex = ProcessingOrder[Index];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
TMap<FRenderItemKey, FMeshMaterialRenderItem*>* RenderItems;
{
TRACE_CPUPROFILER_EVENT_SCOPE(WaitOnPreparedRenderItems)
RenderItems = PreparedRenderItems[Index % PipelineDepth].Get();
}
// Prepare the next render item in advance
if (NextRenderItem < NumMaterials)
{
check((NextRenderItem % PipelineDepth) == (Index % PipelineDepth));
PreparedRenderItems[NextRenderItem % PipelineDepth] =
Async(
EAsyncExecution::ThreadPool,
[&PrepareRenderItems_AnyThread, NextMaterialIndex = ProcessingOrder[NextRenderItem]]()
{
return PrepareRenderItems_AnyThread(NextMaterialIndex);
}
);
NextRenderItem++;
}
const FMaterialDataEx* CurrentMaterialSettings = MaterialSettings[MaterialIndex];
const FMeshData* CurrentMeshSettings = MeshSettings[MaterialIndex];
FBakeOutputEx& CurrentOutput = Output[MaterialIndex];
TRACE_CPUPROFILER_EVENT_SCOPE_TEXT(*CurrentMaterialSettings->Material->GetName())
TArray<FMaterialPropertyEx> MaterialPropertiesToBakeOut;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
CurrentMaterialSettings->PropertySizes.GenerateKeyArray(MaterialPropertiesToBakeOut);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const int32 NumPropertiesToRender = MaterialPropertiesToBakeOut.Num();
if (NumPropertiesToRender > 0)
{
TRACE_CPUPROFILER_EVENT_SCOPE(RenderOneMaterial)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Ensure data in memory will not change place past this point to avoid race conditions
CurrentOutput.PropertySizes = CurrentMaterialSettings->PropertySizes;
for (int32 PropertyIndex = 0; PropertyIndex < NumPropertiesToRender; ++PropertyIndex)
{
const FMaterialPropertyEx& Property = MaterialPropertiesToBakeOut[PropertyIndex];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
CurrentOutput.PropertyData.Add(Property);
if (bEmissiveHDR && Property == MP_EmissiveColor)
{
CurrentOutput.HDRPropertyData.Add(Property);
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
for (int32 PropertyIndex = 0; PropertyIndex < NumPropertiesToRender; ++PropertyIndex)
{
const FMaterialPropertyEx& Property = MaterialPropertiesToBakeOut[PropertyIndex];
TRACE_CPUPROFILER_EVENT_SCOPE_TEXT(*Property.ToString())
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
FExportMaterialProxy* ExportMaterialProxy = CreateMaterialProxy(CurrentMaterialSettings->Material, Property);
if (!ExportMaterialProxy->IsCompilationFinished())
{
TRACE_CPUPROFILER_EVENT_SCOPE(WaitForMaterialProxyCompilation)
ExportMaterialProxy->FinishCompilation();
}
// Look up gamma and format settings for the property; if not found, use default values
const EPropertyColorSpace* OverrideColorSpace = PerPropertyColorSpace.Find(Property);
const EPropertyColorSpace ColorSpace = OverrideColorSpace ? *OverrideColorSpace : DefaultColorSpace;
const EPixelFormat PixelFormat = PerPropertyFormat.Contains(Property) ? PerPropertyFormat[Property] : PF_B8G8R8A8;
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// It is safe to reuse the same render target for each draw pass since they all execute sequentially on the GPU and are copied to staging buffers before
// being reused.
UTextureRenderTarget2D* RenderTarget = CreateRenderTarget((ColorSpace == EPropertyColorSpace::Linear), PixelFormat, CurrentOutput.PropertySizes[Property]);
if (RenderTarget != nullptr)
{
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Perform everything left of the operation directly on the render thread since we need to modify some RenderItem's properties
// for each render pass and we can't do that without costly synchronization (flush) between the game thread and render thread.
// Everything slow to execute has already been prepared on the game thread anyway.
ENQUEUE_RENDER_COMMAND(RenderOneMaterial)(
[this, RenderItems, RenderTarget, Property, ExportMaterialProxy, &PipelineContext, PipelineIndex, &StagingBufferPool, &NumTasks, bSaveIntermediateTextures, &MaterialSettings, &MeshSettings, MaterialIndex, &Output](FRHICommandListImmediate& RHICmdList)
{
const FMaterialDataEx* CurrentMaterialSettings = MaterialSettings[MaterialIndex];
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const FMeshData* CurrentMeshSettings = MeshSettings[MaterialIndex];
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
FMeshMaterialRenderItem& RenderItem = *RenderItems->FindChecked(FRenderItemKey(CurrentMeshSettings, FIntPoint(RenderTarget->GetSurfaceWidth(), RenderTarget->GetSurfaceHeight())));
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
FSceneViewFamily ViewFamily(FSceneViewFamily::ConstructionValues(RenderTarget->GetRenderTargetResource(), nullptr,
FEngineShowFlags(ESFIM_Game))
.SetTime(FGameTime())
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
.SetGammaCorrection(RenderTarget->GetRenderTargetResource()->GetDisplayGamma()));
Integrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mechanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 @ 150 GB RAM -> 2m14s @ 45 GB RAM for 6 channels @ 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channels @ 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
RenderItem.MaterialRenderProxy = ExportMaterialProxy;
RenderItem.ViewFamily = &ViewFamily;
Copying //UE4/Dev-AnimPhys to //UE4/Dev-Main (Source: //UE4/Dev-AnimPhys @ 3624379) #lockdown Nick.Penwarden #rb none ============================ MAJOR FEATURES & CHANGES ============================ Change 3536809 by Ben.Marsh Fixing case of files in "iOS" directory, pt 1. Change 3536814 by Ben.Marsh Fixing case of files in "iOS" directory, pt 2. Change 3596207 by Thomas.Sarkanen Copying //Tasks/UE4/Dev-UEAP-29-PhATUpgrade to Dev-AnimPhys (//UE4/Dev-AnimPhys) @ CL 3590250 PhAT Upgrade #jira UEAP-29 - New PhysicsAsset editor Changelists from task stream: Change 3380649 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Initial pass at allowing viewports to be extended more easily, still plenty TOD, but just unearthing this old shelf and getting it working. This gets the Persona skeleton tree and viewport into PhAT, without any PhAT functionality added. Change 3380685 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Renaming PhAT files to PhysicsAssetEditor Change 3380749 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Rename PhAT -> PhysicsAssetEditor Change 3380832 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed up PhAT to Physics Asset Editor Change 3380884 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Reverted some over-zealous renaming Change 3380970 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Tweaked ISkeletonTreeBuilder interface to make way for actually making a derived class of it Added the ability to hide filter menus to skeleton tree Change 3381017 on 2017/04/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added new physics asset skeleton tree builder Change 3384407 on 2017/04/07 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Skeleton tree extensions to support physics assets Only started this work - still much to do Change 3384460 on 2017/04/07 by 
Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Rearranged persona viewport menus Change 3392222 on 2017/04/13 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed body/constraint modes. Added graph editor Added edit mode - moved viewport client code over Got PhAT skel mesh rendering in viewport Change 3392268 on 2017/04/13 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Increased hit proxy priority to improve selection Change 3401648 on 2017/04/20 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Skeleton tree gets bodies & shapes back. Selection works in graph, now displaying the correct constraint in the detials panel. Still need to add selection from viewport. Added multi-select to bone proxy customization Re-tweaked editor layout Change 3403701 on 2017/04/21 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Selection sync work. Customization of anim viewport menus. Context menus for physics asset items, as well as masking of various context menu items via settings. Change 3405246 on 2017/04/24 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Started more work on viewport menu extensions, but need to refactor the toolbar system to use actual multiboxes. Up next! Change 3405274 on 2017/04/24 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen More viewport menu fixups (plus deleting duplicate functionality). Change 3409155 on 2017/04/26 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Got simulation working again - as we switched to the debug skel mesh comp, the normal tick path didnt work for post-blend physics (it tried to flip the buffer too early). Also tweaked debug skel mesh comp root motion consumption code to not reset transfor every frame if we are not using root motion. 
Cleaned up unused files & code Change 3410814 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Allow extensibility of viewport menu bars Slate changes: Allow menu bars to optionally specify an icon to use. This is intended to allow us to move viewport tool/menu bars over to use multibox, with all the attendant features and extension points. Allow menu bars to optionally invert-on-hover. Allow styling of menus to affect closed appearance of menu header. Previously only NoBorder was used. Adjusted core styling of menu bar elements. Other changes: Adjusted padding for various UI elements to preserve previoud behavior. Adjusted SAnimViewportToolbar to use the new menu bar builder. Exposed SEditorViewportViewMenu so that it can be used in a standard menu bar. Change 3410816 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added extension point to viewport menu bar Change 3410818 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Getting sim working again Moved over to using preview instance so we share functionality with Persona editors. Added time dilation options to persona preview scene. Removed PhAT specific recording functionality (it is in the viewport now). 
Change 3410840 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Recreate physics state on edit, not sim start This allows velocity to be inherited when simulation is started Change 3410863 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moving viewport to continually-invalidated one like animation editors Fixed crash in non-extended viewport toolbars Change 3410936 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Bodies start off non-expanded Selection now synced between viewport and graph Constraint selection in graph not works on the first try Change 3410943 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added missing icon Change 3410966 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed shape listing from graph nodes Change 3411013 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Double click on body node recenters graph Fixed graph disappearing on right-click Change 3411111 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Prevented cursor getting swallowed in sim mode Change 3411126 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed overlapping text Change 3411213 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Node layout now takes dimensions into account Change 3411320 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed crash opening Persona editors Renamed file Change 3411327 on 2017/04/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Tweaks to profiles menu Change 3420822 on 2017/05/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Profiles can now be edited in their own details panel Existing customizations folded into the new panel Tweaks to toolbar Added the ability for the persona details panel to have extra top/bottom content added Change 3420832 on 2017/05/03 by 
Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Add profile control to context menus Also delete old unused code Change 3422651 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Toolbar trimmed down & re-ordered Body/constraint ops moved to context menus Apply physmat now a context-menu option with an asset picker Change 3422654 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed extra warning dialog when auto-creating bodies Changed title of new asset dialog to "auto-create bodies" Change 3422680 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix "simulate selected" As we dont re-init the physics state each time we start simulating, our tweaked physics type was never applied. We now manually do this in the editor. Change 3422937 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Replaced EKCollisionPrimitiveType with EAggCollisionShape::Type Fixed up selection so body selection works & tree seleciton is properly synced with viewport Added recursion guard to selection delegate handlers. Removed vestigial instance property editing support (no longer needed). 
Removed unused old tree support code Change 3423034 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added constraints to tree Change 3423318 on 2017/05/04 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix bone proxiies stopping updating after initial viewport selection Change 3424993 on 2017/05/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed up selection issues when creating new bodies Added constraint context menu Change 3424998 on 2017/05/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moved icons to central location Change 3425445 on 2017/05/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Customized filtering of the skeleton tree Hide constraints by defualt Added option to hide parents when filtering (so the vertical space is nto wasted, but some idea of hierarchy is preserved). BREAKING CHANGE: changed skeleton tree filtering API to add args & removed bWillFilter bool. Change 3425488 on 2017/05/05 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Merge-Thomas.Sarkanen Merging //UE4/Dev-AnimPhys to Dev-UEAP-29-PhATUpgrade (//Tasks/UE4/Dev-UEAP-29-PhATUpgrade) @ CL 3425303 Change 3427886 on 2017/05/08 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moved physics sim options to viewport menu (so seleciton changing is not required to change them) Moved physics-related rendering options to show menu We no longer switch to sim options when nothing is selected. 
During simulation we now disable the details panel Constraint scaling now works correctly (rather than just scaling the screen size limit that axes only are rendered) Change 3428040 on 2017/05/08 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Small fixes based on feedback: Exposed Mirror tool to menus Exposed constraint quick actions to menus Added edit condition to Position & Velocity strength for physical animation Fixed up some tooltips & display names Change 3428143 on 2017/05/08 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Defaulted to constraints as points Change 3428216 on 2017/05/08 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Request from Nick D: Update in-level primitive transforms immediately, rather than on mouse up. We only do this for non-convex primitives however, to avoid re-cooking meshes. Change 3430326 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Tweaks to rendering of constraints and shapes to allow for better seleciton & interaction with editor widgets. Slightly increased point-constraint rendering size and added crosshair cursor to constraints Change 3430327 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed object-reuse issue in skeleton tree items with sanem names (use a GUID instead) Change 3430391 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed duplicate time dilation (can just use viewport menu!) 
Change 3430419 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixup post-merge Prevent crash by attaching to root component in the correct place Add IWYU include for TArrayView Remove more unused code Change 3430443 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix constraint/body selection one final time Move constraint drawing to SDPG_World (apart from point mode) Remove depth offset in material Change 3430495 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Enabling/disabling collision between bodies is now clearer Menu items are now enabled and disabled correctly depending on collision state Tooltip reflects what actually gets done when the operation is enacted Also corrected a few functions that still reference constraint & body mode Change 3430553 on 2017/05/09 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added enable/disable collision with all Change 3432386 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Color code graph items based on current profile Change 3432401 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Color code tree items too Change 3432418 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Bone selection & manipulation now possible - allows for pose setup before simulation Item expansion now expands leaf nodes when selecting - helps with constraint selection etc. Change 3432427 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix compile error Color code according to simulated/kinematic status Change 3432428 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen File i missed Change 3432540 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added physics asset factory so physics assets can be created form the "new asset" menu. 
Skeletal mesh is picked then a default asset is generated Change 3432556 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Improve interactions with bones & bodies Clear bone selection when selecting bodies/constraints Always hide gizmo in simulate Change 3432703 on 2017/05/10 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed unused selection lock feature Fixed selection working incorrectly with details panel closed Change 3434710 on 2017/05/11 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Selection improvements Multiselect in tree now only selects non-collapsed tree elements Selection API revamped in shared data, so multiselect of constraints can work correctly (they appear more than once in the tree, so the previous single-point-of-access API was insufficient). Change 3489030 on 2017/06/14 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Merge-Thomas.Sarkanen Merging //UE4/Dev-AnimPhys to Dev-UEAP-29-PhATUpgrade (//Tasks/UE4/Dev-UEAP-29-PhATUpgrade) @ CL 3488994 Change 3491459 on 2017/06/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixup post-merge issues Change 3491486 on 2017/06/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Simulation now works in a similar way to the level editor Only on 'simulate' button, which controls repeating the last simulation (be it selected or not). Options are on a dropdown. Change 3491529 on 2017/06/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed selection color of wireframe drawing (this broke ages ago!) 
Fixed initialized environment color/intensity Change 3491537 on 2017/06/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Tweaked materials so they don't depend on separate translucency (which is optional, and disabled currently) Change 3491791 on 2017/06/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix crash when simulating selected new bodies Make sure we recreate physics state appropriately (it used to be done on simulation start, so wasn't needed each time) Change 3494359 on 2017/06/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Select all is now a menu option Context menu pops when right-clicking nothing now too Menu no longer grows enormous when multiple types of objects are selected Change 3494373 on 2017/06/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Enlarged constraint rendering size Show constraints (rather than points) by default Change 3511708 on 2017/06/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Physics Assets now appear in the asset family shortcut bar Physics Assets now render thumbnails Skeleton tree can now work in 'picker' mode Constraints can now be created manually in the graph, tree and viewport Fixed double-click and mousewheel not working right sometimes Change 3513121 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed clicks incorrectly selecting bones in simulate mode Change 3513160 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Physics Asset config is now loaded/saved Fixed another corner case with viewport clicks in sim Change 3513540 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moved body creation params over to a details panel & settings object Moved initial creation dialog over to use the new system too Change 3513591 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Renamed shapes and constraints in the tree view Change 3513752 on 2017/06/28 by 
Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Constraints are now not filtered by default Change 3513797 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Selecting constraints now shows them (and the bodies involved) in the graph Change 3513859 on 2017/06/28 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed "Show Kinematic Bodies" We now always show kinematic status in simulate mode Change 3515732 on 2017/06/29 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen PhAT rendering settings are now persisted across sessions. Access to sim/edit settings is now not gated on state of the editor. Sim/edit settings are always both available. Added editable opacity to collision rendering. Change 3515735 on 2017/06/29 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen New materials with opacity parameter Change 3515757 on 2017/06/29 by thomas.sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Re-saved materials Change 3515759 on 2017/06/29 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added ability to only show selected bodies as solid Change 3515812 on 2017/06/29 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix focus 'F' shortcut sometimes not working Change 3515984 on 2017/06/29 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix a bunch of selection issues with the graph not keeping in sync Change 3517456 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Merge-Thomas.Sarkanen Merging //UE4/Dev-AnimPhys to Dev-UEAP-29-PhATUpgrade (//Tasks/UE4/Dev-UEAP-29-PhATUpgrade) @ CL 3516853 Change 3517514 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed disappearing convex meshes on simulate Also fixes crash in thumbnail rendering Change 3517556 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Disabled selection on mesh. Fixes selection issues. 
Also made the hit proxy use a crosshair when over bodies, for easier selection Change 3517642 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added body/body collision buttons back to the main toolbar Fixed solid body drawing using the wrong material when no bodies are selected Change 3517828 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix delete shortcut not working when tree is focused Change 3517927 on 2017/06/30 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Integrated per-bone primitive generation with the new tab method Removed context menu item for bones (fixes duplicate popup) Fixed undo/redo not working for regenerating all bodies Change 3519931 on 2017/07/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Disabled body regeneration when simulation is running Fixed up tab icons Change 3519978 on 2017/07/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Preview mesh is now set like every other Persona editor (via toolbar picker of via preview scene settings) Animation picker removed from toolbar (we use the preview scene settings for this now) Fixed profiles tab icon Change 3519982 on 2017/07/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Show attached assets in tree Change 3519995 on 2017/07/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix broken multi-selection of bone proxies Change 3532799 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed code that prevented parts of the UI (like simulation) from working in PIE Removed graph overlays & added "PHYSICS" label Change 3532837 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed arrows from graph Fixed dragging off constraints/input pins/bodies in constraint-created graphs Constraint names now include both bodies Change 3532880 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Switched from colors to icons in 
the skeleton tree Removed bold fonts Change 3532907 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Layout fixes Added border around generate button in tools panel Removed skeleton tree header in contexts where it is not needed Change 3532932 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added slow task dialog for body generation Change 3532992 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Rearranged context menus to be not so huge Change 3533134 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Rearranged menus some more Change 3533135 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Colorized details customization of swing/twist items Change 3533174 on 2017/07/12 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Auto-open assets when creating from skeletal mesh Tweaked tooltip on suggestion from Nick D Change 3535652 on 2017/07/13 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed mirroring changes not showing up straight away Change 3535731 on 2017/07/13 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moved over to Persona-style floor adjustment Change 3539689 on 2017/07/17 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Tweaked tooltips for filtering items Change 3539693 on 2017/07/17 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added "deselect all" option (Esc) Change 3539731 on 2017/07/17 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Graph selection tweaks Selected bodies in the viewport/tree are now also selected in the graph. Selection outline is now matched to the graph outline instead of using default outline. 
Pin allocation no longer happens twice Change 3539750 on 2017/07/17 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Switched simulate shortcut to Alt+Enter Avoids conflict with global PIE/SIE shortcuts Change 3539933 on 2017/07/17 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Minor body regeneration refactor Label for tools tab button is dynamic depending on selection context Generation settings are now re-used by creation dialog too Added in per-bone and per-body regeneration menu items. Bone regeneration now deletes the old body(s) instead of aborting Change 3543884 on 2017/07/19 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Resetting animation to default now correctly applies the animation Change 3544101 on 2017/07/19 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed up physics asset editor's use of debug skel mesh component This broke post-merge from Dev-AnimPhys. Kinda hacky, but we need to double-flip the buffers in this case as we want to force non-threaded work AND also wait on the physics tick group to complete (to blend in physics). This also requires making ShouldBlendPhysicsBones protected, otherwise the buffers are never flipped in the non-simulating case (before simulation is enabled in the physics asset editor). 
Change 3547893 on 2017/07/21 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Moved code to add/remove/assign/unassign profiles to details customization Also allowed duplication again (via the menu) Allows correct naming of new profiles as before (as this is handled in PostEdit) #jira UE-47448 - Deleting profiles in Physics Asset Editor does not update the current profile #jira UE-47514 - Unable to duplicate profiles in Physics Asset Editor #jira UE-47384 - New profiles in Physics Asset Editor are all named the same #jira UE-47375 - Physics Asset Editor 'None' current profile Delete option is available #jira UE-47378 - Current Profile name boxes in Physics Asset Editor are size limited and overlap buttons if too long #jira UE-47374 - Physics Asset Editor 'None' current profile text box is editable but doesn't save Change 3547925 on 2017/07/21 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Prevented ctrl+selection of constraints from re-selecting Avoided deferred broadcast of selection event from the graph #jira UE-47515 - Ctrl + click and Shift + click does not remove constraints from skeleton tree in Physics Asset Editor Change 3550332 on 2017/07/24 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed bodies incorrectly simulating outside of 'simulate' mode Forced all bodies to be non-simulated when simulation is disabled. 
Also removed non-functioning motor menu options & disabled more menu options when simulating #jira UE-47579 - Entire mesh rotates uncontrollably after rotating a simulated body in Physics Asset Editor Change 3550355 on 2017/07/24 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed crash when failing to create a physics asset with multi convex hull #jira UE-47590 - Crash when New Physics Asset window is closed with no asset being created Change 3558007 on 2017/07/27 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed typo that disabled editability of profile names incorrectly #jira UE-47374 - Physics Asset Editor 'None' current profile text box is editable but doesn't save Change 3566157 on 2017/08/01 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed crash when opening a physics asset with a deleted preview skeletal mesh Now assigns default mesh as before If the mesh is then reset, the asset editor must be re-opened as the skeleton will have changed underneath it. #jira UE-47918 - Crash when opening certain Physics Assets Change 3568327 on 2017/08/02 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Prevent "set bodies below" from improperly enabling simulation on bodies #jira UE-47752 - Set all bodies below to simulated causes the viewport to simulate those bodies immediately in Physics Asset Editor Change 3570436 on 2017/08/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Physics assets with simulated bodies no longer simulate when first opened #jira UE-48000 - Physics assets with simulated bodies begin simulating when first opened Change 3570470 on 2017/08/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix excessive gravity crash when actors pop out of the world Also restrict gravity to non NaN-causing levels. 
#jira UE-48002 - Crash when mesh falls out of world due to high gravity simulation in Physics Asset Editor Change 3570717 on 2017/08/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Merge-Thomas.Sarkanen Merging //UE4/Dev-AnimPhys to Dev-UEAP-29-PhATUpgrade (//Tasks/UE4/Dev-UEAP-29-PhATUpgrade) @ CL 3570581 Change 3570781 on 2017/08/03 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix merge issues Change 3587760 on 2017/08/15 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed delegate for skeleton tree context menu extension, now uses an empty section Change 3589915 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Added comments to bone proxy & physics asset editor shared data Removed unused variables Change 3589976 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixed constraint 'all positions' rendering Removed empty override of unregister tab spawners Change 3589983 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fix crash when setting skeletal mesh Toast is not displayed when the skeleton is changed as well as the skeletal mesh. 
Toolkit was getting invalidated as setting the preview mesh to a different skeleton ends up restarting the sub-editor #jira UE-48196 - Crash when changing preview mesh of Physics Asset and applying Change 3589990 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Physics asset selection color now uses editor settings Change 3589994 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed unused functions Change 3589997 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Commented SetBodiesBelowPhysicsType as per code review Change 3590007 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Disabled physical material menu in simulate Change 3590130 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Removed unused code Commented a few functions Re-instated preview mesh selection Removed delegate allowing viewport client class creation Change 3590154 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Remove unused code Change 3590197 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Merge-Thomas.Sarkanen Merging //UE4/Dev-AnimPhys to Dev-UEAP-29-PhATUpgrade (//Tasks/UE4/Dev-UEAP-29-PhATUpgrade) @ CL 3589965 Change 3590250 on 2017/08/16 by Thomas.Sarkanen@Dev-UEAP-29-PhATUpgrade-Thomas.Sarkanen Fixup merge errors Change 3596227 by Jonathan.Poncelet Fixed physics substepping interpolation using the wrong starting value. #jira UE-48150 Physics Substepping doesn't have the same effect from 4.15 to 4.16 Change 3596241 by Jonathan.Poncelet Fixed cloth not being drawn correctly in the editor, due to bounds not being computed accurately. 
#jira UE-48243 Clothing disappears during cloth paint mode once you navigate to a section far from the origin Change 3596247 by Thomas.Sarkanen Fixup CIS errors post PhAT Upgrade merge Change 3596250 by Thomas.Sarkanen More CIS fixes Change 3596255 by Benn.Gallagher Fixed compilation errors when nativizing animation blueprints that use subinstances #jira UE-46522 Change 3596256 by Benn.Gallagher Fixed orphaned sub anim instance pins hanging around #jira UE-46545 Change 3596257 by Benn.Gallagher Fixed skel surf particles being misplaced when clothing was active. And fixed particles spawning on disabled cloth proxy sections. #jira UE-48045 Change 3596258 by Benn.Gallagher Hide mass override when selecting skeletal meshes. Mass overrides are taken from physics asset and will be ignored on the component so it makes no sense to have this visible #jira UE-47755 Change 3596259 by Benn.Gallagher Fixed mismatch between paint values and view values for clothing tools #jira UE-48110 Change 3596260 by Benn.Gallagher Stopped property context menus killing the whole window stack when an item is clicked #jira UE-48158 Change 3596261 by Thomas.Sarkanen One last Mac CIS fix (hopefully) Change 3596308 by Benn.Gallagher Removed outdated references to APEX in clothing example map. Change 3596360 by Martin.Wilson Fixing inconsistent animation entries in blueprint context menu (displaying differently depending on whether the asset is loaded) + Cache correct tooltip when asset isn't loaded #jira UE-48452 Change 3596459 by Benn.Gallagher Fixed anim curves not correctly being updated to post process instances. Change made to curve update in Dev-General fixed main and sub instances but missed post process instances. #jira UE-47567 Change 3596967 by Aaron.McLeran Adding setting default reverb send level in audio settings. Change 3596974 by Ethan.Geller Merge in fix from Christopher Oliver Change 3597243 by Aaron.McLeran Checking in missing files. 
Change 3597686 by Ethan.Geller Fix warnings/errors from CL 3597452 Change 3597846 by Ethan.Geller Fix errors, take 2 Change 3598290 by Ethan.Geller Panning Angle Issue Change 3598412 by Ethan.Geller Change Core.h header to CoreMinimal.h, fix warnings Change 3599797 by Jurre.deBaare LODs from Merge Actor tool have bad normals #jira UE-47129 #fix normals weren't wrong but user was complaining about the lightmap resolution behaviour, so added a new feature that calculates the lightmap resolution according to: 1) Summing all lightmap pixel counts for each mesh component being merged 2) Calculating fitting texture dimension by taking square root of the total pixels Change 3599863 by Lina.Halper PR #3919: rename flag 'DEPERCATED_PHYSBLEND_UPDATES_PHYSX' to 'DEPRECATED_PHYSBLEND_UPDATES_PHYSX' to fix the typo (Contributed by aziot) Change 3599883 by Jurre.deBaare HLOD: update outliner tooltip when UE docs arrive #jira UE-20352 Change 3599944 by Martin.Wilson Smart name refactor - Remove guids entirely - Remove automatic fix up - Simplify smart name mapping container - Make animations deterministic for cooking #jira UEAP-264 Change 3600133 by Benn.Gallagher Fixed crash shutting down editor with active cloth paint tab, as mode manager was being used unsafely. 
#jira UE-48612 Change 3600166 by Benn.Gallagher Fixed cloth paint gradient allowing invalid values #jira UE-48114 Change 3600719 by Lina.Halper PR #3894: PlayMontage node bug Fix (Contributed by ArCorvus) Change 3601668 by Jurre.deBaare Improve BlendSpace preview pin dragging controls #fix Click and drag now also works for the preview pin which should allign it with other pins on the grid and makes the preview functionality more discoverable #misc Also added tooltips on the grid to make the functionality more discoverable #jira UE-43011 Change 3601669 by Jurre.deBaare No easy way to tell which Blend Sample in the blend graph matches up to which Blend Sample in the Asset Details panel #fix I've added the SampleIndex to the names to make it easier recognizing which one is which #jira UE-46892 Change 3601731 by Benn.Gallagher Fixed cloth paint falloff to actually calculate falloff, and take brush strength into account. #jira UE-48329 Change 3601897 by Lina.Halper fixing issue with sequencer reinitialization #jira: UE-48556 Change 3602339 by Benn.Gallagher Fixed comment/tooltip typo Change 3602502 by Benn.Gallagher Fixed clothing gradient tool renderer not showing selected points when camera was moving #jira UE-48331 Change 3602664 by Ethan.Geller Unshelved fixes from Dev-VR Change 3602726 by Lina.Halper Back out revision 3 from //UE4/Dev-AnimPhys/QAGame/QAGame.uproject #jira: UE-48700 Change 3603011 by Lina.Halper Fix build error Change 3604139 by Benn.Gallagher Restricted painter processing to no longer attempt painting while in simulation previews in cloth paint mode. #jira UE-47960 Change 3604284 by Benn.Gallagher Fixed crashes in physics asset editor and skeletal mesh editor when the preview scene clears out the preview mesh while clothing is running #jira UE-48687 Change 3604612 by Lina.Halper Fix curve issue from automation test - It was actual bug. 
Change 3604614 by Lina.Halper - Fix crash with macro anim notify - Make sure macro anim notify doesn't show up in the menu #jira: UE-45036 Change 3604725 by Lina.Halper fixed issue with opening state machine from anim graph #jira: UE-48726 Change 3604971 by Aaron.McLeran #jira UE-48738 Launching Oculus Rift without -VR plays audio in the oculus rift. Bringing fix from 4.17 to Dev-AnimPhys Change 3605787 by Aaron.McLeran Adding ability to pass in an optional owner in PlaySound2D and PlaySoundAtLocation BP calls - This is necessary in order to use the sound concurrency "limit by owner" feature Change 3606851 by Jurre.deBaare UE4Editor Static Analysis Win64 - Warning fix Change 3607022 by Lina.Halper Fix static analysis warning Change 3607229 by Jurre.deBaare RemoveAllCurveData should not allow removing data from the Skeleton #jira UE-48107 Change 3607660 by Martin.Wilson Live link client can run in cooked builds too #jira UEAP-306 Change 3607668 by Ethan.Geller #jira UE-48792 fix null dereference case in audiodevice.cpp Change 3607734 by Lina.Halper LOD linking to curve - consolidated to one param - curve eval option - for long time, looking at why morphtarget wasn't working on LOD 1, later realized it was due to simplified :( - fixed to make sure param to clear is always checking with default value - this is correct behavior and it's not too bad for perf because internally the default value is also in the TMap - flipped meaning to align with bAllowCurveEvaluation - also fixed issue with orion cooking - where transform curves are added as normal curves #jira: UE-37996, UE-48782 Change 3607859 by Martin.Wilson Missed files from live link editor checkin Change 3607958 by Martin.Wilson Redo Jurre's changes from CL 3607229 (were removed by CL 3607734) Change 3608566 by Ethan.Geller change include to avoid header conflicts on Linux Change 3609074 by Ethan.Geller Take 2: Fix capitalization on include, fix Linux build. 
Change 3610024 by Lina.Halper Fix issue with material editor crashing due to missing load module of AdvancedPreviewScene - we used to load advanced preview setting by persona module - this has been moved to persona tool kit, and now all other modules are crashing - If we want to do it for tool kit, we have to make sure all other editor's loading should change also. #jira: UE-48809 Change 3610081 by Jurre.deBaare Animations can't be set on blend samples from the dropdown #fix Skeleton asset registry tag now includes 'AssetTypeName' PathToAsset, so replacing compare with contains #jira UE-48746 Change 3610088 by Jurre.deBaare Editor crashes if you CtrlZ several times after adding animations to a 1D blendspace #fix removed the hacky OnObjectPropertyChanged and tied the refresh into propertyhandles instead #misc found out of sync widget values due to incorrect encapsulation inside of lambdas #jira UE-48741 Change 3610862 by Ethan.Geller Fix submix effects for situations where number of input channels does not equal output channels Change 3611346 by Aaron.McLeran Using audio thread platform affinity mask for audio render thread. Change 3613297 by Ethan.Geller Simple delay submix Change 3614435 by Martin.Wilson CIS fix Change 3614482 by Martin.Wilson Store root motion on anim instance instead of proxy to avoid thread safety stalls #jira UE-46896 Change 3614483 by Martin.Wilson Evaluate curves in anim offsets #jira UE-47119 Change 3614495 by Jurre.deBaare Reimport alembic file with new source option does not automatically tick any tracks #fix If no tracks are set to import, reset them all to do so (we're assuming here the user is importing something completely different, and we wouldn't want her to import an empty animation either) #jira UE-46141 Change 3614645 by Thomas.Sarkanen Fixed physics assets not simulating when BlockAll was globally overridden Persona viewport was overriding the collision profile back to BlockAll, which projects can override. 
Setting to the internal PhysicsActor profile prevents this, as it used to in PhAT #jira UE-48591 - Physics assets not simulating correctly in Orion Change 3614683 by Lina.Halper Fixed crash when modifying default physicsasset #jira: UE-48844 Change 3614721 by Jurre.deBaare Vertex painting on skeletal meshes bound by physics asset #fix Now try and find intersecting triangle if we do hit the mesh bounds, but not any physics bodies #jira UE-48004 Change 3614730 by Thomas.Sarkanen Fixed crash when regenerating multi convex hulls from zero-vert bones We handled this in the single convex hull case, but multi did not. #jira UE-48780 - Editor crashes if you regenerate a box body to a complex hull body Change 3614763 by Jurre.deBaare Moving over: HLOD crash when dragging and dropping actors into their own cluster in the HLOD outliner - ALODActor #jira UE-48249 #fix ensure that we nullptr check the static mesh as a LODActor can be reset to have a null static mesh Change 3615029 by Lina.Halper Fix issue with highlight #jira: UE-48855 Change 3617593 by Thomas.Sarkanen Fixed crash when regenerating large amounts of bodies We were overflowing the PhysX shape limit for aggregates - this refers to shapes, not bodies, it seems #jira UE-48606 - Crash when adding new multi convex hull body to bone on skeleton that already has multi convex hull bodies Change 3617609 by Jonathan.Poncelet Fixed crash that could occur when opening a physics asset and deleting bones. #jira UE-48971 Editor crashes if you clear a preview mesh on a physics asset and delete the bones when reopening it Change 3617723 by Thomas.Sarkanen Prevented actors & components of anim preview scenes (and the preview scenes themselves) from persisting after editors are shut down Fixed up 2 locations where the persona toolkit was being held onto by a strong ptr (cloth paint and new PhAT). This should stop the preview scene from persisting. 
Moved AddToRoot pattern used for anim preview scene to FGCObject #jira UE-47227 - [CrashReport] UE4Editor_Persona!TSharedPtr<IEditableSkeleton,0>::ToSharedRef() [sharedpointer.h:794] #jira UE-47717 - SkelMesh Editor creates preview World, but it never gets destroyed Change 3617818 by Benn.Gallagher Final v1 UX changes for clothing tool, and removed experimental flag Change 3617937 by Jurre.deBaare Default bounds for Alembic skel-mesh are too large #fix bounds was initialised to zero and +-ed which meant that it would always include (0,0,0) and enlarge the bounds #jira UE-47139 Change 3618187 by Ethan.Geller Implement Audiomixer in HTML5 Change 3618188 by Lina.Halper Fix issue with highlight in persona #jira: UE-49020 Change 3618229 by Lina.Halper Fix crash on exit when modify is causing it to serialize again in the middle of tear down #jira: UE-48025 Change 3618248 by Lina.Halper fix issue by workaround where clamp is not happening with allowspin is false #jira: UE-47001 Change 3618289 by Aaron.McLeran Removing audio format types we're not using for simplicity Change 3618291 by Martin.Wilson Fix duplicate of curve name appearing in list when renaming #jira UE-49041 Change 3618390 by Aaron.McLeran Removing a case for DTYPE_Xenon since this is never used. Change 3618425 by Martin.Wilson Keep notify UI up to data across multiple editors when adding notifies to an animation #jira UE-48104 Change 3619023 by Aaron.McLeran Removing DTYPE_Xenon from XAudio2Buffer.cpp since it's not used Change 3619129 by Aaron.McLeran Source bus feature. - New architectural feature for audio mixer that allows audio sources to route to other audio sources. 
- Buses can be routed to each other - Buses have a duration which can be set in bus asset - Buses can choose between mono and stereo channels - Sources can send to buses and also toggle to *only* output to buses (and bypass submixing) - Will allow persistent source effects on different source audio, while also maintaining 3d spatialization capabilities. Lots of future features will build on this change: 3d audio-volume-based submixing, sidechaining, environment reflections, diagetic microphones, etc. - Some engine changes and optimizations: - Format conversion to float is done in async workers for decode vs the render callback - Procedural sound waves can opt to output only float vs int16 PCM data (avoids a format conversion in audio mixer) - Apply master attenuation at the final output vs per-source - Fixed code that performs fade in/fade out for smooth startup and shutdown. - Moved FSourceParam to FParam into DSP utility so others can use it. - Some engine fixes: - Audio spat plugins that are external sends will not send audio to default/base submix. But will also allow their audio to be panned and sent to submix sends (e.g. reverb) so external HRTF rendering can also get reverb effects, etc. - Fixed an issue with pause - Fixed an issue with the final source buffer in a source voice not getting properly rendered and causing discontinuties - Fixed an issue with WorldID not getting set for listeners TODO: - fill out source bus details panel customization to hide USoundBase params which aren't relevant to source buses Change 3619159 by Ethan.Geller #jira UE-48950 fix steam audio crash on editor exit Change 3619555 by Jonathan.Poncelet Fixed constraint debug drawing arrows in the physics asset editor being too large. 
#jira UE-48863 Limited constraints and free constraints are much larger on screen Change 3619574 by Thomas.Sarkanen Fixed debug link for animation blueprints not persisting when changing preview mesh Anim instance is no longer re-created all the time when setting skeletal mesh, so we need to re-init the preview instance and re-set the linked skeletal mesh component manually when the mesh changes. #jira UE-46642 - Switching Preview mesh when you've selected an AnimBP breaks the link between the AnimBP and PIE session Change 3619586 by Thomas.Sarkanen Fixed physics asset shortcut not working correctly in certain circumstances FBox was using uninitialized memory #jira UE-49034 - Pressing F to focus on a physics body focuses on the area in between the root and the physics body and not the selected body Change 3619640 by Thomas.Sarkanen Assets with no preview mesh now no longer allow access to other skeleton's physics assets in their shortcut bars Unified the skeleton/mesh search code between FPersonaAssetFamily and FPersonaToolkit, so they bot *look* for a compatible skeletal mesh if one was not found on the asset (but still dont set it automatically). #jira UE-49038 - If you open a skeleton or an animation it won't open persona with the correct physics asset in the quick switch bar Change 3619644 by James.Golding Change FBodyInstance::InstanceBodyIndex back to int32 (need to support ISMC with many instances) #jira UE-47652 Change 3619654 by Martin.Wilson Fix removing a curve when it isn't used on any animations #jira UE-49048 Change 3619771 by Thomas.Sarkanen Make sure the physics asset editor floor has collision, regardless of what BlockAll does #jira UE-49088 - PhysicsAsset Editor Floor should not depend on BlockAll config Change 3619803 by Jonathan.Poncelet Fixed localization warnings caused by duplicate keys. 
#jira UE-48580 //UE4/Main: Step "Build Engine Localization" has completed with 4 Warnings Change 3619813 by Jurre.deBaare Baked bones using a pose animation are rotated in the wrong direction #fix root bone transform wasn't being taken into account while generating final bone transforms #misc added debug logging for future work #jira UE-47362 Change 3619830 by Jurre.deBaare Biased Texture Size option is not functioning when Merging Actors #fix Fixed up material baking setup after refactoring, now sets correct texture sizes again according to texture sizing type, this will be removed in the long term anyhow #misc Found a bug in material rendering if previous render size < current render size it would not set the viewport size/projection matrix correctly which broke the material bake #jira UE-48108 Change 3619859 by Thomas.Sarkanen Fixed HLOD selection sphere persisting on undo/redo Removed HLOD selection actors when the outliner is refreshed #jira UE-47032 - HLOD Cluster radius sphere remains in level if you move an actor in a cluster and then undo the movement. 
Change 3619871 by Martin.Wilson Calculate root motion over the correct segment times, not the track times #jira UE-43719 Change 3619898 by Thomas.Sarkanen Improve UI feedback around bounds/in-game bounds in animation editor viewports Tooltip for in-game bounds is now more detailed In-game bounds cannot be selected if bounds is not also selected #jira UE-47958 - Bound vs In-game Bound in Viewport Show menu in Physics Asset Editor is confusing Change 3619908 by Thomas.Sarkanen Fixed tooltip for PhysicsType #jira UE-48421 - Incorrect tooltip for Physics Type Change 3620014 by Jurre.deBaare Only the first mesh bake material property in the array can be set to diffuse, diffuse cannot be selected on the other array elements #fix Changed the way the restriction is setup and retrieve the UMaterialOptions from the details view instead of GetDefault<> #misc Also added more delegates to ensure the restriction is up to date #jira UE-46980 Change 3620104 by Jurre.deBaare HLOD doesn't support renaming in levels #fix ensure that during renaming of UWorld we also rename the HLOD assets into their respective new HLOD package outer #jira UE-48072 Change 3620151 by Thomas.Sarkanen Undo/redo now correctly affects animation preview scene settings Preview scene desc is now transactional & state is correctly set up on undo/redo according to the current preview scene desc #jira UE-47816 - Undoing setting the animation mode to Refrence pose doesn't update the UI Change 3620152 by Thomas.Sarkanen Exposed LOD menu in PhAT This allows auto LOD to be optionaly selected. It was hidden and we forced to LOD 0 before. We still default to forcing LOD 0 to preserve the old behavior. 
#jira UE-47970 - LODs not working in Physics Asset Editor Change 3620177 by Benn.Gallagher PR #3696: Fix for USkinnedMeshComponent::GetCPUSkinnedVertices() (Contributed by Koderz) Change 3620250 by Jurre.deBaare HLOD assets left in HLOD folder when clusters are deleted #fix some added lifetime management for HLOD assets, keeping list of 'stale' HLOD assets which if not Undo-ed will either be deleted when LODActor is saved, or marked PendingKill when LODActor is destroyed #jira UE-47450 Change 3620273 by James.Golding PR #3908: Removing duplicated forward declation (Contributed by celsodantas) #jira UE-48530 Change 3620274 by James.Golding PR #3909: Removing unnecessary conditional (Contributed by celsodantas) #jira UE-48531 Change 3620275 by James.Golding Add icon for destruction plugin Change 3620401 by Ethan.Geller #jira UE-47684 Remove SDL dependencies from Win64 Change 3620586 by Jurre.deBaare Linux CIS fix Change 3620660 by Martin.Wilson Fixes for state machines getting reinitialized in situations that users don't want them to. -Added option to state machine to allow it to skip reinitialization when it becomes relevant -Added option to slot nodes to keep source pose relevant while montage slot is playing. #jira UE-43578 Change 3620665 by Aaron.McLeran Making source buses only show relevant source bus data. - hiding sound wave categories that aren't relevant to source buses Change 3621087 by Ethan.Geller #jira UE-49000 implement device change listener to ensure we are properly handling when audio is disabled. Change 3621144 by Aaron.McLeran #jira UE-49147 #jira UE-49145 Fixing concurrency and volume stats Change 3621148 by Aaron.McLeran Fixing typo Change 3621180 by Ethan.Geller #jira UE-49151 Fix for browser preview on bus only sounds Change 3621421 by Ethan.Geller #jira UE-49165 Fix real time audio slider. 
Change 3621604 by Ethan.Geller #jira UE-44847 fix iOS panning algorithm on non-audio mixer Change 3621626 by Lina.Halper Fix issue with anim montage displaying when selecting animation #jira: UE-48749 Change 3621813 by Thomas.Sarkanen Fixing undo/redo of bone modifications in Physics Asset Editor (and others) Bone proxy objects now get recycled (instead of the pool constantly growing) as their names are stable and unique. Fixed broken skeleton tree RTTI (so selection persistence now works correctly on undo/redo again) We no longer force a re-selection on physics asset changes (the tree takes care of that anyway). #jira UE-47862 - Undoing Bone transformations in Physics Asset Editor does not work Change 3621831 by Jurre.deBaare Crash fix for Material baking when trying to analyse a MP_MAX material property #jira UE-49172 Change 3621936 by Thomas.Sarkanen Fixed CIS error from merge Change 3621937 by Thomas.Sarkanen Fix merge issue with API change in USynthComponent Change 3622173 by Thomas.Sarkanen Fixed ortho viewports being bright white in sub-editors Preview scenes in general are responsible by default for the background color. Advanced preview scenes now use background color from settings. Previously only the animation editors did this. #jira UE-48841 - The background of the orthographic viewports is bright white Change 3622730 by Ethan.Geller #jira UE-49182 UE-49198 UE-49201 Fix for channel mismatch in procedural sound waves, remove singleton behavior for MMNotificationClient. 
CL by Aaron.McLeran Change 3622759 by Ethan.Geller #jira 49170 reduce static analysis warnings for audiodevice.cpp Change 3622901 by Benn.Gallagher Bumped PhysX DDC key after change in Orion caused verify failures Change 3623458 by Aaron.McLeran #jira UE-49204 Delores monologue cut short in Odin elevator Change 3623667 by Aaron.McLeran #jira UE-49204 UE-49243 Delores monologue cut short in Odin elevator Change 3623752 by Aaron.McLeran #jira UE-49247 Sound Source Bus Properties Are Inappropriate Fixing issues with new source bus uobject so properties show up appropriately. Change 3624058 by Ben.Marsh Fix stale module being enumerated when running UE4Editor-Cmd.exe, causing warning when running incremental automated tests. Module and version manifest filenames are derived from the executable filename, so when running the executable compiled for the console subsystem, we need to strip the -Cmd suffix from the executable name to find the correct path. Change 3624193 by Ethan.Geller #jira UE-49170 Static analysis fix, take 2 Change 3354003 by Thomas.Sarkanen Back out changelist 3353914 Change 3355932 by Thomas.Sarkanen Back out changelist 3354003 Reinstating merge from Main: Merging //UE4/Dev-Main to Dev-AnimPhys (//UE4/Dev-AnimPhys) @ CL 3353839 Change 3477632 by Jurre.deBaare Automated test content and ground truths for Actor Merging and Material baking functionality Change 3491464 by Jurre.deBaare Updated automation content for MergeActor behaviour Change 3587878 by Thomas.Sarkanen Merging //UE4/Dev-Main to Dev-AnimPhys (//UE4/Dev-AnimPhys) @ CL 3587489 Change 3597452 by Ethan.Geller #jira UEAP-304, UEAP-280, UEAP-281: Major structural refactor of Audio Plugin interfaces, Oculus Audio plugin, Steam Audio Plugin. Introduction of Sony Audio3D plugin. 
Change 3602935 by Lina.Halper Allow curve evaluation to be controlled by users #jira: UE-46446 Change 3606120 by Ethan.Geller Move Tap Delay Submix to Synthesis library, modify tap delay API Change 3621830 by Thomas.Sarkanen Merging //UE4/Dev-Main to Dev-AnimPhys (//UE4/Dev-AnimPhys) @ CL 3621691 Change 3622807 by Ethan.Geller #jira UE-49201 Fixing volume issues Issue is that these platforms weren't using the proper public function and an audio mixer refactor changed how volume is calculated to separate out distance attenuation vs other volume gains. [CL 3624383 by Thomas Sarkanen in Main branch]
2017-09-04 04:17:46 -04:00
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
FTextureRenderTargetResource* RenderTargetResource = RenderTarget->GetRenderTargetResource();
FCanvas Canvas(RenderTargetResource, nullptr, FGameTime::GetTimeSinceAppStart(), GMaxRHIFeatureLevel);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
Canvas.SetAllowedModes(FCanvas::Allow_Flush);
Canvas.SetRenderTargetRect(FIntRect(0, 0, RenderTarget->GetSurfaceWidth(), RenderTarget->GetSurfaceHeight()));
Canvas.SetBaseTransform(Canvas.CalcBaseTransform2D(RenderTarget->GetSurfaceWidth(), RenderTarget->GetSurfaceHeight()));
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Do rendering
Canvas.Clear(RenderTarget->ClearColor);
FCanvas::FCanvasSortElement& SortElement = Canvas.GetSortElement(Canvas.TopDepthSortKey());
SortElement.RenderBatchArray.Add(&RenderItem);
Canvas.Flush_RenderThread(RHICmdList);
SortElement.RenderBatchArray.Empty();
FTexture2DRHIRef StagingBufferRef = StagingBufferPool.CreateStagingBuffer_RenderThread(RHICmdList, RenderTargetResource->GetSizeX(), RenderTargetResource->GetSizeY(), RenderTarget->GetFormat(), RenderTarget->IsSRGB());
FGPUFenceRHIRef GPUFence = RHICreateGPUFence(TEXT("MaterialBackingFence"));
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
FResolveRect Rect(0, 0, RenderTargetResource->GetSizeX(), RenderTargetResource->GetSizeY());
RHICmdList.CopyToResolveTarget(RenderTargetResource->GetRenderTargetTexture(), StagingBufferRef, FResolveParams(Rect));
RHICmdList.WriteGPUFence(GPUFence);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Prepare a lambda for final processing that will be executed asynchronously
NumTasks++;
auto FinalProcessing_AnyThread =
[&NumTasks, bSaveIntermediateTextures, CurrentMaterialSettings, &StagingBufferPool, &Output, Property, MaterialIndex, bEmissiveHDR = bEmissiveHDR](FTexture2DRHIRef& StagingBuffer, void * Data, int32 DataWidth, int32 DataHeight)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
TRACE_CPUPROFILER_EVENT_SCOPE(FinalProcessing)
FBakeOutputEx& CurrentOutput = Output[MaterialIndex];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
TArray<FColor>& OutputColor = CurrentOutput.PropertyData[Property];
FIntPoint& OutputSize = CurrentOutput.PropertySizes[Property];
OutputColor.SetNum(OutputSize.X * OutputSize.Y);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (Property.Type == MP_EmissiveColor)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
// Only one thread will write to CurrentOutput.EmissiveScale since there can be only one emissive channel property per FBakeOutputEx
FMaterialBakingModule::ProcessEmissiveOutput((const FFloat16Color*)Data, DataWidth, OutputSize, OutputColor, CurrentOutput.EmissiveScale);
if (bEmissiveHDR)
{
TArray<FFloat16Color>& OutputHDRColor = CurrentOutput.HDRPropertyData[Property];
OutputHDRColor.SetNum(OutputSize.X * OutputSize.Y);
ConvertRawR16G16B16A16FDataToFFloat16Color(OutputSize.X, OutputSize.Y, (uint8*)Data, DataWidth * sizeof(FFloat16Color), OutputHDRColor.GetData());
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
else
{
TRACE_CPUPROFILER_EVENT_SCOPE(ConvertRawB8G8R8A8DataToFColor)
check(StagingBuffer->GetFormat() == PF_B8G8R8A8);
ConvertRawB8G8R8A8DataToFColor(OutputSize.X, OutputSize.Y, (uint8*)Data, DataWidth * sizeof(FColor), OutputColor.GetData());
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
// The mapped pixels have been converted into OutputColor above, so the staging buffer is no longer needed here.
// We can't unmap ourself since we're not on the render thread, so the buffer is handed back to the pool,
// which (judging by the _AnyThread name) takes care of the unmap on our behalf.
StagingBufferPool.ReleaseStagingBufferForUnmap_AnyThread(StagingBuffer);
// Optional post-process, enabled per material through its bPerformBorderSmear setting.
if (CurrentMaterialSettings->bPerformBorderSmear)
{
// This will resize the output to a single pixel if the result is monochrome.
// NOTE(review): OutputSize.X / OutputSize.Y are presumably updated in place when that shrink happens - confirm against the helper's signature.
FMaterialBakingHelpers::PerformUVBorderSmearAndShrink(OutputColor, OutputSize.X, OutputSize.Y);
}
#if WITH_EDITOR
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// If saving intermediates is turned on
if (bSaveIntermediateTextures)
{
TRACE_CPUPROFILER_EVENT_SCOPE(SaveIntermediateTextures)
FString TrimmedPropertyName = Property.ToString();
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const FString DirectoryPath = FPaths::ConvertRelativePathToFull(FPaths::ProjectIntermediateDir() + TEXT("MaterialBaking/"));
FString FilenameString = FString::Printf(TEXT("%s%s-%d-%s.bmp"), *DirectoryPath, *CurrentMaterialSettings->Material->GetName(), MaterialIndex, *TrimmedPropertyName);
FFileHelper::CreateBitmap(*FilenameString, CurrentOutput.PropertySizes[Property].X, CurrentOutput.PropertySizes[Property].Y, CurrentOutput.PropertyData[Property].GetData());
}
#endif // WITH_EDITOR
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
NumTasks--;
};
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Pipeline slots are reused round-robin (PipelineIndex wraps modulo PipelineDepth below).
// Run previous command if we're going to overwrite it meaning pipeline depth has been reached:
// a slot that still holds a ReadCommand has a pending read, and it must be executed
// now because the assignment that follows replaces it.
if (PipelineContext[PipelineIndex].ReadCommand)
{
PipelineContext[PipelineIndex].ReadCommand(RHICmdList);
}
// Generate a texture reading command that will be executed once it reaches the end of the pipeline
PipelineContext[PipelineIndex].ReadCommand =
[FinalProcessing_AnyThread, StagingBufferRef = MoveTemp(StagingBufferRef), GPUFence = MoveTemp(GPUFence)](FRHICommandListImmediate& RHICmdList) mutable
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
TRACE_CPUPROFILER_EVENT_SCOPE(MapAndEnqueue)
void * Data = nullptr;
int32 Width; int32 Height;
RHICmdList.MapStagingSurface(StagingBufferRef, GPUFence.GetReference(), Data, Width, Height);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Schedule the copy and processing on another thread to free up the render thread as much as possible.
// Data, Width and Height are the values filled in by MapStagingSurface just above and are captured by value.
// The staging buffer reference is moved into the task and passed on to the final processing step.
// The future returned by Async is intentionally discarded.
// NOTE(review): completion appears to be tracked through the NumTasks counter instead - confirm.
Async(
EAsyncExecution::ThreadPool,
[FinalProcessing_AnyThread, Data, Width, Height, StagingBufferRef = MoveTemp(StagingBufferRef)]() mutable
{
FinalProcessing_AnyThread(StagingBufferRef, Data, Width, Height);
}
);
};
}
);
PipelineIndex = (PipelineIndex + 1) % PipelineDepth;
}
}
}
// Destroying Render Items must happen on the render thread to ensure
// they are not used anymore: the deletion is queued as a render command so that it runs
// after the render commands enqueued earlier that may still reference them.
ENQUEUE_RENDER_COMMAND(DestroyRenderItems)(
	[RenderItems](FRHICommandListImmediate& RHICmdList)
	{
		// Bind each entry by reference; only the owned pointer is needed, so copying the entry is wasted work.
		for (const auto& RenderItem : (*RenderItems))
		{
			delete RenderItem.Value;
		}
		// The container itself was heap-allocated; this command is its last user, so it is freed here too.
		delete RenderItems;
	}
);
}
ENQUEUE_RENDER_COMMAND(ProcessRemainingReads)(
[&PipelineContext, PipelineDepth, PipelineIndex](FRHICommandListImmediate& RHICmdList)
{
// Enqueue remaining reads
for (int32 Index = 0; Index < PipelineDepth; Index++)
{
int32 LocalPipelineIndex = (PipelineIndex + Index) % PipelineDepth;
if (PipelineContext[LocalPipelineIndex].ReadCommand)
{
PipelineContext[LocalPipelineIndex].ReadCommand(RHICmdList);
}
}
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Wait until every task has been queued so that NumTasks is only decreasing
FlushRenderingCommands();
// Wait for any remaining final processing tasks
while (NumTasks.Load(EMemoryOrder::Relaxed) > 0)
{
FPlatformProcess::Sleep(0.1f);
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Wait for all tasks to have been processed before clearing the staging buffers
FlushRenderingCommands();
ENQUEUE_RENDER_COMMAND(ClearStagingBufferPool)(
[&StagingBufferPool](FRHICommandListImmediate& RHICmdList)
{
StagingBufferPool.Clear_RenderThread(RHICmdList);
}
);
// Wait for StagingBufferPool clear to have executed before exiting the function
FlushRenderingCommands();
if (!CVarUseMaterialProxyCaching.GetValueOnAnyThread())
{
CleanupMaterialProxies();
}
}
/**
 * Builds the "Material Baking Options" window and presents it as a modal dialog.
 *
 * @param OptionObjects	Settings objects forwarded to the SMaterialOptions widget.
 * @param NumLODs		LOD count forwarded to the SMaterialOptions widget.
 * @return The negation of SMaterialOptions::WasUserCancelled() once the modal window
 *         has been added; false when the "MainFrame" module is not loaded, in which
 *         case the window is built but never shown.
 */
bool FMaterialBakingModule::SetupMaterialBakeSettings(TArray<TWeakObjectPtr<UObject>>& OptionObjects, int32 NumLODs)
{
	TSharedRef<SWindow> OptionsWindow = SNew(SWindow)
		.Title(LOCTEXT("WindowTitle", "Material Baking Options"))
		.SizingRule(ESizingRule::Autosized);

	// The options widget is kept so its cancel state can be queried after the modal window is added.
	TSharedPtr<SMaterialOptions> OptionsWidget;
	OptionsWindow->SetContent(
		SAssignNew(OptionsWidget, SMaterialOptions)
		.WidgetWindow(OptionsWindow)
		.NumLODs(NumLODs)
		.SettingsObjects(OptionObjects)
	);

	// The modal window is parented to the main frame; without that module nothing is shown.
	if (!FModuleManager::Get().IsModuleLoaded("MainFrame"))
	{
		return false;
	}

	IMainFrameModule& MainFrameModule = FModuleManager::LoadModuleChecked<IMainFrameModule>("MainFrame");
	TSharedPtr<SWindow> ParentWindow = MainFrameModule.GetParentWindow();
	FSlateApplication::Get().AddModalWindow(OptionsWindow, ParentWindow, false);

	const bool bUserCancelled = OptionsWidget->WasUserCancelled();
	return !bUserCancelled;
}
void FMaterialBakingModule::SetEmissiveHDR(bool bHDR)
{
bEmissiveHDR = bHDR;
}
/**
 * Rebuilds the default and per-property color space configuration.
 *
 * @param bCorrectLinear	When true the default color space is Linear with sRGB overrides for
 *							the color properties; when false the default is sRGB with Linear
 *							overrides for the normal and opacity properties.
 */
void FMaterialBakingModule::SetLinearBake(bool bCorrectLinear)
{
	// Drop all previous overrides; per the original note, the per-property color space
	// ultimately determines whether the render target is linear.
	PerPropertyColorSpace.Reset();

	if (!bCorrectLinear)
	{
		// Legacy setup: everything is sRGB except the properties listed here.
		DefaultColorSpace = EPropertyColorSpace::sRGB;
		PerPropertyColorSpace.Add(MP_Normal, EPropertyColorSpace::Linear);
		PerPropertyColorSpace.Add(MP_Opacity, EPropertyColorSpace::Linear);
		PerPropertyColorSpace.Add(MP_OpacityMask, EPropertyColorSpace::Linear);
		PerPropertyColorSpace.Add(TEXT("ClearCoatBottomNormal"), EPropertyColorSpace::Linear);
		return;
	}

	// Linear setup: everything is Linear except the color properties listed here.
	DefaultColorSpace = EPropertyColorSpace::Linear;
	PerPropertyColorSpace.Add(MP_BaseColor, EPropertyColorSpace::sRGB);
	PerPropertyColorSpace.Add(MP_EmissiveColor, EPropertyColorSpace::sRGB);
	PerPropertyColorSpace.Add(MP_SubsurfaceColor, EPropertyColorSpace::sRGB);
}
/**
 * Enqueues a render command that deletes the given material proxy.
 *
 * @param Proxy	Proxy to delete; the pointer is captured by value and deleted when the command runs.
 */
static void DeleteCachedMaterialProxy(FExportMaterialProxy* Proxy)
{
	ENQUEUE_RENDER_COMMAND(DeleteCachedMaterialProxy)(
		[ProxyToDelete = Proxy](FRHICommandListImmediate& /*RHICmdList*/)
		{
			delete ProxyToDelete;
		});
}
/**
 * Hands every proxy stored in MaterialProxyPool to DeleteCachedMaterialProxy and then
 * empties the pool.
 */
void FMaterialBakingModule::CleanupMaterialProxies()
{
	// Iterate by const reference: the entries are only read, so copying each
	// key/value pair per iteration is unnecessary.
	for (const auto& PoolEntry : MaterialProxyPool)
	{
		DeleteCachedMaterialProxy(PoolEntry.Value.Value);
	}

	// Clear the pool so no entry keeps a pointer that has been handed off for deletion.
	MaterialProxyPool.Reset();
}
UTextureRenderTarget2D* FMaterialBakingModule::CreateRenderTarget(bool bInForceLinearGamma, EPixelFormat InPixelFormat, const FIntPoint& InTargetSize)
{
TRACE_CPUPROFILER_EVENT_SCOPE(FMaterialBakingModule::CreateRenderTarget)
UTextureRenderTarget2D* RenderTarget = nullptr;
Copying //UE4/Dev-Rendering to //UE4/Dev-Main (Source: //UE4/Dev-Rendering @ 4041614) #lockdown Nick.Penwarden ============================ MAJOR FEATURES & CHANGES ============================ Change 3774677 by Arne.Schober DR - Deprecated SetLocal from the RHICmdlist Fixed some unnecessary PSO collisions. Change 3809579 by Chris.Bunner Back out changelist 3774677. #jira UE-53483 Change 3810363 by Mark.Satterthwaite More random fixes to mtlpp: most important is the extension to Buffer that allows creation of sub-buffers that are merely views onto a sub-range of the parent. These sub-buffers are valid to use throughout the mtlpp API with two exceptions: they may not be used for visibilityResultsBuffers and Set*BufferOffset functions cannot take this offset into account (as the encoder does not hold onto the buffers and I don't want it to). In the case of Set*BufferOffset the caller has to know what is going on and in the case of visibilityResultsBuffers it'll just assert as it isn't sensible. This makes it *much* easier to do things like sub-buffer allocation, though the caller must be aware of the alignment restrictions of their intended usage as they are not possible to enforce. For example, a call to SetVertexBuffer requires an offset alignment must match the alignment of the data-type in the shader for "device" resources, or for "constant" data it must be max(4, sizeof(datatype)) on iOS and 256 on macOS. This should allow for much more tightly packed sub-allocations than earlier approaches, though older drivers (e.g. Mac OS X 10.11) enforce only the coarser "constant" data restriction everywhere. Change 3810407 by Marcus.Wassmer PR #4322: ShadowSetup Bug Fix: Only stencil mask drawn meshes (Contributed by DSDambuster) Change 3810676 by Guillaume.Abadie Makes r.Test.SecondaryUpscaleOverride work with any arbitrary pixel size. Change 3810696 by Guillaume.Abadie Adds support for #include "../MyFile.ush" in the shader compiler. 
Change 3810698 by Guillaume.Abadie Implements enum class based shader permutation dimension. Change 3810699 by Guillaume.Abadie Implements Diaphragm DOF ground work. Change 3811536 by Guillaume.Abadie Pulls the trigger on CircleDOF's setup pass for DiaphragmDOF. Change 3811958 by Mark.Satterthwaite More fixes for mtlpp. Change 3811964 by Mark.Satterthwaite Only views onto a mtlpp::Buffer should return a valid parent-buffer. Change 3812604 by Guillaume.Abadie Changes Diaphragm DOF's source file layout. Change 3812827 by Mark.Satterthwaite More missing/broken functionality in mtlpp fixed and fixed obvious leaks. Change 3812920 by Guillaume.Abadie Adds support for per mip level UAV in FSceneRenderTarget. Change 3812926 by Mark.Satterthwaite Change the way we handle mtlpp resource construction to avoid leaks. Change 3812960 by Rolando.Caloca DR - vk - Disable DFGI Change 3812968 by Rolando.Caloca DR - Linker fix Change 3813318 by Mark.Satterthwaite Fix linear texture allocation from a buffer sub-view. Change 3813326 by Mark.Satterthwaite Fix another Metal mtlpp sub-buffer allocation failure. Change 3813328 by Guillaume.Abadie Removes global samplers in TAA for GL4, Vulkan and Switch. Change 3813937 by Rolando.Caloca DR - Fix logs not getting dumped when r.DumpSCWQueuedJobs is on Change 3813947 by Rolando.Caloca DR - noshaderworker should override r.XGEShaderCompile Change 3817017 by Uriel.Doyon Fixed texture editor black screen #jira UE-53653 Change 3818568 by Rolando.Caloca DR - Fix log when shader jobs crash - Move log10 to common - Added COMPILER_VULKAN define Change 3818603 by Uriel.Doyon Fix to static analysis warning Change 3818623 by Rolando.Caloca DR - Workaround hlslcc loop unrolling bug Change 3819070 by Uriel.Doyon Fix to stat duplication. Change 3819105 by Uriel.Doyon Refactored volume sample shader to avoid using texture dimension. 
Change 3819136 by Rolando.Caloca DR - vk - Per platform files (empty) Change 3819180 by Rolando.Caloca DR - vk - Move defines out of config into per platform Change 3819247 by Rolando.Caloca DR - vk - Remove more defines into platform settings Change 3819318 by Rolando.Caloca DR - vk - Fixes for linking Change 3819868 by Rolando.Caloca DR - vk - Linux & Android fixes Change 3819873 by Guillaume.Abadie Adds support for PermutationId on r.DumpShaderDebugInfo=1 Change 3819940 by Rolando.Caloca DR - vk - Fix Linux issues Change 3819956 by Rolando.Caloca DR - vk - Invalid check Change 3819961 by Michael.Lentine Hide attributes when plugin is not present Change 3819980 by Rolando.Caloca DR - vk - Standard validation always Change 3820039 by Rolando.Caloca DR - vk - Fix invalid ensure Change 3820326 by Rolando.Caloca DR - vk - Linux compile fix Change 3820422 by Michael.Lentine Add back GBufferAO. Change 3820433 by Rolando.Caloca DR - Fix D3D12 crash on 20 thread (10x2 cores) machines Change 3821677 by Rolando.Caloca DR - vk - Win32 compile fix Change 3821961 by Rolando.Caloca DR - Vulkan uses real UB by default on non-Android Change 3821968 by Rolando.Caloca DR - vk - Update glslang 1.0.65.1 Change 3821969 by Uriel.Doyon Added support for stat groups that must be sorted by name. Defined by DECLARE_STATS_GROUP_SORTBYNAME. Change 3821983 by Rolando.Caloca DR - vk - Change to static array (0.1ms on 10k draw calls) Change 3824141 by Rolando.Caloca DR - vk - Fix static analysis - Bumped up some (c) 2017->2018 Change 3824355 by Rolando.Caloca DR - vk - Accessor to find out if a cmd buffer has been submitted Change 3824420 by Rolando.Caloca DR - Sanity check number of queries per batch on D3D11 as to not break other RHIs Change 3824463 by Rolando.Caloca DR - Removed dummy ensure for D3D12 Change 3824609 by Rolando.Caloca DR - vk - Linux compile fix Change 3826074 by Mark.Satterthwaite Start IMP-caching the various descriptor types in mtlpp. 
Change 3826098 by Rolando.Caloca DR - vk - Dump layer compile fixes Change 3826113 by Rolando.Caloca DR - vk - Missing dump functions Change 3826302 by Rolando.Caloca DR - vk - Compile fix - Change dump handles to %p Change 3826635 by Mark.Satterthwaite Forward declarations required for mtlpp compilation without exposing Metal headers - plus fixes to the mtlpp test compiler. Change 3827072 by Mark.Satterthwaite Switch some more mtlpp descriptors over to IMPTables from objc_msgSend. Change 3827909 by Guillaume.Abadie Replaces diaphragm DOF's prefiltering with LDS bank coherent bilateral reduction, and implements 1/8 res background gathering pass. Change 3827952 by Guillaume.Abadie Updates copy right to year 2018 on diaphragm DOF's new files. Change 3828055 by Rolando.Caloca DR - vk - Rename in prep for changes Change 3828229 by Guillaume.Abadie Avoids to log multiple time global shader type name that have multiple permutations when verifying global shader map. Change 3828427 by Guillaume.Abadie Reimplements Max3x3 gathering post filtering for Diaphragm DOF with proper shader permutation. Change 3829979 by Guillaume.Abadie Fixes a color NaN source in diaphragm DOF's TAA pass. Change 3830116 by Rolando.Caloca DR - vk - Fix GPU queries/frame time on old system - New system in place, disabled temporarily Change 3830169 by Rolando.Caloca DR - vk - Fix async pso creation crash Change 3830193 by Rolando.Caloca DR - vk - CPU RHI thread improvement Change 3830291 by Guillaume.Abadie Automatically lower the number of gathering rings on background half res gather pass as far CoC is getting smaller. Change 3830300 by Rolando.Caloca DR - vk - Static analysis fix: Split VulkanCommon.h out of VulkanConfiguration.h Change 3830589 by Mark.Satterthwaite In mtlpp cache the IMPTables for all the Metal @protocol's that are dependent on the MTLDevice, this avoids a mutex & map lookup. Also make all the concrete types store their IMPTable statically as it won't change. 
Change 3830793 by Mark.Satterthwaite Fix a small number of bugs introduced with the mtlpp descriptor and table caching. Change 3831491 by Jian.Ru Fix driver version unknown #jira UE-53688 Change 3832335 by Rolando.Caloca DR - vk - Change include Change 3832550 by Rolando.Caloca DR - vk - Occlusion query rewrite WIP Change 3832589 by Rolando.Caloca DR - vk - Minor refactor to pools in prep for timestamps Change 3832618 by Rolando.Caloca DR - vk - Do not block timestamp queries Change 3832636 by Rolando.Caloca DR - vk - Fix old timestamp queries Change 3833138 by Rolando.Caloca DR - vk - Fix timestamp queries Change 3833249 by Rolando.Caloca DR - vk - Test lock Change 3833667 by Rolando.Caloca DR - vk - Old queries wait on the RHI thread now instead of the driver (disabled) Change 3833907 by Daniel.Wright Fixed NextStartOffset UAV index out of bounds Change 3833918 by Daniel.Wright D3D12 RHI: only refcount uniform buffers if GRHINeedsExtraDeletionLatency is false, which is no longer the case for PC or Xbox. The refcounting was heavy on performance as reported by a licensee because FRHIResource uses atomics for refcounting, which is only necessary when GRHINeedsExtraDeletionLatency is disabled. Change 3834852 by Rolando.Caloca DR - vk - Missing file Change 3834858 by Guillaume.Abadie Implements r.DOF.MinimalFullresBlurringRadius Change 3834979 by Rolando.Caloca DR - vk - Fix Change 3836117 by Rolando.Caloca DR - vk - Update to 1.0.65.1 Change 3836122 by Rolando.Caloca DR - vk - Added r.Vulkan.SubmitOcclusionBatchCmdBuffer - Added new error codes/messages Change 3836421 by Mark.Satterthwaite For the purposes of debugging and conformance testing mtlpp make it possible to compile *without* the IMP cache so that we call the underlying Objective-C. Change 3836896 by Uriel.Doyon Fixed concurrency and exit issues around d3d12 pipeline states on windows. 
Change 3837385 by Rolando.Caloca DR - vk - Dump memory on OOM Change 3837427 by Rolando.Caloca DR - vk - Change some arrays to array views Change 3837800 by Guillaume.Abadie Implements SHADER_PERMUTATION_RANGE_INT to make contiguous integer permutations that does not start to 0. Change 3838128 by Rolando.Caloca DR - vk - Support for non-cached memory types Change 3838540 by Guillaume.Abadie Refactors Diaphragm DOF's CoC tile buffer under a single API for better maintainability. Change 3838731 by Rolando.Caloca DR - vk - Descriptor pools per command buffer pool (turned off) Change 3838961 by Rolando.Caloca DR - vk - Use ring buffer for per frame uniform buffers - Enable descriptor pools per layout recycled per command buffer Change 3839087 by Rolando.Caloca DR - vk - Compile fixes for Android Change 3839106 by Marcus.Wassmer PR #4413: Removing unnecessary call to FString::ToLower (Contributed by gsfreema) Change 3839252 by Mark.Satterthwaite Fix mtlpp::Resource move operators. Change 3839426 by Marcus.Wassmer Duplicate 380972 Make PC GPU Benchmarks more reliable Change 3840041 by Guillaume.Abadie Fixes shader compilation failure in TAA with alpha channel through post processing support. Change 3840257 by Chris.Bunner Swapping a mul() to * in HLSLTranslator::Dot to allow scalar transformations per a UDN ticket. Change 3840308 by Rolando.Caloca DR - vk - Support for UB & non-UB on emulation mode Change 3840586 by Rolando.Caloca DR - Copy 3840577 Fix for CPUs with more than 16 cores Change 3840671 by Rolando.Caloca DR - vk - Copy from 3840663 Fix for layout ensure on HMD projects on Vulkan Change 3840980 by Rolando.Caloca DR - vk - Android compile fixes Change 3841989 by Guillaume.Abadie Slices Diaphragm DOF's Gather pass in multi shader files, and CFLAG_StandardOptimization flag for faster iteration time. Change 3842216 by Guillaume.Abadie Fixes DDOF's foreground alpha channel. 
Change 3842217 by Guillaume.Abadie Implements r.DOF.MaximalForegroundBlurringRadius Change 3842353 by Guillaume.Abadie Allows to disable foreground gathering with r.DOF.MaximalForegroundBlurringRadius=0 Change 3842747 by Rolando.Caloca DR - vk - Missing use of GPoolSizeVRAMPercentage - Support for smaller allocations if page size is not available Change 3842791 by Rolando.Caloca DR - vk - Use 95% of available GPU memory to handle some fragmentation Change 3843690 by Guillaume.Abadie Fixes diaphragm DOF's foreground after all this refactoring. Change 3844439 by Guillaume.Abadie Improves Coc dilate pass to make the gather pass as fast as possible, but still without artifacts caused by the fast gathering optimisation. Change 3844946 by Mark.Satterthwaite rd_route v1.1.1 with attached TPS approval. For macOS function interposition which is useful for debugging and the occasional workaround. Change 3845164 by Mark.Satterthwaite Add LLM support for macOS, including tracking of memory allocated in Objective-C. This makes use of runtime method swizzling in the Objective-C runtime and the rd_route library I added for Richard Wallis, which allows for arbitrary runtime function interposition and allows me to hook the custom allocators used in Apple's many Objective-C frameworks on which the whole macOS edifice is built. Objective-C objects are charged to the calling scope as they are too common to impose their own without murdering frame rate. We would need a TPS approval for an iOS function interposition library for this to work fully on iOS, if desired in the short term discarding LowLevelFree events that aren't in the map rather than asserting will workaround the problem. 
Change 3845849 by Marcus.Wassmer Fix clang and some normal refactor errors Change 3846026 by Rolando.Caloca DR - vk - Descriptor set allocation scheme rewrite - Type hash for each pool - Desc sets Pool on device Change 3846169 by Rolando.Caloca DR - vk - Remove old code for non-layout descriptor set pools Change 3846205 by Mark.Satterthwaite Disambiguate the PatchControlPointOut struct definitions in Metal tessellation shaders at Apple's suggestion to avoid a metallib gotcha. Change 3846346 by Arne.Schober DR - Missing Vector instructions Change 3847037 by Arne.Schober DR - Fix issue with GPU skincache where the offset of the clothbuffer is not relative to the offset of the actual vertexbuffer. Fixed MorphTarget Skincache Offset mixxup Change 3847275 by Marcus.Wassmer Copying MGPU to Dev-Rendering (//UE4/Dev-Rendering) Change 3847464 by Rolando.Caloca DR - vk - Fix static analysis warning Change 3847707 by Michael.Lentine Only use MorphTargetOffset when the shader enables morph targets. Change 3848533 by Richard.Wallis Handle Metal adding FirstInstance into [[ instance_id ]] which is different to other APIs. SV_InstanceID and SV_VertexID should now have their respective base instance and base vertex ID's subtracted before use in the shader. #jira UE-51716 Change 3848625 by Richard.Wallis Compile Fix Change 3848725 by Rolando.Caloca DR - Remove use of Build/SetLocalGraphicsPipelineState Change 3848797 by Rolando.Caloca DR - Deprecate Build/SetLocalGraphicsPipelineState Change 3849237 by Arne.Schober DR - AddCustom Ver for ModelVertex Serialization Change 3851247 by Rolando.Caloca DR - vk - Util functions Change 3851523 by Arne.Schober DR - Update Reflection Comparission shot from the BuildFarm. Change 3851859 by Rolando.Caloca DR - vk - Skip loader Change 3851889 by Krzysztof.Narkowicz Removed lights with lighting channels out of tiled deferred light list. 
Tiled deferred lights do not support lighting channels and it's wasn't worth to add extra complexity to this shader in order support this special case. #jira UE-51512 Change 3852181 by Rolando.Caloca DR - vk - Linux compile fix Change 3852547 by Uriel.Doyon Fixed Pre-Exposure shader compilation and Temporal AA issue. #jira UE-54276 Change 3852637 by Arne.Schober DR - Fixing Normal Automated Test Result Change 3853167 by Richard.Wallis AvfPlayer - support for streaming media. Due to an operator new/delete mismatch in Apples CFNetwork - we've had to change out one of that framework allocators using rd_route to avoid the memory corruption. #jira UE-35637 Change 3853447 by Chris.Bunner Fixing typos. Change 3853645 by Krzysztof.Narkowicz Fixed light functions on subsurface materials Removed strange code from blending between static and dynamic shadows #jira UE-50275 Change 3853660 by Rolando.Caloca DR - Fix OpenGL overwriting texture samplers on forward renderer Change 3853945 by Mark.Satterthwaite Duplicate #3831616 Fix the black ground scattering on Metal - we've had issues with the atmospheric fog calculations for a long time - one or more intermediate operations generates different precision on Metal so we end up passing -ve values into sqrt which then generates NaN/INF. For Metal when compiling this file and this file only #define sqrt() to sqrt(abs()) so that we don't see anymore unexpected black in atmospheric rendering. This is far from ideal but I don't want to make abs all inputs into every sqrt because AFAIK this is the only case where we have an issue, and until we to investigate each intermediate calculation that isn't ridiculously, soul-crushingly tedious, it isn't practical to identify the source of the error. #jira UE-53720 Change 3853966 by Mark.Satterthwaite Duplicate #3835852 Fix tessellation shaders in Metal with Manual Vertex Fetch enabled: - The control points idnex buffer shouldn't collide with anything else. 
- We can't use the optimisation of loading texture width & height from the buffer meta-table in tessellation shaders as the combined stages don't guarantee not to clobber unused buffer slots and screw it up when we use linear textures. #jira UE-53851 Change 3854250 by Uriel.Doyon Fix fbx automation tests Change 3854736 by Uriel.Doyon Added a tooltip to the EV100 slider in the exposure menu. Using game settings now disables the slider. #jira UE-53945 Change 3855047 by Jian.Ru Fix DFAO getting NANs when samples out of ViewRect #jira UE-54403 Change 3858197 by Krzysztof.Narkowicz View frustum shadow caster culling for pointlights/spotlights #jira UE-54381 Change 3860081 by Krzysztof.Narkowicz Tighter bounding sphere for a spotlight Replaced IntersectSphere(LightProxy->Origin, LightProxy->Radius) with LightProxy->SphereBounds for tighter culling of spotlights Directional light GetBoundingSphere() now everywhere returns Sphere((0,0,0),HALF_WORLD_MAX) for consistency and proper SphereBounds #jira UE-54258 Change 3860324 by Mark.Satterthwaite Update the macOS deployment target version to 10.12 from 10.11 as we officially ended support for El Capitan a while ago. Should mean that libraries compiled for 10.12 and up won't cause link warnings. Change 3860945 by Arne.Schober DR - Fix not releaseing SRV on render thread for FPositionVertexBuffer, FStaticMeshVertexBuffer, FColorVertexBuffer, FStaticMeshInstanceBuffer. 
#jira UE-54587 Change 3861129 by Jian.Ru Prevent distance culled objects from casting distance field direct shadows #jira UE-54533 Change 3861502 by Jian.Ru Exclude distance culled objects from DFAO calculation #jira UE-54533 Change 3862243 by Krzysztof.Narkowicz Changed radius of a directional light's bounding sphere from HALF_WORLD_MAX to WORLD_MAX in order to encopass entire WORLD_MAX box Change 3863476 by Krzysztof.Narkowicz Added BuildReflections option to ResavePackages commandlet #jira UE-54581 Change 3863717 by Rolando.Caloca DR - vk - Missed using pipeline cache on compute PSOs Change 3865332 by Arne.Schober DR - Fix UE-52356 Bone Weight Change 3866220 by Rolando.Caloca DR - vk - Fixed GetNativeResource missing on textures - Added support for -preferNvidia|AMD|Intel - Added VulkanRHIBridge.h - Minor fixes Change 3866222 by Rolando.Caloca DR - vk - Missed file Change 3866951 by Krzysztof.Narkowicz Fixed FreezeRendering on non editor builds: ComputeAndMarkRelevanceForViewParallel was calling FrozenMatricesGuard on multiple threads, reading and writing view matrices state in parallel. #jira UE-53640 Change 3867231 by Guillaume.Abadie Adds alpha mode to allow the tonemapper to passthrough the alpha channel for broadcast industry. 
Change 3867233 by Guillaume.Abadie Fixes a compilation failures in TAAU with r.PostProcessing.PropagateAlpha==2 Change 3867594 by Daniel.Wright Removed EditorOnlyDefaultMaterials, which added 79s of shader compilation during startup Added a dialog when opening the Material Editor on a Default Material, warning of advanced workflow Preventing Material Editor Apply or Save for a Default Material when the preview material has compilation errors Change 3870048 by Daniel.Wright Cleaned up formatting in TranslucentRendering from merges Change 3870106 by Krzysztof.Narkowicz Fixed some FArchive Tell()/Seek() 64bit->32bit truncations Change 3870211 by Rolando.Caloca DR - vk - Added -vulkanvalidation=N/-vulkanstandardvalidation/-novulkanstandardvalidation to set validation layer behaviour from cmd line Change 3870225 by Rolando.Caloca DR - vk - Some platforms do not use a standard swapchain Change 3870267 by Arne.Schober DR - SafeRelease SRVs that might be hold by the Vertexfactories (maybe due to indirect use in GlobalResources) Note that the VFs are not owners of the data, e.g the underlying Buffers might be released before this and this reference counting should be uneccessary Change 3870647 by Daniel.Wright Moved FogRendering.h to Renderer Change 3872130 by Krzysztof.Narkowicz Disable USE_GLOBAL_CLIP_PLANE for MATERIAL_DOMAIN_POSTPROCESS and MERIAL_DOMAIN_UI Merging GitHub Pull request #4459 "When material domain is not needing global clip plane there is no need to generate any code involving it. This does not alter output but removes lot of code at vertex shader and pixel shaders. At least on mobile rendered was actually generating clipping code for ui materials." #jira UE-54616 Change 3872145 by Rolando.Caloca DR - vk - Optional SupportsMarkersWithoutExtension Change 3872404 by Uriel.Doyon Added some guards when streaming virtual textures. Fixed optimized UCanvasRenderTarget2D::RepaintCanvas() to prevent resolving the texture twice. 
Fixed bad mipmap generation with UCanvasRenderTarget2D. Change 3872507 by Arne.Schober Back out changelist 3870267 Change 3874176 by Ben.Marsh IncludeTool: Add an flag to prevent scanning source files for exported symbols. Change 3874935 by Krzysztof.Narkowicz Fixed white thumbnails and other issues with sky lighting on ES3_1 path, by disabling GGX prefiltering, as mobile path doesn't have a single cubemap with all initialized mips. Instead it ping-pongs between 2 partially initialized. #jira UE-54656 Change 3875710 by Daniel.Wright Renamed uniform buffer member macros to be much shorter for readability Change 3876665 by Guillaume.Abadie Cherry-pick 3870715: Implements DOF's hybrid scatering bare bones. Change 3876666 by Guillaume.Abadie Cherry-pick 3871786: DOF hybrid scatering: fixes NaN source, transition to gather on close to screen edge and low intensity. Change 3876677 by Guillaume.Abadie Cherry-pick 3872348: Implements neighbor comparison for DOF's scattering compilation pass. Change 3876680 by Guillaume.Abadie Cherry-pick 3872357: Oups... fixes build... Change 3876683 by Guillaume.Abadie Cherry-pick 3872475: Controls number of mip to generate with DOF's reduce pass. Change 3876687 by Guillaume.Abadie Cherry-pick 3874104: Fixes various bugs in diaphragm DOF's hybrid scattering. Change 3876690 by Guillaume.Abadie Cherry-pick 3874144: Packs multiple DOF scattering group into same draw instance. Change 3876694 by Guillaume.Abadie Cherry-pick 3874275: Switches hybrid scattering with indexed indirect draw call to reduce scatter vertex shader invocation. Change 3876695 by Guillaume.Abadie Cherry-pick 3874674: Records min and max coc on DOF's setup's draw event. 
Change 3876783 by Rolando.Caloca DR - Static analysis fix Change 3876845 by Guillaume.Abadie Implements USceneCaptureComponent::ProfilingEventName Change 3877197 by Rolando.Caloca DR - vk - OQ fixes (disabled) Change 3877428 by Krzysztof.Narkowicz Merged with tiny tweaks Ansel photography plugin improvements from Adam Moss (GitHub pull request #4426): -The free-roaming photography camera has new constraints by default, i.e. it can't pass through walls -Photography session can be started and stopped programmatically, e.g. making it possible to bind photography to an alternative hotkey or button combo. This was an often-requested feature. -Tweakables and utilities are now exposed through a Blueprint Function Library (rather than direct manipulation of console variables) -The Ansel photography session UI now exposes some engine effect tweakables as sliders. For example, if the game is using depth-of-field then sliders are made available to allow the photographer to change the focal depth etc. The developer may suppress this behavior through the Blueprint Function Library. -Letterboxing is now removed during multi-part capture, d'oh. -Tiled shots are taken at full resolution even if ScreenPercentage < 100 -SSR is enabled during super-resolution shots since Ansel is now better at hiding any ensuing artifacts -Postprocess settings are frozen at session start to avoid discontinuities during photography, i.e. wandering between postprocess volumes when the camera auto-moves for stereo and 360 shots. #jira UE-54244 #4426 Change 3879086 by Krzysztof.Narkowicz Fixed sky/reflection capture (without owner) update - they are now updated only with a correspoding world Change 3879090 by Guillaume.Abadie Fixes tones of regressions on diaphragm DOF's recombine passes. 
Change 3879198 by Rolando.Caloca DR - vk - Support for real uniform buffers on Android platforms Change 3879993 by Krzysztof.Narkowicz -Fixed int64->int32 FArchive offset truncation in TShaderMap, VertexFactory and TextureDerivedData -Fixed FSerializationHistory bug, when trying to serialize 0 bytes #jira UE-43203 Change 3881462 by Guillaume.Abadie Implements full res DOF's setup pass for cheaper full res gathering in recombine pass. Change 3881524 by Krzysztof.Narkowicz Fixed compilation by removing FTickableEditorObject from FPreviewScene Change 3881724 by Chris.Bunner Static analysis fix. #jira UE-54762 Change 3881861 by Rolando.Caloca DR - vk - Fix layout warning when generating mip chain Change 3881864 by Rolando.Caloca DR - Use render passes on HZB Change 3882236 by Yuriy.ODonnell IndirectLightingColorScale is now applied to SubsurfaceLighting and DiffuseLighting. Was previously only applied to DiffuseLighting. #jira UE-42534 #github 3326 Change 3882325 by Guillaume.Abadie Implements FocusOnly lower gathering pass for Diaphragm DOF's slight out focus temporal stability. Change 3882340 by Rolando.Caloca DR - vk - Fix api dump Change 3882430 by Rolando.Caloca DR - vk - KHR_maintenance2 Change 3882563 by Rolando.Caloca DR - Add depth-stencil access mode to PSO initializer Change 3882929 by Rolando.Caloca DR - vk - Proper fix for maintenance extension macros Change 3883087 by Mark.Satterthwaite Allow disabling VSync in windowed mode for macOS 10.13.4+ and above. Change 3883597 by Guillaume.Abadie Collapses full and half res DOF setup passes together. Change 3883702 by Guillaume.Abadie Fixes mac's build. 
Change 3884747 by Uriel.Doyon Fix for static analysis warning Change 3884975 by Rolando.Caloca DR - vk - Move some platform defines to platform properties Change 3884988 by Rolando.Caloca DR - vk - Make an override per platform Change 3885832 by Rolando.Caloca DR - vk - Cosmetic change to group similar members Change 3885891 by Rolando.Caloca DR - vk - Some _RenderThread functions to avoid stalls Change 3886044 by Rolando.Caloca DR - Added RHI api _RenderThread version of RHICreateTextureReference RHICreateShaderLibrary RHICreateRenderQuery Change 3886560 by Guillaume.Abadie Fixes strong aliasing on TAAU's fast shader permutation. This adds a 6th neighbor sampling, and switch AA_TONE ON as TAA does for its fast shader permutation. Change 3886749 by Guillaume.Abadie Cherry-pick 3884748: Implements DOF's BuildBokehLUT for diaphragm blades simulation. Only used in hybrid scattering for now. Change 3886750 by Guillaume.Abadie Cherry-pick 3885457: Simulates diaphragm blades' curvature on bokeh. Change 3886752 by Rolando.Caloca DR - Fix metal static analysis Change 3887460 by Uriel.Doyon Fixed to more static analysis warning. Change 3888201 by Rolando.Caloca DR - vk - Added r.Vulkan.SubmitAfterEveryEndRenderPass - Fixed bad layout on rendering back buffer Change 3888209 by Rolando.Caloca DR - vk - Unity compile fix Change 3888254 by Rolando.Caloca DR - vk - Fix async texture layout Change 3888893 by Guillaume.Abadie Simulates bokeh in DOF's slight out of focus. Change 3889085 by Guillaume.Abadie Fixes DOF's reduce pass sampling outside viewport. Change 3889924 by Rolando.Caloca DR - vk - Skip seemingly bad validation error Change 3890573 by Daniel.Wright Only initialize FDiaphragmDOFGlobalResource in Feature Level 5 Change 3890590 by Arne.Schober DR - Fix Paper2d crash. When addMesh is called the Vertex and Indexbuffers are nulled out. re-create Dynamic Mesh builder for every Mesh instead. 
#jira UE-55063 Change 3890638 by Arne.Schober DR - Better fix for Paper2d which honors batching #jira UE-55063 Change 3891099 by Krzysztof.Narkowicz 1.5 texel shadow offset fix inside Manual2x2PCF based on #4485 GitHub pull request #jira UE-54985 #4485 Change 3891234 by Krzysztof.Narkowicz Optimized PCF2x2 and PCF3x3 - merged #4494 GitHub pull request #jira UE-55121 Change 3891407 by Rolando.Caloca DR - vk - Set vendor id earlier Change 3891417 by Rolando.Caloca DR - vk - Missing layout transitions Change 3891718 by Arne.Schober DR - Do not recreate one Frame Resource for dynamic draws #jira UE-55063 Change 3891925 by Yuriy.ODonnell Fix/workaround for inconsistent preprocessor definitions for NVAftermath that result in FD3D11DynamicRHI class layout mismatch. NVAftermath support is now enabled by default for Win64. NVAftermath is declared as a private dependency in D3D11RHI. It does not automatically propagate to modules that explicitly include private RHI headers (OculusHMD, OSVR, OSVRInput). This results in NV_AFTERMATH being defined while compiling RHI module and not defined when compiling other modules, causing memory corruption at runtime. The long-term solution for this and similar issues requires some mechanism for adding transitive module dependencies, so that anyone that depends on D3D11RHI module would automatically also get the NVAftermath. Additionally, private headers should *never* be included directly by external modules. The short-term solution is to explicitly add NVAftermath dependency to OculusHMD, OSVR and OSVRInput. Additionally, NV_AFTERMATH is no longer forced by D3D11RHIPrivate.h when it's not defined. This allows catching this kind of mismatch in the future through a compiler warning (C4668). 
#jira UE-53065 Change 3891987 by Rolando.Caloca DR - vk - Support for dedicated allocations Change 3892339 by Jian.Ru Fix a crash when tessellation shaders are used in dx12 #jira UE-55127 Change 3892528 by Rolando.Caloca DR - vk - Update Linux headers Change 3892867 by Rolando.Caloca DR - vk - Don't create swapchain if not needed Change 3893416 by Guillaume.Abadie Implements bokeh simulation on foreground and background gather. Change 3893732 by Chris.Bunner GetRelevance_Internal should use the immediate parent resource, not the base, as some features are overridden by permutations e.g. UsesWorldPositionOffset. #jira UE-53404 Change 3893868 by Guillaume.Abadie Allocates diaphragm DOF's buffers and structured buffer only on supported platforms. Change 3893917 by Chris.Bunner Potential fix for CIS. Change 3893933 by Chris.Bunner Duplicating CL 2647737 as this is the same issue from that JIRA where accessing game-thread data was being prevented. We don't have this check in UMaterial::GetMaterialResource already, but presumably the UMaterialInstance case was never removed as we've not been calling it until now. Change 3894218 by Rolando.Caloca DR - vk - Remove stat counters per draw call, gains 10% CPU on Infiltrator Change 3894579 by Arne.Schober RT - Fix assert not in RenderingThread from Triangle Renderer. #jira UE-55247 Change 3894724 by Rolando.Caloca DR - vk - New API for batching barriers Change 3894909 by Arne.Schober DR - Fix crash in Speedtree wind where Renderdata is unavailable #jira UE-54544 Change 3895414 by Rolando.Caloca DR - Add a configurable threshold for SCWs time outs Change 3896429 by Marcus.Wassmer Allow variable frame-latency delay in FrameGrabber frames. For performance you want at least a 1 frame delay so you don't sync the GPU to the CPU. 
Change 3896495 by Marcus.Wassmer Set pointer properly Fix CIS Change 3897253 by Guillaume.Abadie Fixes CIS warning in diaphragm DOF Change 3899179 by Guillaume.Abadie Implements background hybrid scatter occlusion for diaphragm DOF. Change 3903654 by Rolando.Caloca DR - vk - Rework dump layer to allow other layers Change 3903766 by Rolando.Caloca DR - vk - More wrappers Change 3904025 by Rolando.Caloca DR - vk - More wrappers Change 3904342 by Rolando.Caloca DR - vk - Track image resources & callstacks Change 3904346 by Rolando.Caloca DR - vk - Copy fix from 4.19 for flickering grass Change 3904510 by Rolando.Caloca DR - vk - Compile fix Change 3904914 by Daniel.Wright [Integrate] Fixed PS4 transitions with forward shading Change 3904916 by Daniel.Wright [Integrate] Fixed PS4 transitions with occlusion queries Change 3905975 by Rolando.Caloca DR - vk - Missing wrappers Change 3905977 by Rolando.Caloca DR - vk - Missed file Change 3907829 by Rolando.Caloca DR - Move depth bounds to the PSO Change 3907832 by Rolando.Caloca DR - vk - Prep for delaying transitions Change 3907834 by Rolando.Caloca DR - vk - Fix for depth stencil issues/validation errors Change 3907967 by Rolando.Caloca DR - vk - Linux compile Change 3908093 by Rolando.Caloca DR - vk - Fix depthstencil layout on descriptors Change 3908393 by Rolando.Caloca DR - vk - Disable dedicated allocation as it causes crashes on Nvidia 700 series Change 3908401 by Rolando.Caloca DR - Do transitions outside render pass Change 3908422 by Rolando.Caloca DR - vk - Fix transition state not getting stored Change 3908735 by Guillaume.Abadie Cherry-pick 3896619: Fixes after TAAU post process material that had wrong default buffer UV. #jira UE-55317 Change 3908736 by Guillaume.Abadie Cherry-pick 3891352: Fixes ensure when visualizing HDR with TAAU. #jira UE-55019 Change 3908753 by Guillaume.Abadie Lets the renderer layout the views in the internal render targets like it prefers. 
Change 3909119 by Daniel.Wright Fix some static analysis warnings Change 3911943 by Rolando.Caloca DR - vk - Fix for packaging Vulkan projects Change 3912145 by Rolando.Caloca DR - vk - Fix layout on streaming textures Change 3913029 by Rolando.Caloca DR - Fix missing transition Change 3913048 by Rolando.Caloca DR - Fix for hlslcc Change 3913054 by Rolando.Caloca DR - vk - Fix number of layers on barrier Change 3913171 by Rolando.Caloca DR - vk - Fix for decal missing transition Change 3913211 by Rolando.Caloca DR - vk - Add debug name to image tracking Change 3913449 by Rolando.Caloca DR - vk - Restore transition Change 3913466 by Rolando.Caloca DR - Fix Vulkan EngineTest Change 3913537 by Rolando.Caloca DR - vk - Fixes independent samplers & textures (contributed by AMD) Change 3913548 by Rolando.Caloca DR - vk - Warning fix Change 3913691 by Rolando.Caloca DR - vk - Fixes for parallel (wip) Change 3914656 by Rolando.Caloca DR - vk - Fix bug when using separate samplerstates and textures Change 3914730 by Rolando.Caloca DR - vk - Bump version Change 3914764 by Rolando.Caloca DR - vk - Don't crash on exit Change 3915532 by Rolando.Caloca DR - vk - Parallel context fixes Change 3915589 by Rolando.Caloca DR - vk - Hoist and rename transition and layout manager class out of the context Change 3915592 by Rolando.Caloca DR - Fix gpu marker name Change 3917607 by Rolando.Caloca DR - vk - Fix depth bounds on Vulkan Change 3917609 by Rolando.Caloca DR - vk - Fix static analysis Change 3917616 by Rolando.Caloca DR - Fix D3D11 initialization Change 3920569 by Rolando.Caloca DR - vk - Prep for layout mgr refactor Change 3921023 by Rolando.Caloca DR - vk - Dump layer fixes Change 3921623 by Rolando.Caloca DR - vk - Prep refactor for layouts - Dump now shows marker tree Change 3922007 by Rolando.Caloca DR - vk - Fix extra allocation per draw call Change 3922442 by Rolando.Caloca DR - vk - Detect potential issues Change 3922470 by Rolando.Caloca DR - vk - Minor optimization 
Change 3922482 by Rolando.Caloca DR - vk - More minor optimizations Change 3923158 by Rolando.Caloca DR - Move r.DisableEngineAndAppRegistration out to common RHI and use it on Vulkan Change 3923486 by Rolando.Caloca DR - vk - Minor cpu optimizations Change 3923505 by Rolando.Caloca DR - vk - Use bigger allocations for uniform buffers Change 3923516 by Rolando.Caloca DR - vk - Android compile fix Change 3923557 by Rolando.Caloca DR - vk - Cache descriptorset layouts, refactor duplicated code Change 3923851 by Rolando.Caloca DR - vk - Linux compile fix Change 3924153 by Rolando.Caloca DR - vk - Support for dynamic UBs Change 3924193 by Rolando.Caloca DR - vk - Remove old per pso descriptor pools Change 3924197 by Rolando.Caloca DR - vk - Remove unused global uniform buffer pool Change 3924220 by Rolando.Caloca DR - vk - Wrap some unused classes in their define Change 3924234 by Rolando.Caloca DR - vk - Show ring buffer wrapping messages Change 3924243 by Rolando.Caloca DR - vk - Fix bad dynamic buffer Change 3924902 by Rolando.Caloca DR - vk - Fix crash running infiltrator Change 3925209 by Rolando.Caloca DR - vk - Fix bug with dynamic buffers - Remove old defines Change 3925300 by Rolando.Caloca DR - vk - Allow packed uniforms as dynamic UBs (with r.Vulkan.DynamicGlobalUBs) Change 3925627 by Rolando.Caloca DR - vk - Move DynamicOffsets into the pipeline state Change 3925834 by Rolando.Caloca DR - vk - Cache per stage information Change 3925835 by Daniel.Wright Fixed DisplayName for UParticleModuleCollisionGPU Change 3925897 by Rolando.Caloca DR - vk - Split update descriptors loop Change 3926488 by Rolando.Caloca DR - vk - 16MB for ring buffer on desktop, 8 MB for mobile Change 3928168 by Guillaume.Abadie Cherry-pick 3917219: Implements r.DOF.RecombineQuality Change 3928173 by Guillaume.Abadie Cherry-pick 3927888: Enables r.DOF.HybridScatter.BackgroundCompositing and r.DOF.HybridScatter.ForegroundCompositing to work when both enabled. 
Change 3928216 by Rolando.Caloca DR - vk - Fix Android - Fix static analysis Change 3929119 by Rolando.Caloca DR - vk - Rename some classes for clarity - Fix read-only cvar Change 3929151 by Rolando.Caloca DR - vk - Rename class Change 3930046 by Rolando.Caloca DR - Temp fix Vulkan flickering grass Change 3930148 by Rolando.Caloca DR - vk - Only update dirty descriptors - Use dynamic descriptors for packed global uniform buffers Change 3930998 by Guillaume.Abadie Packs shader permutation in different XGE submissions. Change 3931079 by Rolando.Caloca DR - vk - Fixes for Android and non-real ubs platforms Change 3931942 by Krzysztof.Narkowicz Depth rendering - When EarlyZPassMode is set to DDM_AllOccluders, dynamic objects need also to test bUseAsOccluder just like static ones #jira none Change 3932819 by Daniel.Wright [Integrate] Scene Textures uniform buffer * Base Pass Uniform Buffer now contains a Scene Textures uniform buffer. Previously the translucent base pass had to check ~40 loose scene texture parameters every draw. * FMeshMaterialShader's must now bind PassUniformBuffer and supply a valid pass uniform buffer. For most passes this is just FSceneTextureUniformParameters. * FRendererModule::DrawTileMesh can now cleanly set dummy scene texture resources, just by configuring how the pass uniform buffer is created. * Moved scene texture shader functions out of Common, into SceneTexturesCommon which must be manually included by shaders that want to use them * Separate Mobile Scene Textures uniform buffer to silo the platform complexities Moved DBuffer inputs out of FDeferredPixelShaderParameters and into FOpaqueBasePassUniformParameters Removed per-frame material uniform expressions. GameTime material node with period is now implemented with an fmod in the shader, without the use of MaterialFloat, so that it will happen at full precision. 
* Per-frame expressions were used when the GameTime material node had a period, to do the fmod on the CPU where 32 bit precision is guaranteed, for mobile GPU's where pixel shader precision is sometimes less than 32fp. Moved forward shading data into the Base Pass Uniform Buffer Removed instanced stereo support for the light cull grid - will have to be reimplemented without changing SRV's per draw Base pass sets View Uniform Buffer from DrawRenderState instead of choosing which one to set per-draw Fixed padding in nested uniform buffer structs Skip SRV members on Feature Level SM4 and below Change 3932964 by Rolando.Caloca DR - vk - Renderdoc on Android Change 3933095 by Daniel.Wright Moved FSceneTextureUniformParameters out of the opaque base pass uniform buffer. * Base Pass shaders now enable SCENE_TEXTURES_DISABLED when compiling for a material of any domain other than MD_Surface. These are used when rendering thumbnails of a material in a different domain, which could be opaque, but the opaque base pass drawing policy does not bind a scene textures uniform buffer, so the shader must not bind it. * Opaque materials can no longer use EyeAdaptation. Change 3933096 by Daniel.Wright Better d3d11 assert message when a uniform buffer was not set by the renderer Change 3933176 by Rolando.Caloca DR - vk - Prefer mailbox if available Change 3933271 by Ryan.Vance #jira UE-55936 Fixed missing referenced uniform bindings on AR pass-through camera shaders. Change 3934000 by Guillaume.Abadie Fixes Win32 build in ShaderCompilerXGE.cpp Change 3934299 by Guillaume.Abadie Fixes a bug in DOF's reduce operator that was causing color leaking between background and foreground. 
Change 3934699 by Daniel.Wright Added bAffectDistanceFieldLighting to landscape Change 3935190 by Daniel.Wright Forward Light Grid SRV's use StructuredBuffer on Metal, instead of 'invariant Buffer', which throws off RemoveUniformBuffersFromSource parsing Change 3935606 by Daniel.Wright Removed LightmapPolicy::Set which was needed for vertex lightmaps Renamed FVertexFactory::Set to SetStreams to make it findable Change 3936510 by Rolando.Caloca DR - vk - Update glslangValidator.exe to 1.0.65.1 for dumped debug SPIRV shaders Change 3936545 by Richard.Wallis Clone of CL's (3925763, 3925430, 3925424, 3925385, 3925278) Mark Satt's Xcode fixes from task stream //Tasks/UE4/Dev-UERNDR-354-mtlpp/ Plus XCode 9.2 compile fix in ApplicationPlatformCompilerPreSetup.h for -Wunused-lambda-capture. Change 3938061 by Daniel.Wright Vulkan: Added support for SRV's in Uniform Buffers Change 3938123 by Daniel.Wright Vulkan: Slightly better assert for null resources in uniform buffer Change 3939197 by Rolando.Caloca DR - vk - Disable custom memory mgmt Change 3939677 by Rolando.Caloca DR - vk - Fix static analysis warning Change 3939809 by Rolando.Caloca DR - vk - Fixes for async compute Change 3939875 by Rolando.Caloca DR - vk - Support for -vktrace Change 3939977 by Rolando.Caloca DR - vk - Skip a condition during gather UBs - Set up efficient compute async var - Fix validation cmd line Change 3939982 by Rolando.Caloca DR - vk - Revert mipchain Change 3939984 by Rolando.Caloca DR - vk - Remove unnecessary asserts Change 3940082 by Rolando.Caloca DR - vk - Custom mem mgr Change 3940475 by Rolando.Caloca DR - vk - Fix DFAO (indirect draw offset) Change 3940555 by Rolando.Caloca DR - vk - Minor fixes Change 3940675 by Rolando.Caloca DR - vk - Fix indirect type mismatch Change 3941111 by Rolando.Caloca DR - Renderpass bGeneratingMips Change 3941847 by Daniel.Wright Fixed Volumetric Lightmaps on Static geometry only working if the geometry had been built with Surface Lightmaps before 
Change 3941978 by Rolando.Caloca DR - vk - Minor fixes for presenting on compute queue Change 3942074 by Rolando.Caloca DR - vk - Remove some RHI stalls - Fixed swap chain stat Change 3943946 by Daniel.Wright Fixed Texcoord0 on Volume materials on a particle sprite, including SubUV particles. Change 3944065 by Daniel.Wright Fixed SceneDepth collision getting broken on GPU particles when a scene capture is rendering Change 3944158 by Daniel.Wright Fixed ViewUniformShaderParameters accessing GEngine->PreIntegratedSkinBRDFTexture too early during slate loading screen Change 3944865 by Rolando.Caloca DR - vk - Prep for render passes Change 3945196 by Rolando.Caloca DR - Move render pass validate to cpp Change 3945202 by Rolando.Caloca DR - vk - Some fixes for using real render passes Change 3945357 by Rolando.Caloca DR - Fix bad condition Change 3946295 by Yuriy.ODonnell Added a sentinel member to FLightMap, which is initialized in the ctor and reset in the dtor. Sentinel is then checked in FLightCacheInterface::GetLightMapInteraction(). This aims to shed some more light on a hard-to-repro crash, which is suspected to be a use-after-free bug: http://crashreporter/Buggs/Show/1785593 Change 3946407 by Rolando.Caloca DR - vk - Prep for refactor Change 3946648 by Rolando.Caloca DR - vk - Fixes for async compute (wip) Change 3947299 by Rolando.Caloca DR - vk - Fix static analysis Change 3948434 by Rolando.Caloca DR - vk - Fix exiting with parallel Change 3948928 by Rolando.Caloca DR - vk - Fix enabling draw markers for tools Change 3949021 by Rolando.Caloca DR - vk - Buffer tracking layer Change 3949602 by Rolando.Caloca DR - vk - static analysis fix Change 3949757 by Rolando.Caloca DR - vk - Remove bogus parameter Change 3949810 by Rolando.Caloca DR - vk - Move waits for cmd buffer Change 3950270 by Guillaume.Abadie Implements dedicated gather pass for foreground hole filling to avoid being VGPR bound in foreground gather pass, but still being able to amend foreground. 
Change 3950272 by Rolando.Caloca DR - vk - Minor refactor for semaphores Change 3950279 by Guillaume.Abadie Oups... fixes build Change 3950298 by Rolando.Caloca DR - vk - Gather wait semaphores in the cmd buffers Change 3950371 by Rolando.Caloca DR - vk - fixes for async compute Change 3950597 by Rolando.Caloca DR - vk - Fix for clip distance (fixes planar reflections) Change 3951075 by Rolando.Caloca DR - vk - Fix for async compute Change 3952524 by Guillaume.Abadie Some DOF enum refactoring. Change 3955016 by Daniel.Wright Fixed BuiltData package getting renamed into the map package during a content browser folder move, causing a redirector to be incorrectly placed in the map package Change 3955668 by Guillaume.Abadie Fixes a bug where full res coc buffer was computed even if not doing slight out of focus. Change 3956722 by Guillaume.Abadie Fixes a bug where r.DOF.MaximalForegroundBlurringRadius was screen percentage dependent. Change 3959212 by Guillaume.Abadie Prefixes all DOF's shaders files with DOF keyword. Change 3959705 by Guillaume.Abadie Optimises the DOF setup pass outputing half res and full res with LDS downsample. Change 3959941 by Guillaume.Abadie Halfs DOF's hybrid scatter compilation by using a unique downsampling for both foreground and background, instead of 2 reduce passes. Change 3962273 by Rolando.Caloca DR - Fix typos #jira UE-56317 PR #4586 Change 3962615 by Rolando.Caloca DR - vk - Compile fix Change 3962949 by Rolando.Caloca DR - Fix DOFDownsample extension Change 3962993 by Guillaume.Abadie Back out changelist 3962949 Change 3963016 by Guillaume.Abadie Adds missing DOFDownsample.usf Change 3963041 by Rolando.Caloca DR - vk - Misc changes to help integrate Change 3964293 by Guillaume.Abadie Fixes DOF's setup pass reading outside of the viewport. Change 3964475 by Guillaume.Abadie Collapses DOF's hybrid scatter compilation passes into reduce passes. 
Change 3964883 by Daniel.Wright Fixed 3d texture in uniform buffer on unsupporting RHI Change 3964897 by Rolando.Caloca DR - Compile fixes Change 3964914 by Guillaume.Abadie Fixes a bug on r.DOF.RecombineQuality=0 Change 3965153 by Guillaume.Abadie Fixes compile warning in D3D12Commands.cpp. Change 3965814 by Rolando.Caloca DR - Prep for integration conflict resolve Change 3965899 by Rolando.Caloca DR - Fix odd linkage issue Change 3966072 by Rolando.Caloca DR - More prep for merge Change 3966163 by Rolando.Caloca DR - Merge prep Change 3966844 by Guillaume.Abadie Packs multiple DOF scattered bokeh per instance and uses PT_RectList in DOF for platforms that can. Change 3967116 by Rolando.Caloca DR - Compile fixes for integration Change 3967273 by Rolando.Caloca DR - Use same path for mip generation Change 3967277 by Rolando.Caloca DR - vk - Fix mips on cubemaps Change 3967693 by Rolando.Caloca DR - Copying //UE4/Dev-Main@3912313 to //UE4-DevRendering, missing shaders Change 3967851 by Rolando.Caloca DR - Copying //UE4/Dev-Main@3912313 to //UE4-DevRendering, Engine 2/2 Change 3968083 by Rolando.Caloca DR - Integration compile fixes Change 3968240 by Rolando.Caloca DR - Shader compile fixes for integration Change 3968270 by Rolando.Caloca DR - Fix for missing hash calculation Change 3969426 by Rolando.Caloca DR - vk - Fix warning Change 3969869 by Krzysztof.Narkowicz Back out changelist 3946295 - UE-54537 is fixed, so no need for this debug sentinel. 
#jira none Change 3969944 by Rolando.Caloca DR - Warning fix Change 3970020 by Rolando.Caloca DR - Bump after integration Change 3970052 by Rolando.Caloca DR - Fix for mobile Change 3970236 by Daniel.Wright Causing decal shader to recompile to fix a merge bug Change 3970270 by Daniel.Wright Bump shader version from merge Change 3970339 by Olaf.Piesche Replace series of locks/unlocks with a single one for curve injection #tests QAGame Change 3970390 by Rolando.Caloca DR - Rename FSceneTextureUniformParameters to FSceneTexturesUniformParameters - Remove duplicate method for occlusion queries Change 3970523 by Rolando.Caloca DR - Fix serialization of shaders Change 3970533 by Arne.Schober DR - fix for removing the Speed tree wind when the scene gets deleted. The original enque rendercommand requeues the element onto the renderthread although the call already came from the Renderthread and the scene can get lost in between. #jira UE-56322 Change 3971160 by Guillaume.Abadie Fixes CompositeEditorPrimtive pass and SelectionOutline pass for VR editor to work with TAAU. Change 3971516 by Guillaume.Abadie Cherry-pick 3912629: Fixes SSR that was computing vigneting according to PrevScreen that could let some outside viewport samples going through when rotating the camera. #jira UE-55353 Change 3971594 by Krzysztof.Narkowicz Fixed assert inside BindLightMapVertexBuffer. FSplineMeshSceneProxy was calling BindLightMapVertexBuffer for invalid (still not generated) lightmap UV channel after mesh reimport. Simplified assert, as at the moment almost all of the high callsites already clamp lightmap uv channel. #jira UE-56321 Change 3971622 by Krzysztof.Narkowicz Fixed crash inside Indirect Lighting Cache. Data (reflection captures and lightmap) generation calls ULevel::GetOrCreateMapBuildData(), which can destroy lightmap data if level has legacy data. Last Lightmap generation step recreates this data, but if user cancels lightmap generation - it won't do that. 
#jira UE-56171 Change 3974788 by Rolando.Caloca DR - Remove GSupportsGenerateMips Change 3974789 by Rolando.Caloca DR - Remove bogus function Change 3974986 by Rolando.Caloca DR - vk - Tracking fixes Change 3974989 by Rolando.Caloca DR - vk - Don't submit dummy barriers Change 3975075 by Olaf.Piesche Update for particle curve injection improvement, fixing ES2 problems #tests QAGame tm-shadermodels, various color curve tests in-editor Change 3975957 by Uriel.Doyon Fixed invalid max texture resolution when using the bake material tools. Change 3978471 by Daniel.Wright New cvar r.SkylightUpdateEveryFrame Change 3978779 by Rolando.Caloca DR - Accessor for texture sizes Change 3978797 by Rolando.Caloca DR - Clean up RHI CopyTexture API Change 3978832 by Rolando.Caloca DR - vk - Workaround for RenderDoc crashing due to Descriptor Pool reset Change 3978836 by Rolando.Caloca DR - vk - Remove generate mips Change 3979201 by Rolando.Caloca DR - vk - RHI CopyTexture. Uses general layout for generating mips Change 3979204 by Rolando.Caloca DR - Use render passes and CopyTexture to generate mips Change 3979592 by Rolando.Caloca DR - Warning fix Change 3980855 by Krzysztof.Narkowicz Optimize bounding sphere radius after non-uniform scale by using bounding box extent. #jira UE-56227 Change 3981065 by Rolando.Caloca DR - vk - Fix bad layout #jira UE-56238 Change 3981346 by Rolando.Caloca DR - Copy from 3707257 Support for not flushing compute jobs (r.D3D11.UAVFlushNV) Change 3981347 by Rolando.Caloca DR - Copy from 3707257 Don't flush between morph dispatched Change 3981932 by Mark.Satterthwaite Generate the shader hash and function name when a Metal shader error needs to be reported so that even without shader code we get something to go on. 
Change 3982442 by Rolando.Caloca DR - Fix warning Change 3982652 by Rolando.Caloca DR - vk - Signal semaphore cleanup Change 3983917 by Richard.Wallis Clone of CL 3974146 converted for mtlpp along with extra mtlpp usage suggestions by Mark Satt: Fix for black flickering on first paint with weighted material landscape on Mac. When using AsyncCopyFromBufferToTexture in Metal we put the blit operation on the prologue encoder - however after a draw call using that resource the copy operation should happen after on the current encoder, this keeps the correct order of operations. Added Bool return from various Async renderpass resource requests so caller can decide correct further action. Updated to include the other async functions. Change 3984409 by Guillaume.Abadie Attempts to make static analysis happy again. Change 3984435 by Nick.Bullard Checking in Performance Test level provided to us by Tor Frick based on UE-44841. This has been utilized for checking issues against Aftermath performance impact. The Map includes 2 Level Book marks, most testing has been done against Bookmark 1 view, in fullscreen, in game mode Change 3985087 by Mark.Satterthwaite Make sure that the particle scratch buffer is large enough to hold all the data for the curve texture we are rendering to, otherwise a full set of curves will start scribbling memory after 64Kb (the curve texture is 256Kb of data - 512x512x4 as sizeof(RGBAUInt8) == 4). This happens in ElementalDemo. Change 3985201 by Rolando.Caloca DR - Fix bad CopyTexture Change 3985258 by Mark.Satterthwaite Try and detect orientation changes so that we don't blow-up on iOS due to a huge mismatch between the drawable texture for the display and the scene's depth-stencil target. I can't just fiddle with the depth-stencil texture itself without running the risk of obliterating in-use data and really we shouldn't permit such a mismatch anyway but it is fallout from 3620990. 
#jira UE-55756 Change 3986449 by Rolando.Caloca DR - vk - Update & consolidate Vulkan headers to 1.1.70.1 Consolidate SDK into one Change 3986571 by Guillaume.Abadie Makes PVS-Studio happy again in DOF. Change 3987039 by Yuriy.ODonnell Initial implementation of tracing profiler to show CPU and multiple GPUs on the same timeline. Currently only supported on DX12 platforms. Use `TracingProfiler frames=N` console command to trigger a capture of the next N frames. Trace is saved to disk as a JSON file into `Saved/Profiling/Traces` directory. Trace file uses Google Tracing format and can be visualized in Chrome built-in profiler (chrome://tracing). `r.GPUStatsChildTimesIncluded=1` CVar makes timing scopes hierarchical. `TracingProfiler.BufferSize=N` CVar controls the size of the tracing buffer, which may need to be increased for long traces (default is 65k events). Only can be set at startup. Change 3987074 by Yuriy.ODonnell Implemented timestamp calibration on DX11. Calibration is only performed when tracing profiler session starts. Change 3987160 by Yuriy.ODonnell Added thread naming and ordering to the tracing profiler output Change 3987331 by Mark.Satterthwaite Remove the Nvidia hack to retain resource references in command-buffers for UE-46604 as the mtlpp refactor provides stronger resource lifetime guarantees. #jira UE-46604 Change 3987754 by Mark.Satterthwaite Fix MetalRHI memory reporting in non-default path. PR #4568 Change 3988184 by Arciel.Rekman Linux: Fix editor OpenGL performance (UE-55960). - GetCurrentThreadId() calls became much more frequent with the OpenGL RHIT refactor. - We used to only cache that value in monolithic builds, because having per-thread static variables in dynamic libraries is risky due to OS limits. - This change adds dynamically-managed per-thread cache for non-monolithic builds. #jira UE-55960 Change 3988394 by Rolando.Caloca DR - vk - Improve memory mgmt - Use 256MB pages for Device heap (or 1/8th if less). 
- Remove texture allocations not going through resource manager Change 3988405 by Marcin.Undak Fix VulkanQuery crash on exit #codereview rolando.caloca #codereview arciel.rekman #rb arciel.rekman Change 3988567 by Rolando.Caloca DR - vk - Support for packed global UBs on pci aperture heap Change 3988668 by Rolando.Caloca DR - vk - Remove old comments Change 3988956 by Marcin.Undak RecordPerformance: added option to skip building/cooking before tests #rb none #codereview arciel.rekman Change 3989161 by Yuriy.ODonnell Static analysis error fix Change 3989196 by Guillaume.Abadie Fixes a crash in light shaft's TAA pass. #jira UE-57366 Change 3989207 by Yuriy.ODonnell Refactored FRealtimeGPUProfilerFrame to avoid splitting profile events when calculating exclusive times of scopes. This allows tracing profiler to retain the hierarchical view of the data, while keeping CSV and GPU Stat system behavior intact. Change 3989469 by Rolando.Caloca DR - vk - Fix for bad index; fix for bad transition Change 3989772 by Yuriy.ODonnell Implemented timestamp calibration on Vulkan Change 3990040 by Marcus.Wassmer Aftermath enabled by default. Removed unnecessary warning for other vendors Change 3990064 by Mark.Satterthwaite Ensure that packed globals are reuploaded when the command-encoder is restarted - don't simply invalidate the existing parameters. This properly handles cases where a single logical render-pass is broken into multiple command-encoders and/or command-buffers - otherwise all shaders must reset all parameters each time. When we move between frames we *do* want to perform a full state reset though as previous frame globals are treated as invalid. Change 3990080 by Mark.Satterthwaite Change the way we invalidate the visibility buffer between command-buffers and command-encoders so that on iOS you can reuse the same buffer within the same command-buffer, but not across more than one. 
The code provides an exception to this rule when running under the MetalRHI validation tools which can break each draw call into its own buffer. Change 3990084 by Mark.Satterthwaite Get MetalStatistics compiling again. Change 3990381 by Arciel.Rekman Bring back D3D12 in RecordPerformance. Change 3991113 by Rolando.Caloca DR - Fix crash on RHI thread on mobile preview - Check RHI objects are not null in the PSO initializer Change 3991191 by Ryan.Vance #jira UE-55952 Reimplemented instanced stereo for forward lighting cull grid after the srv/ub clean up. Change 3991343 by Rolando.Caloca DR - Copy from 3911492 UE4 - Disabled parallel mobile base pass by default. This is experimental and not known to be useful on any mobile platform. Change 3991375 by Mark.Satterthwaite Proper copyright assignment in the mtlpp debugger header. Change 3993151 by Daniel.Wright Fix RTDF resource transition found by Rolando Change 3993818 by Rolando.Caloca DR - Missed file Change 3993923 by Krzysztof.Narkowicz Fixed crashes inside RemoveSpeedTreeWind() and RemoveSpeedTreeWind_RenderThread(). FStaticMeshComponentRecreateRenderStateContext didn't flush deferred render updates causing stale RenderData to be left: 1. Thumbnail manager called SetStaticMesh(nullptr), which added StaticMeshComponent to deferred render updates. 2. UStaticMesh::Build called FStaticMeshComponentRecreateRenderStateContext and destroyed RenderData, but didn't touch Thumbnail's manager StaticMeshComponent as it was nullptr. 3. This resulted in a StaticMeshComponent with stale RenderData pointer. 
#jira UE-54544 Change 3994033 by Rolando.Caloca DR - vk - Reworked layers & extensions, as we were not doing it properly - Remove -vulkanstandardvalidation and -novulkanstandardvalidation as they are not needed anymore Change 3994275 by Mark.Satterthwaite Change to linking against mtlpp via AddEngineThirdPartyPrivateStaticDependencies and marking its header with THIRD_PARTY_* macros in the vain hope that might convince the remote compilation code to distribute the module to the remote machine when building MetalRHI. #jira UE-57507 Change 3994365 by Mark.Satterthwaite Pilfer some code from the old MetalHeap file to handle calculating texture memory size on older macOS and iOS builds when running with stats or LLM enabled. #jira UE-57513 Change 3994382 by Rolando.Caloca DR - vk - Some missing locks during image tracking Change 3994422 by Rolando.Caloca DR - vk - Remove bogus shader format Change 3995530 by Rolando.Caloca DR - vk - Fix for crash when validation is enabled Change 3995531 by Rolando.Caloca DR - vk - Fix static analysis Change 3995532 by Rolando.Caloca DR - vk - Added support for r.Vulkan.SaveValidationCache Change 3995610 by Uriel.Doyon Texture Streaming Changes and Fixes: - Using the small FOV items (like scopes) now only affect visible primitives (through "r.Streaming.MaxHiddenPrimitiveViewBoost"). - Static components added after the level is registered in the streaming manager are now handled correctly (fixes the low quality on the chests) - Dynamic components do not need to register to the streaming manager anymore. - Optimized dynamic component management by removing duplicate entries in the update list. - Added a pregarbage collect pass to the dynamic component management to optimize GC handling. - Added a budget reset logic whenever the scene requirements change significantly. - PIE worlds now have correct visibility information. - Fixed possible invalid memory access when processing the streaming manager slave views. 
- Refactored the incremental level texture data build to prevent new components from being unhandled. - Removed StreamingManager callbacks for NotifyActorSpawned() and NotifyPrimitiveAttached() - Added a StreamingManager callback NotifyPrimitiveUpdated(), to be used whenever a primitive streaming state must be updated. #jira none Change 3995908 by Arciel.Rekman Fix compile errors when using new Vulkan queries. Change 3995990 by Arciel.Rekman More compile fixes to new Vulkan queries. - MSVC did not catch this, clang did. Change 3996101 by Rolando.Caloca DR - vk - Win32 compile fix Change 3996323 by Mark.Satterthwaite Use the right include path to export the mtlpp headers. #jira UE-57507 Change 3996392 by Arciel.Rekman Vulkan: fix crash on start when using new queries. - CommandBufferManager was not yet set at that point and the code in queries relied on it. Change 3996585 by Rolando.Caloca DR - Slight improvement to GL being black, but just a temporary 'workaround' as it's not correct. Change 3998806 by Arciel.Rekman Fix Linux build (UE-57602). #jira UE-57602 Change 3998866 by Arciel.Rekman SubwaySequencer: fix old shader platform name. Change 3998947 by Mark.Satterthwaite Silence deprecation warnings in CEF on macOS now that we've moved to 10.12 as the minimum. #jira UE-57577 Change 3998951 by Mark.Satterthwaite Fix last of the deprecation errors that I am aware of for macOS 10.12. #jira UE-57581 Change 3998984 by Mark.Satterthwaite Build mtlpp for iOS 9.0 not 9.3. #jira UE-57586 Change 3999065 by Rolando.Caloca DR - vk - Make sure we use version 1.0.0 #jira UE-57521 Change 3999071 by Arne.Schober DR - [UE-55433, UE-57361] Hack SNORM support in OpenGL by re-interpreting UNORM. Underlying data is always SNORM. 
#jira UE-55433, UE-57361 Change 3999494 by Rolando.Caloca DR - Enable r.UnbindResourcesBetweenDrawsInDX11 in debug - Clear compute resources when r.UnbindResourcesBetweenDrawsInDX11 is enabled Change 4000197 by Krzysztof.Narkowicz Mesh simplifier - normalize TexCoordWeights using min/max TexCoord range. This fixes precision issues for very big TexCoord values and allows to optimize for all TexCoord channels when channels have values of different magnitudes (e.g. non standard TexCoord data). #jira UE-54935 Change 4000305 by Yuriy.ODonnell Suppress PVS Studio warning V547 (Expression is always true) related to Aftermath Reported issue to PVS team and to NVIDIA. Confirmed false positive, fix coming in future PVS version (v6.24). #jira UE-57579 Change 4000853 by Arciel.Rekman Linux: fix not calling CrashReportClient (UE-57678). #jira UE-57678 Change 4001504 by Rolando.Caloca DR - vk - Fix transition Change 4002460 by Krzysztof.Narkowicz Toggle for contact shadow length in world space Exposed contact shadows to Blueprints #jira none Change 4002608 by Rolando.Caloca DR - vk - Fix static analysis - Fix potential debug image tracking crash - Comment out unused methods Change 4002615 by Rolando.Caloca DR - vk - Allow r.Vulkan.WaitForIdleOnSubmit to be set at startup (e.g. in ConsoleVariables.ini) Previously, if your map needed to UpdateSkyCaptureContents on startup, an ensure would fail if GWaitForIdleOnSubmit was set. PrepareForCPURead needs to wait for the command buffer to finish before trying to read the results back, but the wait has already happened when r.Vulkan.WaitForIdleOnSubmit is set. Trying to wait again correctly complains that the command buffer is not in the correct state. So, skip the WaitForCmdBuffer call when r.Vulkan.WaitForIdleOnSubmit is set. Change 4002640 by Rolando.Caloca DR - vk - Missing support for CVarDefaultBackBufferPixelFormat Change 4002919 by Guillaume.Abadie Implements DOF's temporal upsampling pass for better dynamic resolution stability. 
Change 4002984 by Guillaume.Abadie Integrates Sebastian Aaltonen's ALU optimisations for TAAU. Change 4003112 by Olaf.Piesche Fix for TBB stall (resulting in severe hitches and hangs in the editor with stats active); tested multiple scenarios and encountered no hitches. #tests QAGame PerformanceTest and RenderTest map with various stats on and off Change 4003159 by Mark.Satterthwaite Undo parts of changelist 3970553 - the ref-counted pointer approach to returning textures to the pool is not working as expected so we'll remove that. It'll be faster on the CPU without it and everything works thanks to the changes this CL made to the way textures were released. #jira UE-57538 Change 4003287 by zachary.wilson Adding reflection capture content to TM-LightingScenarios Change 4003395 by Arne.Schober DR - Fix uninitialised value when clicking Go To in the editor #jira UE-57048 Change 4003425 by Rolando.Caloca DR - vk - Fix for new occlusion queries Change 4003530 by Arne.Schober DR - Disable GPU Benchmark in headless configurations #jira UE-57673 Change 4003717 by Rolando.Caloca DR - vk - Fix for depth not store, stencil store Change 4003719 by Rolando.Caloca DR - Minor switch to render pass Change 4003720 by Mark.Satterthwaite Don't suballocate private memory buffers on Vega and only Vega as there is something wrong with the blits in those cases but I can't capture a GPU trace to find out what right now (the driver is broken) - could be a bug in my code but this works on Polaris and Nvidia so it will need to be filed as a radar for AMD. Remove the FMetalBufferChunk from FMetalBuffer and simply store a pointer to the owning Heap/Magazine allocator. The FMetalResourceHeap now calls a new Release function to return the buffer to the allocator which will be faster on the CPU. #jira UE-57659 Change 4003854 by Mark.Satterthwaite Undo parts of 3990064 and try a different approach to get the uniforms to upload and remain available in the right places. 
As the original bug has been lost to time we should keep an eye out for missing buffer bindings by running under the Metal validation layer periodically. #jira UE-57576 Change 4004709 by Rolando.Caloca DR - Support for D3D 11, 12 & Vulkan for UAVs off Index Buffers Change 4005149 by Guillaume.Abadie Adds shader permutation to avoid clamping input buffer UV in DOF's gather pass. Change 4005284 by Uriel.Doyon Resaved volume texture assets with proper engine version. #jira UE-57534 Change 4005286 by Guillaume.Abadie Reduces constant setup in DOF's gather pass. Change 4005359 by Rolando.Caloca DR - vk - Fix annoying warning Change 4005363 by Rolando.Caloca DR - Fix android not finding vulkan shaders Change 4005457 by Rolando.Caloca DR - vk - Fix swapchain crash Change 4005473 by Patrick.Kelly UE-57135: Editor crash if set Reflection Capture Resolution to be 64 and New a Default level Code by Daniel Tested by Patrick Change 4005474 by Rolando.Caloca DR - vk - Remove glsl code from shaders. Packaged QAGame goes from 176MB to 162MB Change 4005759 by Krzysztof.Narkowicz Fixed a bug, where reflection capture build is called, even though we are in mobile preview mode. #jira UE-57743 Change 4005774 by Mark.Satterthwaite Update the wave intrinsics to avoid implicit bool->uint conversion that Apple don't like. #jira UE-57750 Change 4005974 by Mark.Satterthwaite Don't use cubemap array types on iOS Metal as they aren't available on all devices and we need to maintain backward compatibility for years to come. #jira UE-57083 Change 4006056 by Mark.Satterthwaite Remove the use of the PrimitiveType argument from Metal draw calls. #jira UE-57822 Change 4006139 by Mark.Satterthwaite - Move the render-pass functions into the MetalRHI implementation for later alteration. - Implement Index buffer UAVs for Metal - makes them more like vertex-buffers so this is one more step on the road to a unified buffer base-class implementation. 
Change 4006215 by Mark.Satterthwaite Metal's begin & end render/compute pass API implementation will take some time, but for now make it not depend on the parent stub implementation. Change 4006394 by Mark.Satterthwaite In lieu of a real instruction count just use the number of lines in the "Main" function of the shader as the instruction count for Metal. #jira UE-57551 Change 4006493 by Mark.Satterthwaite MetalRHI can currently support 4-component formats for Buffer UAVs - this might need some thought in the future as the API evolves but we might as well take advantage while we can. Change 4006495 by Daniel.Wright Integrate from Refactor branch * New FMaterialRenderProxy function GetMaterialWithFallback which provides both the FMaterialRenderProxy and FMaterial. Needed when falling back to default material, so that proxy and material resource match. * Local vertex factory uniform buffer Change 4006851 by Brian.Karis Fix for joined charts forming an L to inflate both axii. Thanks to Jess Kube of The Coalition. Change 4006852 by Brian.Karis Fix for hard coded reflection capture cube map size. Should fix light static light aliasing in captures Change 4006918 by Brian.Karis New ByteBuffer functionality. Memcpy and scatter upload. Can implement GPU side TArray reflection. Not yet used by checked in code. WIP optimization. Change 4007246 by Guillaume.Abadie Creates lower quality permutation for DOF's gathering pass, without Coc based weighting of the samples, and lower number of gathering ring for fast accumulator. Change 4007291 by Guillaume.Abadie Exposes more DOF scalability settings. Change 4007328 by Guillaume.Abadie Optimises DOF's half res only setup pass using gather4 Change 4007627 by Richard.Wallis Fix for when Magic Mouse cannot zoom in World Composition editor. Missing default SNodePanel::OnMouseMove behaviour. Tested using a classic 2xbutton + wheel mouse and a Mac MagicMouse. 
#jira UE-57030 Change 4007682 by Richard.Wallis No video when playing HLS streaming video on Mac. 2 Issues, FPS was zero making duration for video sample buffer nonsense and Video Track dimensions were going to zero on the AVAsset once fully initialized when playing HLS streams. Now cache relevant details and handle zero frame rate. Notes: - Caching the frame rate is not as important as we could look it up each time and fix for zero - ignoring that at the moment. - Assume we DO NOT want the FrameSize to be the last fetched video frame size from the AvfMediaVideoSampler as I think that is the video quality for streaming video and not the media frame size. - Renamed a variable in the AvfMediaVideoSample - was called FrameRate but it was the FrameDuration by that point. #jira UE-56734 Change 4007731 by Rolando.Caloca DR - Disable byte buffers on non-hlsl based platforms #jira UE-57851 Change 4007741 by Rolando.Caloca DR - Disable byte buffers on hlslcc platforms Change 4007782 by Mark.Satterthwaite Force Metal shaders, including the stdlib, to recompile. Change 4007918 by Rolando.Caloca DR - vk - Some static asserts Change 4008404 by Arciel.Rekman Do not crash on incompatible Vulkan drivers (UE-57521). #jira UE-57521 Change 4008442 by Daniel.Wright Better comments on ERHIFeatureLevel expectations Change 4008494 by Arne.Schober DR - moved bDeletedThroughDeferredCleanup before begincleanup to catch cases where the reference is added twice to the array. also removed finishcleanup as all they ever did was deleting the pointer anyway, and it should be added if such functionality is ever required from outside of the regular destructor. #jira UE-57754 Change 4008730 by Mark.Satterthwaite After the most recent changes to handling uniform buffer dirty bits in MetalRHI we should guard against attempts to set an unbound uniform buffer. 
#jira UE-57870 Change 4008949 by Brian.Karis Fix compile warning Change 4008951 by Brian.Karis Added LTC LUT textures Change 4009326 by Guillaume.Abadie Compiles out DOF's gathering bokeh simulation on platform other than desktop. Change 4009380 by Krzysztof.Narkowicz Moved area light code before the contact shadows, so contact shadows use representative light's direction. Merged all contact shadows shader code. Contact shadows keep constant screen space length independent of FoV settings. Contact shadows for translucents. Contact shadows for eye. Change 4009555 by Guillaume.Abadie Splits DOFCocTile.usf in two. Change 4009999 by Yuriy.ODonnell MallocStomp can now be enabled on certain platforms using '-stompmalloc' command line argument. Previously it was necessary to modify MallocaStomp.h and re-compile the engine. Currently supported platforms: Win64, Mac, Linux. Replaced hard-coded page size with FPlatformMemory::GetConstants().PageSize. Change 4010288 by Rolando.Caloca DR - vk - Fix for vertex streams Change 4010289 by Krzysztof.Narkowicz D3D12 - fixed depth bounds bug, where depth bounds wasn't properly set to [0;1] after disabling. #jira UE-57510 Change 4010297 by Rolando.Caloca DR - vk - Remove some functions for android Change 4010315 by Rolando.Caloca DR - vk - Remove create info macro Change 4010451 by Rolando.Caloca DR - vk - Reuse samplers - Infiltrator goes from 5759 to 24 samplers! Change 4010627 by Rolando.Caloca DR - vk - Fix missing values for tracking swapchain validation Change 4011924 by Guillaume.Abadie Implements tile based early return optimisation on DOF's postfiltering method. Change 4011941 by Guillaume.Abadie Shaves some ALU in DOF's accumulator for LowQuality permutation. Change 4012093 by Yuriy.ODonnell Disable MallocStompOverrunTest() in static analysis config, as it intentionally performs an out-of-bounds access. 
Change 4012195 by Rolando.Caloca DR - vk - Fix for mobile backbuffer layout Change 4012202 by Rolando.Caloca DR - vk - Don't use staging buffers on UMA Change 4012467 by Rolando.Caloca DR - Remove redundant check Change 4012486 by Rolando.Caloca DR - Fix missing transition Change 4012518 by Guillaume.Abadie Implements fast shader permutation for DOF's TAA pass. Change 4013084 by Arciel.Rekman Fix for Linux clock discrepancy. - Causing at least one precision issue, possibly more. (Integrating 4003273, 4012462 from //UE4/Dev-Editor/... to //UE4/Dev-Rendering/...) Change 4013266 by Uriel.Doyon Fixed crash when setting SceneDepthTextureNonMS and not having valid depth buffers in the SceneContext. Change 4013626 by Uriel.Doyon Fixed crash in the lighting build when creating a blueprint of the ALight and placing a light component in it. #jira UE-51672 Change 4013805 by Rolando.Caloca DR - Fix more missing transitions Change 4014128 by Arne.Schober DR - Do not create LocalVFUniformBuffer when running without MVF #jira UE-57929 Change 4014193 by Uriel.Doyon Editing component transforms now invalidate the component's lighting cache. #jira UE-48134 Change 4014282 by Rolando.Caloca DR - vk - Remove extra validation during dump Change 4014584 by Uriel.Doyon Duplicated static meshes now generate a new GUID to prevent possible issues with lightmass. #jira UE-49064 Change 4014604 by Uriel.Doyon UStaticMesh postduplicate now only generates a new GUID if !bDuplicateForPIE. Change 4015460 by Guillaume.Abadie Composes separate translucency within DOF's recombine pass. Change 4015571 by Guillaume.Abadie Refactors tonemapper to use global shader permutation API, that adds permutation for HDR output device rather than dynamic branching that some shader compiler are not very well optimizing. Change 4015984 by Krzysztof.Narkowicz Fixed crash inside DFAO resource allocation, when DFAO viewport has zero area. 
#jira UE-58000 Change 4016056 by Mark.Satterthwaite Fix Mac Metal shader compilation of texture cube arrays. Change 4016062 by Richard.Wallis Convert things like Space, Delete, F6 etc to unicode so they display correctly on the Mac menu rather than first letter of word. Added the default Mac commands to the GenericCommands so we get a Chord overwrite message and stop things like cmd+ q / w / h from getting bound. #jira UE-46999 Change 4016109 by Mark.Satterthwaite One unified Metal buffer implementation - will make further changes a heck of a lot easier. Change 4016221 by Patrick.Kelly UE-57617: Ensure changing viewmode to ShaderComplexity while in -game Change 4016238 by Guillaume.Abadie Makes clang happy again in Tonemapper. Change 4016309 by Mark.Satterthwaite More *_RenderThread implementations for MetalRHI. Change 4016414 by Mark.Satterthwaite And MetalRHI version of CreateStructuredBuffer_RenderThread... Change 4016498 by Mark.Satterthwaite Don't hold on to the uniform buffers bound to the hull shader when switching to a tessellated draw call as they'll have the wrong buffer layout. #jira UE-57930 Change 4017394 by Juan.Canada OpenGL: Fixed shading artifacts due to incorrect UNORM/SNORM conversions in skin/skincache/computetangent shaders. #jira UE-57691 Change 4017522 by Rolando.Caloca DR - vk - Remove unused code path (old mip generation detection) Change 4017539 by Rolando.Caloca DR - vk - Fix for sky lighting mips showing green on AMD Change 4017542 by Arciel.Rekman Moved appCountTrailingZeros to a non-SSE header (fixes ARM64 build). - Arguably WITH_SLI shouldn't apply to Linux on ARM but the fact that the function wasn't available is bad on its own. Change 4017827 by Guillaume.Abadie Optimises DOF's scattering cost by a third. Change 4017835 by Rolando.Caloca DR - Only allow a render pass to generate mips for one color render target Change 4017889 by Mark.Satterthwaite Cache all the Metal state objects to avoid hitting the API unnecessarily. 
Change 4018251 by Mark.Satterthwaite Fix broken rendering on Metal that tracked back to the innocuous looking changes in CL #4006495 (no blame attached - these changes are entirely reasonable) and cause various bugs in QAGame's TM-DistanceFields, ElementalDemo and probably more. Doesn't fix broken SpeedTree rendering :(. MetalRHI was allowing uniform buffers to blow away linear texture buffers when the constant buffer has been elided due to dead-code elimination. This problem can manifest without linear textures if the uniform buffer contains both constant data and a resource-table but the shader doesn't use any of the constant data. That's because Metal doesn't separate constant buffers from any other kind of buffer unlike D3D which separates all the slots out - and Metal doesn't provide enough buffers to emulate the D3D arrangement. So far this has only manifested in the MVF + Linear Texture case but a more robust solution will be necessary long term. Change 4018514 by Guillaume.Abadie Implements r.DOF.Scatter.MinCocRadius. Change 4018553 by Guillaume.Abadie Implements r.DOF.Scatter.MaxSpriteRatio to control the budget upperbound of DOF's scattering Change 4020369 by Yuriy.ODonnell Disable MallocStompOverrunTest in all static analysis configs (using USING_CODE_ANALYSIS macro) Previously was only disabled for PVS-Studio. Change 4020620 by Arciel.Rekman Fix XboxOne CIS (fallout of appCountTrailingZeros move). Change 4020949 by Guillaume.Abadie Configures DOF in scalability settings. Change 4021593 by Rolando.Caloca DR - vk - Support for Aftermath style api on AMD Change 4021740 by Rolando.Caloca DR - vk - Change log output Change 4022008 by Uriel.Doyon Fixed renderthread stalls when streaming texture mips on low end systems. 
Change 4022135 by Rolando.Caloca DR - vk - Fix last mip's layout during mip chain creation Change 4022607 by Jian.Ru Speculative fix for a bug where an invalid vertex buffer is dereferenced #jira UE-56229 Change 4022890 by Rolando.Caloca DR - Fix reference count not getting released Change 4023540 by Mark.Satterthwaite Avoid some pointless retain/release calls on Metal Encoders. Change 4023796 by Marcus.Wassmer Tell users they are over the maximum size when allocating very large rendertargets. Change 4025337 by Yuriy.ODonnell Improved use-after-free detection mechanism and physical memory usage of MallocStomp on Windows. MallocStomp on Windows will now reserve virtual address space for every allocation and then commit physical pages only to the valid usable part. Physical pages will be unmapped on Free, but virtual address space will not be released and therefore will never be re-used. Virtual address space is allocated from the OS in blocks of 1GB and then linearly sub-allocated. This reduces VA space usage, as VirtualAlloc returns blocks on 64KB granularity even if we just need 4KB. As a small bonus, this also reduces number of syscalls per allocation. This dramatically increases accuracy of use-after-free detection, but consumes significant amount of memory for the OS page table. Virtual memory limit for a process on Win10 is 128 TB, which means we can afford to keep virtual memory reserved for a long time. Running Infiltrator demo consumes ~700MB of virtual address space per second. Additionally, committing physical pages only for the usable part of the entire virtual block reduces physical memory usage by ~30% compared to old behavior, which allocated and committed entire block of pages via BinnedAllocFromOS and then marks border page as non-accessible. 
Change 4026047 by Rolando.Caloca DR - Fix test/shipping #jira UE-58148 Change 4026150 by Krzysztof.Narkowicz Force proper ordering of buffer visualization materials - after tonemapping (so exposure doesn't influence it) and before editor stuff like icons. #jira UE-57992 Change 4026226 by Rolando.Caloca DR - Fix static analysis #jira UE-58150 Change 4026354 by Jian.Ru Debug check trying to catch a crash. Only enabled in editor build #jira UE-50111 Change 4026655 by Rolando.Caloca DR - Fix for static analysis #jira UE-58149 Change 4026763 by Rolando.Caloca DR - Remove references to defunct CCT to avoid confusing licensees Change 4027167 by Uriel.Doyon Fixed possible out of bound buffer access when serializing with FDuplicateDataWriter. #jira UE-56509 Change 4027850 by Jian.Ru Prevent log spam #jira UE-50111 Change 4029546 by Rolando.Caloca DR - Compile fixes Change 4029624 by Yuriy.ODonnell Addressed static analysis errors in MallocStomp - VirtualAlloc return value is now explicitly checked. - C6250 is suppressed, as VirtualFree does not release address space by design. Change 4030225 by Yuriy.ODonnell Static analysis warning fix: make sure declaration of Sleep() is consistent between Windows headers and TBB The complexity with this particular case is that the warning is generated in synchapi.h, which is included by some Windows headers. If a module includes TBB and then Windows platform headers, static analyzer will report this warning. Suppressing it would require wrapping all instances of Windows header includes in third-party macros. Current pragmatic solution is to modify the Sleep() declaration in TBB header to be consistent with Windows and to report the issue to Intel for a permanent fix. 
Change 4030440 by Rolando.Caloca DR - Fix crash on mobile #jira UE-58222 Change 4030570 by Daniel.Wright Allow null SRV's in uniform buffers for feature levels that don't support SRV's in shaders Change 4030618 by Arne.Schober DR - missing tangent/normal sign conversion after integration from main #jira UE-58224 Change 4031588 by Rolando.Caloca DR - vk - Fix compile error when missing vkCmdWriteBufferMarkerAMD Change 4032145 by Mark.Satterthwaite Fix UE-58268 by only emitting the base_instance/base_vertex variables required to fix-up the instance/vertex ID values to match D3D when the Metal version is 1.1 or higher, earlier versions don't support these features. #jira UE-58268 Change 4032209 by Rolando.Caloca DR - Fix crash on EngineTest: Mesh Batch's UserIndex is not a union anymore Change 4033178 by Guillaume.Abadie Fixes FXAA sampling outside viewports, that was causing black outline on bottom and right edge of the screen when ViewSize != BufferSize, problematic for some screenshot automated test. #jira UE-58151 Change 4034489 by Daniel.Wright Fixed UStaticMeshComponent modifying its UStaticMesh when undoing a change. This caused a crash when other static mesh components using the same mesh asset were rendered, since their rendering state was not recreated. A component should not modify its asset during PostEditUndo. * This behavior has been present for a long time but was previously hidden because only the vertex factory of the mesh asset is cached in static draw lists, not any of its rendering resources (eg vertex declaration). Change 4035157 by Uriel.Doyon Fixed deadlock in the streaming code when running with -onethread. #jira UE-58299 Change 4035198 by Rolando.Caloca DR - vk - Fix issue when an older SDK was installed, UBT would pick it (should pick the newer of ThirdParty\Vulkan or installed SDK). 
#jira UE-58267 Change 4035730 by Arne.Schober DR - Fix missing Fog parameters during LightScattering Injection #jira UE-57608 Change 4035843 by Daniel.Wright Reimplemented support for EyeAdaptation node in opaque materials Change 4036837 by Marcus.Wassmer Replace some of the screenshots to match new un-tonemapped buffer visualization Change 4036980 by Rolando.Caloca DR - vk - Fix deadlock contention during mem allocation on Linux Change 4037225 by Guillaume.Abadie Fixes jittering selection outline. #jira UE-58350 Change 4038056 by Marcus.Wassmer roll back changelist 4026150. breaks a bunch of automated tests by cutting off half the image. Change can go back in later with that part fixed also Change 4038296 by Jian.Ru Static analysis fix #jira UE-58377 Change 4038402 by Ben.Marsh Suppress IncludeTool warnings caused by CL 3998947. Change 4038514 by Arne.Schober DR - Fix case with MVF where instance offset is not supported by the API (in this case only foliage OpenGL and TvOS), usually the buffers are offsetted instead but with MVF we do not use offsetted buffers, therfore the offset needs to be passed into the shader although we are drawing with offset of 0. #jira UE-57652 Change 4038747 by Marcus.Wassmer Back out changelist 3853645, causing us to lose shadows in the shaderhair test Change 4040138 by Rolando.Caloca DR - Fix compile warning Change 4041614 by Rolando.Caloca DR - vk - Fix for Oculus module #jira UE-58267 Change 3810277 by Daniel.Wright Ray Traced Distance Field shadows use a two pass tile culling algorithm with no tile max - fixes flickering from tile overflow in dense areas or with a low sun angle. Costs .2ms on PS4. The distance field scene buffers now use float4 on PS4 and Xbox, saves .1ms on PS4. Change 3817029 by Uriel.Doyon Added UVolumeTexture, which use 3D textures. Compressed formats are supported on DX11, DX12, PS4 and XB1. Projects targetting OpengGL don't have access to compressed formats (as the implementation has texture tiling issues). 
Add "r.AllowVolumeTextureAssetCreation" set as 0 by default, which controls whether volume texture can be sampled in materials and whether they can be created from 2D texture assets. Platform not supporting BC7, will now fallback on RGBA8 instead of DXT to preserve quality, in an attemps to increase usage of BC7. #jira UE-32263 Change 3819960 by Michael.Lentine Expose UEPhysics Clothing Parameters through UI. Change 3823401 by Rolando.Caloca DR - Add NumQueriesInBatch to RHIBeginOcclusionQueryBatch Change 3844805 by Arne.Schober DR - Increased Intermediate normal of Umodel and Skelmesh from 8bit Unorm Compressed to float. A resave/rebuid/reimport of the meshes is recommended to recover some lost precision. Fixed an issue with compressed (packed) normals on the GPU which were off by one integer representation. Also switched from UNORM to SNORM to get a discrete zero representation and removed some mads from all the VertexShaders. Change 3847283 by Marcus.Wassmer Extra fixes from Uriel Change 3876607 by Rolando.Caloca DR - Use render passes when running occlusion queries - Removes the RHI(Begin|End)OcclusionQueryBatch API Change 3903799 by Daniel.Wright [Integrate] Pass Uniform Buffers * All pass-constant shader inputs should go into the appropriate pass uniform buffer, instead of being set per-draw * Moved many per-draw base pass parameters over to the Base Pass Uniform Buffer * Opaque and Translucent base pass shaders have different uniform buffers, which allows compile errors when accessing an invalid resource (eg GBuffer in Opaque), instead of silently falling back to GBlackTexture Uniform buffers can now contain nested structs with UNIFORM_MEMBER_STRUCT() * This allows composing a uniform buffer at a particular update frequency out of many features, with encapsulation of each feature's parameters in a struct. 
* Eg deferred fog uses FFogUniformParameters, but so does translucency in the base pass, where FFogUniformParameters is reused nested inside the base pass uniform buffer. * Resources can now be located anywhere in the uniform buffer. Padding is inserted to the cbuffer representation to keep memory layouts matching. In the future the cbuffer could be compacted. * RemoveUniformBuffersFromSource() which works around HLSLCC lack of struct initializers now handles nested structs Change 3917500 by Rolando.Caloca DR - Change depth bounds so only the enable bit is in the PSO, allow min/max to be dynamically modified Change 3964907 by Guillaume.Abadie Implements RectList topology support in RHI. Change 3979171 by Mark.Satterthwaite Copying //Tasks/UE4/Dev-UERNDR-354-mtlpp to Dev-Rendering (//UE4/Dev-Rendering): Rewrites MetalRHI in terms of mtlpp, which is a C++ wrapper library built around Metal's Objective-C API that attempts to reduce overheads and eliminate resource lifetime errors. Regarding mtlpp: - The mtlpp library uses C++ constructor/destructor and smart-pointer style management of Objective-C retain/release calls to prevent over- and under-release problems. - To reduce Objective-C overheads the mtlpp library caches the internal C-function that implements the Objective-C selectors for the most commonly used Metal protocol types and calls the function directly - this avoids objc_msgSend which does this look-up dynamically and thus improves CPU performance slightly. - Another advantage is that mtlpp provides infrastructure to extend the Metal API slightly to help improve MetalRHI - the two important aspects are mtlpp::CommandBufferFence which provides a consistent CPU<->GPU synchronisation primitive and sub-buffer allocations from mtlpp::Buffer which allow for far superior memory management. - Validation functionality is also provided by mtlpp to detect CPU vs. 
GPU data races and resource lifetime validation - this is expensive and is thus optional and compiled out from Shipping binaries that should be used when performance is most critical. The validation only works between resource modification and *submitted* command-buffers - anything that is being actively encoded on the CPU is ignored and it remains the responsibility of the application to validate the order of operations when encoding. Apple Platform: - LLM support which tracks Objective-C objects is enabled only on macOS - we don't have the necessary libraries to intercept and override the internal system calls on iOS. MetalRHI: - All the types are switched over, (mostly) insuling the external API from the horror of Metal and Objective-C. - Buffers are now managed quite differently, small buffers are allocated from a magazine allocator that allocates in fixed blocks from a larger parent buffer, intermediate sized buffers are allocated from a simple heap allocator that wraps a larger buffer and anything of reasonable size (>2Mb) will use the pooled allocator. This *radically* reduces the number of buffer resources, by as much as a factor of 10, because they are now sub-allocated without the need to use MTLHeap or MTLFence so they are performance equivalent to the existing implementation on the GPU and much faster on the CPU. Total memory use is approximately the same. - Vertex & index buffer management has been updated to reflect changes in the management and to avoid reallocating buffers which provide a Linear Texture (for SRVs) unless strictly necessary. This ensures that even in cases where a dynamic buffer is updated multiple times in a frame it will still work acceptably well. - The Metal ring-buffer implementation is completely different again, this time it can use Managed memory on macOS which allows for much better performance on eGPUs which will be more and more important for Mac. 
- Everyone that needs to wait on a command-buffer fence (rather than a command-buffer itself) now use mtlpp::CommandBufferFence, which prevents race conditions between the different command-buffer handlers (which sometimes execute out of order). - LLM tracking should now report the same data as the MetalRHI stats group for buffer & texture allocations - there is no segmentation for Vertex/index/Structured/Uniform allocations in Metal so these numbers are going to be wrong and will need to be rethought. - What will be unseen are the number of small but important resource usage fixes that avoid stale resources from being bound to the device after the point at which they become invalid. This should eliminate a class of errors where the GPU uses a resource pointer that is modified by the CPU and was necessary to satisfy the new mtlpp validation code. Other: - Remove the Metal focused workarounds from the ClothBuffer resource binding and related vertex-buffer SRV - these were put in when MetalRHI/MetalShaderFormat couldn't handle float->uint conversions correctly and they should now. - Fix a validation error caused by trying to render a 0-sized scissor rect which is invalid in Metal and simply pointless elsewhere. - Consistency of disabling the Manual Vertex Fetch behaviour in shaders. #jira UERNDR-354 Change 3979312 by Rolando.Caloca DR - Remove bogus bKeepOriginalSurface parameter in CopyToResolveTarget Change 4005122 by Rolando.Caloca DR - Support for PS4 Index Buffer UAVs Change 4016298 by Guillaume.Abadie Fixes DOF hybrid scattering on platforms that supports RectList topology. Change 4018575 by Guillaume.Abadie Optimises DOF's reduce pass when doing scattering compilation. Change 4020317 by Guillaume.Abadie Implements WaveBroadcastIntrinsics.ush. [CL 4042226 by Marcus Wassmer in Main branch]
2018-05-01 10:36:33 -04:00
const int32 MaxTextureSize = 1 << (MAX_TEXTURE_MIP_COUNT - 1); // Don't use GetMax2DTextureDimension() as this is for the RHI only.
const FIntPoint ClampedTargetSize(FMath::Clamp(InTargetSize.X, 1, MaxTextureSize), FMath::Clamp(InTargetSize.Y, 1, MaxTextureSize));
Copying //UE4/Dev-AnimPhys to //UE4/Dev-Main (Source: //UE4/Dev-AnimPhys @ 3537446) #lockdown Nick.Penwarden ===================================== MAJOR FEATURES + CHANGES ===================================== Change 3491514 by Jonathan.Poncelet Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units. Change 3495025 by Jonathan.Poncelet Back out changelist 3491514 "Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units." Change 3505086 by Danny.Bouimad Updating test content in TM-AnimPhys and TM-TangentNormals Change 3505375 by James.Cobbett Automating Settle test map Change 3505714 by Lina.Halper Add more descriptive pin name and node text for constraint node #jira: UE-45895 #rb: Ori.Cohen Change 3505731 by Lina.Halper 1. Renamed FTargetReference to FBoneSocketTarget - this allows users to choose either bone or socket as a target. 2. 
Two Bone IK refactor to use FBoneSocketTarget - Effector Target and Joint Target are converted to use FBoneSocketTarget, so you can use socket or bone - Effector Location and Joint Target Location is used as offset from target location, so you can use as a combination of FBoneSocketTarget - Editor code now uses runtime node instead of Graph Node, will have more discussion with Tom on this. 3. FABRIK refactor to use FBoneSocketTarget #code review: Laurent.Delayen, Martin.Wilson, Thomas.Sarkanen #rb: Laurent.Delayen Change 3505770 by Lina.Halper IK automation test #jira: UE-46250 Change 3506369 by Lina.Halper Fix initialization order issue #rb:none #rnx Change 3506697 by Martin.Wilson Fix root motion when using ForceAnimRate's of more than 1 #jira UE-39021 Change 3506765 by Lina.Halper It's confusing to see the same name multiple times. So fixed so that this utility functions show up later, and then added function instead. #jira: UE-45871 #rb: Martin.Wilson Change 3506787 by Ori.Cohen Added single threaded physx tasks stats using "stat PhysXTasks" Change 3506803 by Ori.Cohen Turn off debug code which was submitted by accident Change 3506840 by Jurre.deBaare Fix for automation vertex-color warning Change 3506917 by Danny.Bouimad Checking in Edits made to AnimBP Constraint Content Change 3507045 by James.Cobbett Submitting final Settle test map updates Change 3509208 by Danny.Bouimad Checking in content changes for TM-SuspendCloth Change 3509235 by James.Cobbett Deleting Settle test map from QAGame - now lives in EngineTest Change 3509935 by Lina.Halper One customization tree for supporting Bone and Socket : you can just use FBoneSocketTarget and that will allow displaying sockets also #jira: UE-45778 #rb: Thomas.Sarkanen #code review:Thomas.Sarkanen Change 3511250 by Martin.Wilson Fix crash when performing drag operations in the notify track window #jira UE-46420 Change 3511397 by Thomas.Sarkanen Asset reloading now defers re-opening asset editors until post-GC phase 
This prevents an issue in some asset editors (like Persona) which may reference other assets in their UI. #jira UE-46442 - Crash when opening skeletal mesh editor window after reloading asset Change 3512849 by Aaron.McLeran #jira UE-46576 Fixing granulator loading multiple sound waves Change 3513414 by James.Cobbett Fixing destructible test map Change 3513588 by Benn.Gallagher Clothing LOD improvements - Added full pipeline for adding LODs to clothing assets in editor - Added methods for mapping parameters between masks on meshes with differing topology - Fixed a few UI bugs Change 3513599 by Benn.Gallagher Missed files from last checkin Change 3513920 by Martin.Wilson Move Live Link Retarget Asset to live link plugin and remove engine dependency from LiveLinkInterface (fixes maya live link compiling) Change 3515400 by Aaron.McLeran #jira UE-46299 Added a fade in function to audio device so audio can resume after fading out in main audio device. Change 3515495 by Joe.Conley Had reports some AnimationBP deterministic cooking errors were being caused by FBakedAnimationState::bAlwaysResetOnEntry not being initialized in the constructor explicitly. Changing that to be explicitly initialized to false in the constructor, as well as UAnimStateNode::bAlwaysResetOnEntry. Change 3515641 by Benn.Gallagher CIS fix for game builds Change 3516817 by Aaron.McLeran Moving opus lib to subfolder Windows instead of win32 to fix UGS game sync issues. Change 3516853 by Aaron.McLeran Slight optimization in converting proc audio buffer to 16 bit PCM. Change 3517525 by Jonathan.Poncelet Fix comment for FName operator!= Change 3517826 by James.Cobbett Test files for bug UE-46719 Change 3518049 by James.Cobbett Updating Settle automated test map to include settling on convex floors. Also added step to save actor starting location in Ground Truth, and reset actors to that location at the end of the test. 
Change 3518185 by Ori.Cohen Fix merge error as reported by NVIDIA Change 3518711 by Ethan.Geller Integrating fix for switch crash on load level. Change 3518720 by Ethan.Geller Back out changelist 3518711 Change 3519040 by Aaron.McLeran Simple feature to add attack/decay interpolation times for focus feature to avoid fast focus/out-of-focus volume scaling. Change 3519972 by James.Golding Fix constructor order for FSoundAttenuationSettings to fix CIS Change 3520141 by Martin.Wilson Make retarget assets are no longer assets but blueprints instead. Add blueprint function for remap asset to allow blueprints to transform bone names #jira UEAP-235 Change 3520568 by Martin.Wilson CIS fix Change 3520677 by Benn.Gallagher Added ability to rename clothing assets after creation Change 3520727 by Benn.Gallagher Removed unecessary header for asset list Change 3520791 by Martin.Wilson Fix multiple calls to FinalizeBoneTransforms when calling RefreshBoneTransforms outside of tick Change 3521069 by Jurre.deBaare Merging an actor with recompute normal and Overlapping UVs causes the normals generate incorrectly #fix old code path was causing normals to be recompute when it wasn't required causing the smooth normals on the issueing asset #jira UE-46806 Change 3521070 by Jurre.deBaare Ensure occurs when performing a Bake Out Material on Cube #fix Make sure that we update the Material data used for texture streaming when adding/changing materials during material baking #jira UE-46807 Change 3521142 by Jurre.deBaare Bake Material large Texture size crash #fix Added clamping to baked out material texture sizes in all occurences (GetMax2DTextureDimension()) #jira UE-46808 Change 3523294 by Aaron.McLeran Resetting available byte count when resetting the procedural sound wave Change 3523297 by Aaron.McLeran Adding thread safe mode for plugin interface shared ptrs. 
Change 3524153 by Jurre.deBaare Issue where new blend space samples list in detailsview would not regenerate blendspace sampling #fix Unified what happens when you change the grid sample value (this issue also caused undo/redo not to work with these numeric boxes) Change 3524154 by Jurre.deBaare Advanced preview tool tip in BlendSpace editor has different behaviour when grid doesn't have focus, and broke the sample dragging functionality. #fix Undid some added state cleanup code which actually was invalid to do, and made sure the CTRL down isn't a toggle but a constant state Change 3524282 by Thomas.Sarkanen Fixed OculusAudio in line with new API Post-Main merge fixup Change 3524348 by Thomas.Sarkanen Merging using Dev-Physics-Upgrade_PhysX3_To_Dev-AnimPhys_PhysX Original CL 3521358: [From trunk] 22410436 [Px-1090]PCm sphere convex jittering in UE4 [Reviewer: Kier] p4rmerge of Change 22415420 by sschirm from e:\P4\dev1\sw\physx\Releases\distro_mirrors\PhysX_3.4_APEX_1.4\Mirror_scripts\patch/cl-22415420.p4r moved from //sw/physx/Releases/distro_mirrors/PhysX_3.4_APEX_1.4/Mirror/ to //UE4/Dev-Physics-Upgrade/Engine/Source/ThirdParty/PhysX3/ #jira UE-46668 - PCM still has stability issues Change 3524541 by Jurre.deBaare Disabled material baking automated tests for now #jira UE-46510 Change 3524684 by Jurre.deBaare If you paste a invalid name into Bones to Remove and hit apply the editor will crash #fix did not check for INDEX_NONE or NAME_NONE bones when retrieving bone indices #jira UE-46830 Change 3525244 by Ori.Cohen Added the ability to verify the DDC content is not stale. Systems have to opt in (-VerifyDDC) Change 3525248 by Ori.Cohen Physx DDC will now run with verify ddc. 
Also fixed bad key which was missing the complexity type of body setup Change 3525263 by Ori.Cohen Fix typo with printf Change 3525279 by Ori.Cohen Fix CIS Change 3525478 by Ethan.Geller Adding memory aligned audio buffer support Change 3525688 by Aaron.McLeran Removing unnecessary code Change 3526391 by Benn.Gallagher Clothing optimization pass, mainly removing allocations and precaching some skin information. 10% Overall non-gamethread time reduction, gamethread sync completion task time halved. #jira UEAP-197 Change 3526454 by Benn.Gallagher CIS fix Change 3526919 by Chad.Garyet adding verifyddc flag to automated tests Change 3527006 by Lina.Halper Fix crash with blendspace sample value change - Matt also fixed undo transaction, queuing every move vs only final value - Matt fixed property changed to send interactive or not paramger, so that it doens't call object changed for every single move #jira: UE-46929 #rb: Matt.Kuhlenschmidt #code review: Jurre.DeBaare, Matt.Kuhlenschmidt Change 3528684 by Benn.Gallagher Static analysis fix, excessive statement left to signal reasons clothing assets would invalidate their caches triggered SA fail. Change 3528687 by Benn.Gallagher CIS Fix, method definition outside of declaration #if block. Change 3528890 by Ori.Cohen Fix false negative with PIE and verify DDC Change 3528899 by Martin.Wilson Smart name refactor part 1 - Changed FindUID api to return UID rather than pointer to UID, fix code in Orion that was caching a pointer to internal TMap allocated memory. #jira UEAP-264 Change 3530148 by Aaron.McLeran Making check for Supporting multiple audio devices only happen in editor builds. Change 3530519 by Jonathan.Poncelet Deprecated original angular physics functions in preference of a consistent API, using degrees vs. radians Functions are now suffixed "InDegrees" or "InRadians", to make it obvious which are used. The deprecated functions now call whichever degrees or radians counterpart is needed. 
FBodyInstance now works entirely in radians, to avoid unnecessary conversions. Automated tests have been added to verify behaviour. #jira UE-39757 Torque and angular velocity are inconsistent Change 3530943 by Benn.Gallagher Fixed clothing shader model automation test. #jira UE-47052 Change 3530993 by Thomas.Sarkanen Merging using Dev-Rendering_To_Dev-AnimPhys from CL 3512333. Converting integrates to edits. Original CL desc: Texture source data is not released anymore in WillNeverCacheCookedPlatformDataAgain(). This prevents an issue where textures referenced through CompositeTexture have no source data available. This doesn't affect peak memory so much, as textures loaded with AllowAsyncLoading already release their temporary load data. #jira UE-47083 - Cook Odin fails with LogTexture: Error: Unable to get texture source mips because its bulk data was released. Change 3536312 by Chad.Garyet adding verifyddc into the automatedtestbuild xml Change 3537375 by James.Golding Merge Ocean GC crash fix (OCN-7666) from CL 3512485 #jira UE-47211 DONE! [CL 3537460 by Thomas Sarkanen in Main branch]
2017-07-14 06:36:47 -04:00
auto RenderTargetComparison = [bInForceLinearGamma, InPixelFormat, ClampedTargetSize](const UTextureRenderTarget2D* CompareRenderTarget) -> bool
{
Copying //UE4/Dev-AnimPhys to //UE4/Dev-Main (Source: //UE4/Dev-AnimPhys @ 3537446) #lockdown Nick.Penwarden ===================================== MAJOR FEATURES + CHANGES ===================================== Change 3491514 by Jonathan.Poncelet Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units. Change 3495025 by Jonathan.Poncelet Back out changelist 3491514 "Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units." Change 3505086 by Danny.Bouimad Updating test content in TM-AnimPhys and TM-TangentNormals Change 3505375 by James.Cobbett Automating Settle test map Change 3505714 by Lina.Halper Add more descriptive pin name and node text for constraint node #jira: UE-45895 #rb: Ori.Cohen Change 3505731 by Lina.Halper 1. Renamed FTargetReference to FBoneSocketTarget - this allows users to choose either bone or socket as a target. 2. 
Two Bone IK refactor to use FBoneSocketTarget - Effector Target and Joint Target are converted to use FBoneSocketTarget, so you can use socket or bone - Effector Location and Joint Target Location is used as offset from target location, so you can use as a combination of FBoneSocketTarget - Editor code now uses runtime node instead of Graph Node, will have more discussion with Tom on this. 3. FABRIK refactor to use FBoneSocketTarget #code review: Laurent.Delayen, Martin.Wilson, Thomas.Sarkanen #rb: Laurent.Delayen Change 3505770 by Lina.Halper IK automation test #jira: UE-46250 Change 3506369 by Lina.Halper Fix initialization order issue #rb:none #rnx Change 3506697 by Martin.Wilson Fix root motion when using ForceAnimRate's of more than 1 #jira UE-39021 Change 3506765 by Lina.Halper It's confusing to see the same name multiple times. So fixed so that this utility functions show up later, and then added function instead. #jira: UE-45871 #rb: Martin.Wilson Change 3506787 by Ori.Cohen Added single threaded physx tasks stats using "stat PhysXTasks" Change 3506803 by Ori.Cohen Turn off debug code which was submitted by accident Change 3506840 by Jurre.deBaare Fix for automation vertex-color warning Change 3506917 by Danny.Bouimad Checking in Edits made to AnimBP Constraint Content Change 3507045 by James.Cobbett Submitting final Settle test map updates Change 3509208 by Danny.Bouimad Checking in content changes for TM-SuspendCloth Change 3509235 by James.Cobbett Deleting Settle test map from QAGame - now lives in EngineTest Change 3509935 by Lina.Halper One customization tree for supporting Bone and Socket : you can just use FBoneSocketTarget and that will allow displaying sockets also #jira: UE-45778 #rb: Thomas.Sarkanen #code review:Thomas.Sarkanen Change 3511250 by Martin.Wilson Fix crash when performing drag operations in the notify track window #jira UE-46420 Change 3511397 by Thomas.Sarkanen Asset reloading now defers re-opening asset editors until post-GC phase 
This prevents an issue in some asset editors (like Persona) which may reference other assets in their UI. #jira UE-46442 - Crash when opening skeletal mesh editor window after reloading asset Change 3512849 by Aaron.McLeran #jira UE-46576 Fixing granulator loading multiple sound waves Change 3513414 by James.Cobbett Fixing destructible test map Change 3513588 by Benn.Gallagher Clothing LOD improvements - Added full pipeline for adding LODs to clothing assets in editor - Added methods for mapping parameters between masks on meshes with differing topology - Fixed a few UI bugs Change 3513599 by Benn.Gallagher Missed files from last checkin Change 3513920 by Martin.Wilson Move Live Link Retarget Asset to live link plugin and remove engine dependency from LiveLinkInterface (fixes maya live link compiling) Change 3515400 by Aaron.McLeran #jira UE-46299 Added a fade in function to audio device so audio can resume after fading out in main audio device. Change 3515495 by Joe.Conley Had reports some AnimationBP deterministic cooking errors were being caused by FBakedAnimationState::bAlwaysResetOnEntry not being initialized in the constructor explicitly. Changing that to be explicitly initialized to false in the constructor, as well as UAnimStateNode::bAlwaysResetOnEntry. Change 3515641 by Benn.Gallagher CIS fix for game builds Change 3516817 by Aaron.McLeran Moving opus lib to subfolder Windows instead of win32 to fix UGS game sync issues. Change 3516853 by Aaron.McLeran Slight optimization in converting proc audio buffer to 16 bit PCM. Change 3517525 by Jonathan.Poncelet Fix comment for FName operator!= Change 3517826 by James.Cobbett Test files for bug UE-46719 Change 3518049 by James.Cobbett Updating Settle automated test map to include settling on convex floors. Also added step to save actor starting location in Ground Truth, and reset actors to that location at the end of the test. 
Change 3518185 by Ori.Cohen Fix merge error as reported by NVIDIA Change 3518711 by Ethan.Geller Integrating fix for switch crash on load level. Change 3518720 by Ethan.Geller Back out changelist 3518711 Change 3519040 by Aaron.McLeran Simple feature to add attack/decay interpolation times for focus feature to avoid fast focus/out-of-focus volume scaling. Change 3519972 by James.Golding Fix constructor order for FSoundAttenuationSettings to fix CIS Change 3520141 by Martin.Wilson Make retarget assets are no longer assets but blueprints instead. Add blueprint function for remap asset to allow blueprints to transform bone names #jira UEAP-235 Change 3520568 by Martin.Wilson CIS fix Change 3520677 by Benn.Gallagher Added ability to rename clothing assets after creation Change 3520727 by Benn.Gallagher Removed unecessary header for asset list Change 3520791 by Martin.Wilson Fix multiple calls to FinalizeBoneTransforms when calling RefreshBoneTransforms outside of tick Change 3521069 by Jurre.deBaare Merging an actor with recompute normal and Overlapping UVs causes the normals generate incorrectly #fix old code path was causing normals to be recompute when it wasn't required causing the smooth normals on the issueing asset #jira UE-46806 Change 3521070 by Jurre.deBaare Ensure occurs when performing a Bake Out Material on Cube #fix Make sure that we update the Material data used for texture streaming when adding/changing materials during material baking #jira UE-46807 Change 3521142 by Jurre.deBaare Bake Material large Texture size crash #fix Added clamping to baked out material texture sizes in all occurences (GetMax2DTextureDimension()) #jira UE-46808 Change 3523294 by Aaron.McLeran Resetting available byte count when resetting the procedural sound wave Change 3523297 by Aaron.McLeran Adding thread safe mode for plugin interface shared ptrs. 
Change 3524153 by Jurre.deBaare Issue where new blend space samples list in detailsview would not regenerate blendspace sampling #fix Unified what happens when you change the grid sample value (this issue also caused undo/redo not to work with these numeric boxes) Change 3524154 by Jurre.deBaare Advanced preview tool tip in BlendSpace editor has different behaviour when grid doesn't have focus, and broke the sample dragging functionality. #fix Undid some added state cleanup code which actually was invalid to do, and made sure the CTRL down isn't a toggle but a constant state Change 3524282 by Thomas.Sarkanen Fixed OculusAudio in line with new API Post-Main merge fixup Change 3524348 by Thomas.Sarkanen Merging using Dev-Physics-Upgrade_PhysX3_To_Dev-AnimPhys_PhysX Original CL 3521358: [From trunk] 22410436 [Px-1090]PCm sphere convex jittering in UE4 [Reviewer: Kier] p4rmerge of Change 22415420 by sschirm from e:\P4\dev1\sw\physx\Releases\distro_mirrors\PhysX_3.4_APEX_1.4\Mirror_scripts\patch/cl-22415420.p4r moved from //sw/physx/Releases/distro_mirrors/PhysX_3.4_APEX_1.4/Mirror/ to //UE4/Dev-Physics-Upgrade/Engine/Source/ThirdParty/PhysX3/ #jira UE-46668 - PCM still has stability issues Change 3524541 by Jurre.deBaare Disabled material baking automated tests for now #jira UE-46510 Change 3524684 by Jurre.deBaare If you paste a invalid name into Bones to Remove and hit apply the editor will crash #fix did not check for INDEX_NONE or NAME_NONE bones when retrieving bone indices #jira UE-46830 Change 3525244 by Ori.Cohen Added the ability to verify the DDC content is not stale. Systems have to opt in (-VerifyDDC) Change 3525248 by Ori.Cohen Physx DDC will now run with verify ddc. 
Also fixed bad key which was missing the complexity type of body setup Change 3525263 by Ori.Cohen Fix typo with printf Change 3525279 by Ori.Cohen Fix CIS Change 3525478 by Ethan.Geller Adding memory aligned audio buffer support Change 3525688 by Aaron.McLeran Removing unnecessary code Change 3526391 by Benn.Gallagher Clothing optimization pass, mainly removing allocations and precaching some skin information. 10% Overall non-gamethread time reduction, gamethread sync completion task time halved. #jira UEAP-197 Change 3526454 by Benn.Gallagher CIS fix Change 3526919 by Chad.Garyet adding verifyddc flag to automated tests Change 3527006 by Lina.Halper Fix crash with blendspace sample value change - Matt also fixed undo transaction, queuing every move vs only final value - Matt fixed property changed to send interactive or not paramger, so that it doens't call object changed for every single move #jira: UE-46929 #rb: Matt.Kuhlenschmidt #code review: Jurre.DeBaare, Matt.Kuhlenschmidt Change 3528684 by Benn.Gallagher Static analysis fix, excessive statement left to signal reasons clothing assets would invalidate their caches triggered SA fail. Change 3528687 by Benn.Gallagher CIS Fix, method definition outside of declaration #if block. Change 3528890 by Ori.Cohen Fix false negative with PIE and verify DDC Change 3528899 by Martin.Wilson Smart name refactor part 1 - Changed FindUID api to return UID rather than pointer to UID, fix code in Orion that was caching a pointer to internal TMap allocated memory. #jira UEAP-264 Change 3530148 by Aaron.McLeran Making check for Supporting multiple audio devices only happen in editor builds. Change 3530519 by Jonathan.Poncelet Deprecated original angular physics functions in preference of a consistent API, using degrees vs. radians Functions are now suffixed "InDegrees" or "InRadians", to make it obvious which are used. The deprecated functions now call whichever degrees or radians counterpart is needed. 
FBodyInstance now works entirely in radians, to avoid unnecessary conversions. Automated tests have been added to verify behaviour. #jira UE-39757 Torque and angular velocity are inconsistent Change 3530943 by Benn.Gallagher Fixed clothing shader model automation test. #jira UE-47052 Change 3530993 by Thomas.Sarkanen Merging using Dev-Rendering_To_Dev-AnimPhys from CL 3512333. Converting integrates to edits. Original CL desc: Texture source data is not released anymore in WillNeverCacheCookedPlatformDataAgain(). This prevents an issue where textures referenced through CompositeTexture have no source data available. This doesn't affect peak memory so much, as textures loaded with AllowAsyncLoading already release their temporary load data. #jira UE-47083 - Cook Odin fails with LogTexture: Error: Unable to get texture source mips because its bulk data was released. Change 3536312 by Chad.Garyet adding verifyddc into the automatedtestbuild xml Change 3537375 by James.Golding Merge Ocean GC crash fix (OCN-7666) from CL 3512485 #jira UE-47211 DONE! [CL 3537460 by Thomas Sarkanen in Main branch]
2017-07-14 06:36:47 -04:00
return (CompareRenderTarget->SizeX == ClampedTargetSize.X && CompareRenderTarget->SizeY == ClampedTargetSize.Y && CompareRenderTarget->OverrideFormat == InPixelFormat && CompareRenderTarget->bForceLinearGamma == bInForceLinearGamma);
};
// Find any pooled render target with suitable properties.
UTextureRenderTarget2D** FindResult = RenderTargetPool.FindByPredicate(RenderTargetComparison);
if (FindResult)
{
RenderTarget = *FindResult;
}
else
{
TRACE_CPUPROFILER_EVENT_SCOPE(CreateNewRenderTarget)
// Not found - create a new one.
RenderTarget = NewObject<UTextureRenderTarget2D>();
check(RenderTarget);
RenderTarget->AddToRoot();
RenderTarget->ClearColor = FLinearColor(1.0f, 0.0f, 1.0f);
RenderTarget->ClearColor.A = 1.0f;
RenderTarget->TargetGamma = 0.0f;
Copying //UE4/Dev-AnimPhys to //UE4/Dev-Main (Source: //UE4/Dev-AnimPhys @ 3537446) #lockdown Nick.Penwarden ===================================== MAJOR FEATURES + CHANGES ===================================== Change 3491514 by Jonathan.Poncelet Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units. Change 3495025 by Jonathan.Poncelet Back out changelist 3491514 "Added new functions AddTorqueDegrees and AddAngularImpulseDegrees to UPrimitiveComponent Provided automated tests ensure that the angular velocity is consistent for each of the new functions by comparing it with an equivalent call to the original function. #jira UE-39757 Torque and angular velocity are inconsistent #automation Tests verify that AddTorque/AddTorqueDegrees and AddAngularImpulse/AddAngularImpulseDegrees both produce the correct angular velocity, when passed the same value in different units." Change 3505086 by Danny.Bouimad Updating test content in TM-AnimPhys and TM-TangentNormals Change 3505375 by James.Cobbett Automating Settle test map Change 3505714 by Lina.Halper Add more descriptive pin name and node text for constraint node #jira: UE-45895 #rb: Ori.Cohen Change 3505731 by Lina.Halper 1. Renamed FTargetReference to FBoneSocketTarget - this allows users to choose either bone or socket as a target. 2. 
Two Bone IK refactor to use FBoneSocketTarget - Effector Target and Joint Target are converted to use FBoneSocketTarget, so you can use socket or bone - Effector Location and Joint Target Location is used as offset from target location, so you can use as a combination of FBoneSocketTarget - Editor code now uses runtime node instead of Graph Node, will have more discussion with Tom on this. 3. FABRIK refactor to use FBoneSocketTarget #code review: Laurent.Delayen, Martin.Wilson, Thomas.Sarkanen #rb: Laurent.Delayen Change 3505770 by Lina.Halper IK automation test #jira: UE-46250 Change 3506369 by Lina.Halper Fix initialization order issue #rb:none #rnx Change 3506697 by Martin.Wilson Fix root motion when using ForceAnimRate's of more than 1 #jira UE-39021 Change 3506765 by Lina.Halper It's confusing to see the same name multiple times. So fixed so that this utility functions show up later, and then added function instead. #jira: UE-45871 #rb: Martin.Wilson Change 3506787 by Ori.Cohen Added single threaded physx tasks stats using "stat PhysXTasks" Change 3506803 by Ori.Cohen Turn off debug code which was submitted by accident Change 3506840 by Jurre.deBaare Fix for automation vertex-color warning Change 3506917 by Danny.Bouimad Checking in Edits made to AnimBP Constraint Content Change 3507045 by James.Cobbett Submitting final Settle test map updates Change 3509208 by Danny.Bouimad Checking in content changes for TM-SuspendCloth Change 3509235 by James.Cobbett Deleting Settle test map from QAGame - now lives in EngineTest Change 3509935 by Lina.Halper One customization tree for supporting Bone and Socket : you can just use FBoneSocketTarget and that will allow displaying sockets also #jira: UE-45778 #rb: Thomas.Sarkanen #code review:Thomas.Sarkanen Change 3511250 by Martin.Wilson Fix crash when performing drag operations in the notify track window #jira UE-46420 Change 3511397 by Thomas.Sarkanen Asset reloading now defers re-opening asset editors until post-GC phase 
This prevents an issue in some asset editors (like Persona) which may reference other assets in their UI. #jira UE-46442 - Crash when opening skeletal mesh editor window after reloading asset Change 3512849 by Aaron.McLeran #jira UE-46576 Fixing granulator loading multiple sound waves Change 3513414 by James.Cobbett Fixing destructible test map Change 3513588 by Benn.Gallagher Clothing LOD improvements - Added full pipeline for adding LODs to clothing assets in editor - Added methods for mapping parameters between masks on meshes with differing topology - Fixed a few UI bugs Change 3513599 by Benn.Gallagher Missed files from last checkin Change 3513920 by Martin.Wilson Move Live Link Retarget Asset to live link plugin and remove engine dependency from LiveLinkInterface (fixes maya live link compiling) Change 3515400 by Aaron.McLeran #jira UE-46299 Added a fade in function to audio device so audio can resume after fading out in main audio device. Change 3515495 by Joe.Conley Had reports some AnimationBP deterministic cooking errors were being caused by FBakedAnimationState::bAlwaysResetOnEntry not being initialized in the constructor explicitly. Changing that to be explicitly initialized to false in the constructor, as well as UAnimStateNode::bAlwaysResetOnEntry. Change 3515641 by Benn.Gallagher CIS fix for game builds Change 3516817 by Aaron.McLeran Moving opus lib to subfolder Windows instead of win32 to fix UGS game sync issues. Change 3516853 by Aaron.McLeran Slight optimization in converting proc audio buffer to 16 bit PCM. Change 3517525 by Jonathan.Poncelet Fix comment for FName operator!= Change 3517826 by James.Cobbett Test files for bug UE-46719 Change 3518049 by James.Cobbett Updating Settle automated test map to include settling on convex floors. Also added step to save actor starting location in Ground Truth, and reset actors to that location at the end of the test. 
Change 3518185 by Ori.Cohen Fix merge error as reported by NVIDIA Change 3518711 by Ethan.Geller Integrating fix for switch crash on load level. Change 3518720 by Ethan.Geller Back out changelist 3518711 Change 3519040 by Aaron.McLeran Simple feature to add attack/decay interpolation times for focus feature to avoid fast focus/out-of-focus volume scaling. Change 3519972 by James.Golding Fix constructor order for FSoundAttenuationSettings to fix CIS Change 3520141 by Martin.Wilson Make retarget assets are no longer assets but blueprints instead. Add blueprint function for remap asset to allow blueprints to transform bone names #jira UEAP-235 Change 3520568 by Martin.Wilson CIS fix Change 3520677 by Benn.Gallagher Added ability to rename clothing assets after creation Change 3520727 by Benn.Gallagher Removed unecessary header for asset list Change 3520791 by Martin.Wilson Fix multiple calls to FinalizeBoneTransforms when calling RefreshBoneTransforms outside of tick Change 3521069 by Jurre.deBaare Merging an actor with recompute normal and Overlapping UVs causes the normals generate incorrectly #fix old code path was causing normals to be recompute when it wasn't required causing the smooth normals on the issueing asset #jira UE-46806 Change 3521070 by Jurre.deBaare Ensure occurs when performing a Bake Out Material on Cube #fix Make sure that we update the Material data used for texture streaming when adding/changing materials during material baking #jira UE-46807 Change 3521142 by Jurre.deBaare Bake Material large Texture size crash #fix Added clamping to baked out material texture sizes in all occurences (GetMax2DTextureDimension()) #jira UE-46808 Change 3523294 by Aaron.McLeran Resetting available byte count when resetting the procedural sound wave Change 3523297 by Aaron.McLeran Adding thread safe mode for plugin interface shared ptrs. 
Change 3524153 by Jurre.deBaare Issue where new blend space samples list in detailsview would not regenerate blendspace sampling #fix Unified what happens when you change the grid sample value (this issue also caused undo/redo not to work with these numeric boxes) Change 3524154 by Jurre.deBaare Advanced preview tool tip in BlendSpace editor has different behaviour when grid doesn't have focus, and broke the sample dragging functionality. #fix Undid some added state cleanup code which actually was invalid to do, and made sure the CTRL down isn't a toggle but a constant state Change 3524282 by Thomas.Sarkanen Fixed OculusAudio in line with new API Post-Main merge fixup Change 3524348 by Thomas.Sarkanen Merging using Dev-Physics-Upgrade_PhysX3_To_Dev-AnimPhys_PhysX Original CL 3521358: [From trunk] 22410436 [Px-1090]PCm sphere convex jittering in UE4 [Reviewer: Kier] p4rmerge of Change 22415420 by sschirm from e:\P4\dev1\sw\physx\Releases\distro_mirrors\PhysX_3.4_APEX_1.4\Mirror_scripts\patch/cl-22415420.p4r moved from //sw/physx/Releases/distro_mirrors/PhysX_3.4_APEX_1.4/Mirror/ to //UE4/Dev-Physics-Upgrade/Engine/Source/ThirdParty/PhysX3/ #jira UE-46668 - PCM still has stability issues Change 3524541 by Jurre.deBaare Disabled material baking automated tests for now #jira UE-46510 Change 3524684 by Jurre.deBaare If you paste a invalid name into Bones to Remove and hit apply the editor will crash #fix did not check for INDEX_NONE or NAME_NONE bones when retrieving bone indices #jira UE-46830 Change 3525244 by Ori.Cohen Added the ability to verify the DDC content is not stale. Systems have to opt in (-VerifyDDC) Change 3525248 by Ori.Cohen Physx DDC will now run with verify ddc. 
Also fixed bad key which was missing the complexity type of body setup Change 3525263 by Ori.Cohen Fix typo with printf Change 3525279 by Ori.Cohen Fix CIS Change 3525478 by Ethan.Geller Adding memory aligned audio buffer support Change 3525688 by Aaron.McLeran Removing unnecessary code Change 3526391 by Benn.Gallagher Clothing optimization pass, mainly removing allocations and precaching some skin information. 10% Overall non-gamethread time reduction, gamethread sync completion task time halved. #jira UEAP-197 Change 3526454 by Benn.Gallagher CIS fix Change 3526919 by Chad.Garyet adding verifyddc flag to automated tests Change 3527006 by Lina.Halper Fix crash with blendspace sample value change - Matt also fixed undo transaction, queuing every move vs only final value - Matt fixed property changed to send interactive or not paramger, so that it doens't call object changed for every single move #jira: UE-46929 #rb: Matt.Kuhlenschmidt #code review: Jurre.DeBaare, Matt.Kuhlenschmidt Change 3528684 by Benn.Gallagher Static analysis fix, excessive statement left to signal reasons clothing assets would invalidate their caches triggered SA fail. Change 3528687 by Benn.Gallagher CIS Fix, method definition outside of declaration #if block. Change 3528890 by Ori.Cohen Fix false negative with PIE and verify DDC Change 3528899 by Martin.Wilson Smart name refactor part 1 - Changed FindUID api to return UID rather than pointer to UID, fix code in Orion that was caching a pointer to internal TMap allocated memory. #jira UEAP-264 Change 3530148 by Aaron.McLeran Making check for Supporting multiple audio devices only happen in editor builds. Change 3530519 by Jonathan.Poncelet Deprecated original angular physics functions in preference of a consistent API, using degrees vs. radians Functions are now suffixed "InDegrees" or "InRadians", to make it obvious which are used. The deprecated functions now call whichever degrees or radians counterpart is needed. 
FBodyInstance now works entirely in radians, to avoid unnecessary conversions. Automated tests have been added to verify behaviour. #jira UE-39757 Torque and angular velocity are inconsistent Change 3530943 by Benn.Gallagher Fixed clothing shader model automation test. #jira UE-47052 Change 3530993 by Thomas.Sarkanen Merging using Dev-Rendering_To_Dev-AnimPhys from CL 3512333. Converting integrates to edits. Original CL desc: Texture source data is not released anymore in WillNeverCacheCookedPlatformDataAgain(). This prevents an issue where textures referenced through CompositeTexture have no source data available. This doesn't affect peak memory so much as textures loaded with AllowAsyncLoading already release their temporary load data. #jira UE-47083 - Cook Odin fails with LogTexture: Error: Unable to get texture source mips because its bulk data was released. Change 3536312 by Chad.Garyet adding verifyddc into the automatedtestbuild xml Change 3537375 by James.Golding Merge Ocean GC crash fix (OCN-7666) from CL 3512485 #jira UE-47211 DONE! [CL 3537460 by Thomas Sarkanen in Main branch]
2017-07-14 06:36:47 -04:00
RenderTarget->InitCustomFormat(ClampedTargetSize.X, ClampedTargetSize.Y, InPixelFormat, bInForceLinearGamma);
RenderTargetPool.Add(RenderTarget);
}
checkf(RenderTarget != nullptr, TEXT("Unable to create or find valid render target"));
return RenderTarget;
}
/**
 * Returns the export proxy associated with the given material / property pair,
 * reusing a pooled proxy when one exists and creating (and pooling) one otherwise.
 *
 * @param Material	Material used as the key into MaterialProxyPool.
 * @param Property	Material property (type plus custom output name) the proxy is created for.
 * @return			The pooled proxy for (Material, Property); a new one is allocated when the pool has no match.
 */
FExportMaterialProxy* FMaterialBakingModule::CreateMaterialProxy(UMaterialInterface* Material, const FMaterialPropertyEx& Property)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(FMaterialBakingModule::CreateMaterialProxy)

	// Gather every pooled (property, proxy) pair registered under this material.
	TArray<FMaterialPoolValue> PooledEntries;
	MaterialProxyPool.MultiFind(Material, PooledEntries);

	// Reuse the first pooled proxy whose property matches the requested one.
	for (FMaterialPoolValue& PooledEntry : PooledEntries)
	{
		if (PooledEntry.Key == Property)
		{
			return PooledEntry.Value;
		}
	}

	// No pooled proxy for this property: create one and register it in the pool
	// so later requests for the same material / property pair find it.
	FExportMaterialProxy* const NewProxy = new FExportMaterialProxy(Material, Property.Type, Property.CustomOutput.ToString(), false /* bInSynchronousCompilation */);
	MaterialProxyPool.Add(Material, FMaterialPoolValue(Property, NewProxy));
	return NewProxy;
}
void FMaterialBakingModule::ProcessEmissiveOutput(const FFloat16Color* Color16, int32 Color16Pitch, const FIntPoint& OutputSize, TArray<FColor>& OutputColor, float& EmissiveScale)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
TRACE_CPUPROFILER_EVENT_SCOPE(FMaterialBakingModule::ProcessEmissiveOutput)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const int32 NumThreads = [&]()
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
return FPlatformProcess::SupportsMultithreading() ? FPlatformMisc::NumberOfCores() : 1;
}();
float* MaxValue = new float[NumThreads];
FMemory::Memset(MaxValue, 0, NumThreads * sizeof(MaxValue[0]));
const int32 LinesPerThread = FMath::CeilToInt((float)OutputSize.Y / (float)NumThreads);
// Find maximum float value across texture
ParallelFor(NumThreads, [&Color16, LinesPerThread, MaxValue, OutputSize, Color16Pitch](int32 Index)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const int32 EndY = FMath::Min((Index + 1) * LinesPerThread, OutputSize.Y);
float& CurrentMaxValue = MaxValue[Index];
const FFloat16Color MagentaFloat16 = FFloat16Color(FLinearColor(1.0f, 0.0f, 1.0f));
for (int32 PixelY = Index * LinesPerThread; PixelY < EndY; ++PixelY)
{
const int32 SrcYOffset = PixelY * Color16Pitch;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
for (int32 PixelX = 0; PixelX < OutputSize.X; PixelX++)
{
const FFloat16Color& Pixel16 = Color16[PixelX + SrcYOffset];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// Find maximum channel value across texture
if (!(Pixel16 == MagentaFloat16))
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
CurrentMaxValue = FMath::Max(CurrentMaxValue, FMath::Max3(Pixel16.R.GetFloat(), Pixel16.G.GetFloat(), Pixel16.B.GetFloat()));
}
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
});
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const float GlobalMaxValue = [&MaxValue, NumThreads]
{
float TempValue = 0.0f;
for (int32 ThreadIndex = 0; ThreadIndex < NumThreads; ++ThreadIndex)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
TempValue = FMath::Max(TempValue, MaxValue[ThreadIndex]);
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
return TempValue;
}();
if (GlobalMaxValue <= 0.01f)
{
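// Black emissive: the maximum channel value found is at most 0.01.
// NOTE(review): this branch contains no statement in the code shown here, so nothing is
// actually dropped and the 255.0f / GlobalMaxValue scale below is still computed; confirm
// whether an early-out was intended.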
}
// Now convert Float16 to Color using the scale
OutputColor.SetNumUninitialized(OutputSize.X * OutputSize.Y);
const float Scale = 255.0f / GlobalMaxValue;
ParallelFor(NumThreads, [&Color16, LinesPerThread, &OutputColor, OutputSize, Color16Pitch, Scale](int32 Index)
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
{
const FFloat16Color MagentaFloat16 = FFloat16Color(FLinearColor(1.0f, 0.0f, 1.0f));
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
const int32 EndY = FMath::Min((Index + 1) * LinesPerThread, OutputSize.Y);
for (int32 PixelY = Index * LinesPerThread; PixelY < EndY; ++PixelY)
{
const int32 SrcYOffset = PixelY * Color16Pitch;
const int32 DstYOffset = PixelY * OutputSize.X;
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
for (int32 PixelX = 0; PixelX < OutputSize.X; PixelX++)
{
const FFloat16Color& Pixel16 = Color16[PixelX + SrcYOffset];
FColor& Pixel8 = OutputColor[PixelX + DstYOffset];
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
if (Pixel16 == MagentaFloat16)
{
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
Pixel8.R = 255;
Pixel8.G = 0;
Pixel8.B = 255;
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
else
{
Pixel8.R = (uint8)FMath::RoundToInt(Pixel16.R.GetFloat() * Scale);
Pixel8.G = (uint8)FMath::RoundToInt(Pixel16.G.GetFloat() * Scale);
Pixel8.B = (uint8)FMath::RoundToInt(Pixel16.B.GetFloat() * Scale);
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
Pixel8.A = 255;
}
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
}
});
Edigrating 3 CLs to improve HLOD generation time (faster material baking & mesh merging) CL 10373564 by danny.couture Optimize Material Baking (Phase 1) - Introduce a mecanism to override the vertex/index buffer allocator used for dynamic meshes - Avoid GDynamicMesh non-ticked pools build-up by using our own vertex/index buffer pool during baking - Reduce reallocation and incurred soft page faults by reusing a single set of vertex/index buffers big enough for the biggest mesh - Preemptively detect if smearing would result in monochrome texture to avoid useless work - Shrink smeared monochrome textures during the baking process for huge memory savings - Move UV smearing in worker threads to avoid blocking the game thread - Required shaders are now built asynchronously - Add progress bar for material baking - 28m23 [at] 150 GB RAM -> 2m14s [at] 45 GB RAM for 6 channels [at] 512x512 when baking materials on ProxyLOD for DATASET-0008a with DDC empty #rb Jurre.deBaare, Sebastien.Lussier CL 10516258 by danny.couture Optimize Material Baking (Phase 2) - Implement pipelining with staging buffers to avoid GPU stalls when reading from render targets - Reuse the same prepared FMeshBatch instead of rebuilding it for each draw pass - Prepare the RenderItem in advance on other threads to reduce work on the game thread - Move the staging surface copy out of the render thread - Small vertex and index buffers are not reused to avoid dependency locks when mapping them - Fix bug in Canvas Flush_RenderThread found while running HLOD rebuild commandlet on Fortnite - Delete old and unused MaterialBakingModule.h from public files - 4m44s -> 59s for baking 6 channel [at] 1024x1024 when baking materials on ProxyLOD for DATASET-0008a with shaders already compiled - Time spent in Material Baking when rebuilding all HLOD on Apollo_POI_Large_HLOD (Phase 1 + 2 combined) - 10m18s -> 2m36s for a first rebuild all in editor with no shaders in DDC (cold) - 1m23s -> 20s for a second rebuild all in 
editor (warm) #rb Jeremy.Moore, Sebastien.Lussier CL 11135986 by sebastien.lussier Optimized mesh merging * Added DeletePolygons() & DeleteTriangles methods to FMeshDescription which rely on TSets<> instead of performing costly TArray::AddUnique() calls() * Parallelized UV generation and avoided duplicate processing of the same mesh+lod pairs * Optimized FMeshDescriptionOperations::GenerateUniqueUVsForStaticMesh() * Goes from 100s to 10s in my test case #rb danny.couture, jeanfrancois.dube, richard.talbotwatkin #ROBOMERGE-OWNER: sebastien.lussier #ROBOMERGE-AUTHOR: sebastien.lussier #ROBOMERGE-SOURCE: CL 11206337 via CL 11206341 via CL 11206346 #ROBOMERGE-BOT: (v643-11205221) [CL 11206493 by sebastien lussier in Main branch]
2020-02-03 11:08:35 -05:00
// This scale will be used in the proxy material to get the original range of emissive values outside of 0-1
EmissiveScale = GlobalMaxValue;
}
/**
 * Drops cached material proxies that are affected by a modification to Object.
 *
 * Does nothing unless CVarUseMaterialProxyCaching is enabled. Otherwise the modified
 * object is resolved to a UMaterialInterface (either directly, or through the
 * SourceInstance of a UMaterialEditorInstanceConstant) and MaterialProxyPool is scanned.
 * An entry is removed when:
 *  - its weak material pointer is no longer valid (stale entry),
 *  - it refers to the modified material itself, or
 *  - it is a material instance with the modified material somewhere in its Parent chain.
 *
 * @param Object  The object that was modified; objects that do not resolve to a material are ignored.
 */
void FMaterialBakingModule::OnObjectModified(UObject* Object)
{
	TRACE_CPUPROFILER_EVENT_SCOPE(FMaterialBakingModule::OnObjectModified)

	if (!CVarUseMaterialProxyCaching.GetValueOnAnyThread())
	{
		return;
	}

	UMaterialInterface* MaterialToInvalidate = Cast<UMaterialInterface>(Object);
	if (!MaterialToInvalidate)
	{
		// Check to see if the object is a material editor instance constant and if so, retrieve its source instance
		UMaterialEditorInstanceConstant* EditorInstance = Cast<UMaterialEditorInstanceConstant>(Object);
		if (EditorInstance && EditorInstance->SourceInstance)
		{
			MaterialToInvalidate = EditorInstance->SourceInstance;
		}
	}

	if (!MaterialToInvalidate)
	{
		return;
	}

	// Search our proxy pool for materials or material instances that refer to MaterialToInvalidate
	for (auto It = MaterialProxyPool.CreateIterator(); It; ++It)
	{
		const TWeakObjectPtr<UMaterialInterface> PoolMaterialPtr = It.Key();

		// Remove stale entries from the pool.
		// The test must be negated: an entry is stale when its weak pointer is NOT valid.
		// Without the negation every live entry was discarded on any material modification
		// and stale entries were never cleaned up.
		bool bMustDelete = !PoolMaterialPtr.IsValid();

		// Live entry - direct match against the modified material
		if (!bMustDelete)
		{
			bMustDelete = PoolMaterialPtr == MaterialToInvalidate;
		}

		// No match - Test the MaterialInstance hierarchy
		if (!bMustDelete)
		{
			UMaterialInstance* MaterialInstance = Cast<UMaterialInstance>(PoolMaterialPtr);
			while (!bMustDelete && MaterialInstance && MaterialInstance->Parent != nullptr)
			{
				bMustDelete = MaterialInstance->Parent == MaterialToInvalidate;
				// Walk up; the cast yields null once the parent is not a material instance, ending the loop
				MaterialInstance = Cast<UMaterialInstance>(MaterialInstance->Parent);
			}
		}

		// We have a match, remove the entry from our pool
		if (bMustDelete)
		{
			DeleteCachedMaterialProxy(It.Value().Value);
			It.RemoveCurrent();
		}
	}
}
/**
 * Pre-garbage-collection hook: forwards to CleanupMaterialProxies().
 * NOTE(review): presumably bound to the engine's pre-GC delegate; the registration is not visible in this part of the file - confirm.
 */
void FMaterialBakingModule::OnPreGarbageCollect()
{
CleanupMaterialProxies();
}
#undef LOCTEXT_NAMESPACE //"MaterialBakingModule"