2019-12-26 15:32:37 -05:00
// Copyright Epic Games, Inc. All Rights Reserved.
2014-03-14 14:13:41 -04:00
# include "ShaderPreprocessor.h"
Copying //UE4/Dev-Build to //UE4/Dev-Main (Source: //UE4/Dev-Build @ 3209340)
#lockdown Nick.Penwarden
#rb none
==========================
MAJOR FEATURES + CHANGES
==========================
Change 3209340 on 2016/11/23 by Ben.Marsh
Convert UE4 codebase to an "include what you use" model - where every header just includes the dependencies it needs, rather than every source file including large monolithic headers like Engine.h and UnrealEd.h.
Measured full rebuild times around 2x faster using XGE on Windows, and improvements of 25% or more for incremental builds and full rebuilds on most other platforms.
* Every header now includes everything it needs to compile.
* There's a CoreMinimal.h header that gets you a set of ubiquitous types from Core (eg. FString, FName, TArray, FVector, etc...). Most headers now include this first.
* There's a CoreTypes.h header that sets up primitive UE4 types and build macros (int32, PLATFORM_WIN64, etc...). All headers in Core include this first, as does CoreMinimal.h.
* Every .cpp file includes its matching .h file first.
* This helps validate that each header is including everything it needs to compile.
* No engine code includes a monolithic header such as Engine.h or UnrealEd.h any more.
* You will get a warning if you try to include one of these from the engine. They still exist for compatibility with game projects and do not produce warnings when included there.
* Only minor changes to our internal games were needed to accommodate these changes. The intent is for this to be as seamless as possible.
* No engine code explicitly includes a precompiled header any more.
* We still use PCHs, but they're force-included on the compiler command line by UnrealBuildTool instead. This lets us tune what they contain without breaking any existing include dependencies.
* PCHs are generated by a tool to get a statistical amount of coverage for the source files using it, and I've seeded the new shared PCHs to contain any header included by > 15% of source files.
Tool used to generate this transform is at Engine\Source\Programs\IncludeTool.
[CL 3209342 by Ben Marsh in Main branch]
2016-11-23 15:48:37 -05:00
# include "Misc/FileHelper.h"
# include "Misc/Paths.h"
# include "Misc/ScopeLock.h"
# include "Modules/ModuleManager.h"
2014-11-21 12:21:29 -05:00
# include "PreprocessorPrivate.h"
2023-08-03 13:40:32 -04:00
# include "ShaderCompilerDefinitions.h"
2022-12-01 09:27:13 -05:00
# include "stb_preprocess/preprocessor.h"
# include "stb_preprocess/stb_alloc.h"
# include "stb_preprocess/stb_ds.h"
2023-09-26 05:29:05 -04:00
// Size (bytes) preallocated per worker thread for preprocess output; tunable to trade memory for fewer reallocations.
static TAutoConsoleVariable<int32> CVarShaderCompilerThreadLocalPreprocessBuffer(
	TEXT("r.ShaderCompiler.ThreadLocalPreprocessBuffer"),
	1280 * 1024,
	TEXT("Amount to preallocate for preprocess output per worker thread, to save reallocation overhead in the preprocessor."),
	ECVF_Default);
2022-12-01 09:27:13 -05:00
namespace
{
const FString PlatformHeader = TEXT ( " /Engine/Public/Platform.ush " ) ;
2023-05-03 10:17:48 -04:00
void LogMandatoryHeaderError ( const FShaderCompilerInput & Input , FShaderPreprocessOutput & Output )
2022-12-01 09:27:13 -05:00
{
2023-05-03 10:17:48 -04:00
FString Path = Input . VirtualSourceFilePath ;
FString Message = FString : : Printf ( TEXT ( " Error: Shader is required to include %s " ) , * PlatformHeader ) ;
Output . LogError ( MoveTemp ( Path ) , MoveTemp ( Message ) , 1 ) ;
2022-12-01 09:27:13 -05:00
}
}
2014-03-14 14:13:41 -04:00
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Utility function to wrap FShaderPreprocessDependencies hash table lookups -- used with FComparePathInSource / FCompareResultPath below.
// Walks the hash bucket for KeyHash and forwards Args to the comparison policy; returns the matching dependency index, or INDEX_NONE.
template <typename CompareType, typename... ArgsType>
FORCEINLINE uint32 DependencyHashTableFind(const FShaderPreprocessDependencies& Dependencies, const CompareType& Compare, uint32 KeyHash, ArgsType... Args)
{
	const FHashTable& HashTable = Compare.GetHashTable(Dependencies);
	for (uint32 Index = HashTable.First(KeyHash); HashTable.IsValid(Index); Index = HashTable.Next(Index))
	{
		if (Compare.Equals(Dependencies.Dependencies[Index], Args...))
		{
			return Index;
		}
	}
	return INDEX_NONE;
}
// Comparison policy for DependencyHashTableFind: matches dependencies by the include path text
// exactly as it appears in the source (ANSI, case sensitive), using the BySource hash table.
struct FComparePathInSource
{
	static FORCEINLINE const FHashTable& GetHashTable(const FShaderPreprocessDependencies& Dependencies)
	{
		return Dependencies.BySource;
	}

	static FORCEINLINE bool Equals(const FShaderPreprocessDependency& Dependency, const ANSICHAR* PathInSource, uint32 PathLen, FXxHash64 PathHash, const ANSICHAR* ParentPathAnsi)
	{
		return Dependency.EqualsPathInSource(PathInSource, PathLen, PathHash, ParentPathAnsi);
	}
};
// Comparison policy for DependencyHashTableFind: matches dependencies by their resolved result path,
// using the ByResult hash table.
struct FCompareResultPath
{
	static FORCEINLINE const FHashTable& GetHashTable(const FShaderPreprocessDependencies& Dependencies)
	{
		return Dependencies.ByResult;
	}

	static FORCEINLINE bool Equals(const FShaderPreprocessDependency& Dependency, const FString& ResultPath, uint32 ResultPathHash)
	{
		return Dependency.EqualsResultPath(ResultPath, ResultPathHash);
	}
};
2023-07-22 06:35:10 -04:00
PRAGMA_DISABLE_DEPRECATION_WARNINGS // FShaderCompilerDefinitions will be made internal in the future, marked deprecated until then
2023-03-31 09:55:19 -04:00
static void AddStbDefine ( stb_arena * MacroArena , macro_definition * * & StbDefines , const TCHAR * Name , const TCHAR * Value ) ;
2023-06-21 03:26:02 -04:00
static void AddStbDefines ( stb_arena * MacroArena , macro_definition * * & StbDefines , const FShaderCompilerDefinitions & Defines ) ;
2023-03-31 09:55:19 -04:00
class FShaderPreprocessorUtilities
2019-09-14 09:45:25 -04:00
{
2023-03-31 09:55:19 -04:00
public :
2023-05-19 14:50:25 -04:00
static void PopulateDefines ( const FShaderCompilerEnvironment & Environment , const FShaderCompilerDefinitions & AdditionalDefines , stb_arena * MacroArena , macro_definition * * & OutDefines )
2023-03-31 09:55:19 -04:00
{
2023-09-26 05:29:05 -04:00
arrsetcap ( OutDefines , Environment . Definitions - > Num ( ) + AdditionalDefines . Num ( ) ) ;
2023-07-22 06:35:10 -04:00
AddStbDefines ( MacroArena , OutDefines , * Environment . Definitions ) ;
2023-06-21 03:26:02 -04:00
AddStbDefines ( MacroArena , OutDefines , AdditionalDefines ) ;
2023-03-31 09:55:19 -04:00
}
} ;
2023-07-22 06:35:10 -04:00
PRAGMA_ENABLE_DEPRECATION_WARNINGS
2019-09-14 09:45:25 -04:00
2018-12-18 12:15:17 -05:00
//////////////////////////////////////////////////////////////////////////
2022-12-01 09:27:13 -05:00
extern " C "
{
// adapter functions for STB memory allocation
void * StbMalloc ( size_t Size )
{
void * Alloc = FMemory : : Malloc ( Size ) ;
return Alloc ;
}
void * StbRealloc ( void * Pointer , size_t Size )
{
void * Alloc = FMemory : : Realloc ( Pointer , Size ) ;
return Alloc ;
}
void StbFree ( void * Pointer )
{
return FMemory : : Free ( Pointer ) ;
}
ANSICHAR * StbStrDup ( const ANSICHAR * InString )
{
if ( InString )
{
int32 Len = FCStringAnsi : : Strlen ( InString ) + 1 ;
ANSICHAR * Result = reinterpret_cast < ANSICHAR * > ( StbMalloc ( Len ) ) ;
return FCStringAnsi : : Strncpy ( Result , InString , Len ) ;
}
return nullptr ;
}
}
2023-09-26 05:29:05 -04:00
struct FStbLoadedInclude
{
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
const ANSICHAR * FileName = nullptr ; // Points to ResultPath in FShaderPreprocessDependenciesShared, or LocalFileName
2023-09-26 05:29:05 -04:00
const ANSICHAR * Data = nullptr ; // Points to SharedData, LocalData, or data from FShaderCompilerEnvironment
size_t DataLength = 0 ;
FShaderSharedAnsiStringPtr SharedData ;
TArray < ANSICHAR > LocalData ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
TArray < ANSICHAR > LocalFileName ;
2023-09-26 05:29:05 -04:00
} ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Returns whether ResultPath is present in the bulk dependency set; CacheShared points at the
// FStbLoadedInclude array entries that parallel Dependencies (defined later in this file).
static bool HasDependencyFromResultPath(const FShaderPreprocessDependencies& Dependencies, const FString& ResultPath, const FStbLoadedInclude* CacheShared);
2022-12-01 09:27:13 -05:00
struct FStbPreprocessContext
{
const FShaderCompilerInput & ShaderInput ;
2023-05-03 10:17:48 -04:00
const FShaderCompilerEnvironment & Environment ;
2023-09-26 05:29:05 -04:00
TMap < FString , FStbLoadedInclude > LoadedIncludesCache ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Shared includes from PreprocessDependencies, VertexFactoryDependencies, and Environment.IncludeVirtualPathToSharedContentsMap
// are stored in this array instead of the map, indexed sequentially. Avoids hash table overhead of "LoadedIncludesCache".
TArray < FStbLoadedInclude > LoadedIncludesCacheShared ;
FShaderPreprocessDependenciesShared PreprocessDependencies ;
FShaderPreprocessDependenciesShared VertexFactoryDependencies ;
FHashTable SharedContentsHash ; // Case insensitive hash table pointing at LoadedIncludesCacheShared with entries from IncludeVirtualPathToSharedContentsMap
uint32 SharedIncludeIndex = INDEX_NONE ; // Index in LoadedIncludesCacheShared propagated from StbResolveInclude to StbLoadFile
uint32 VertexFactoryOffset = INDEX_NONE ; // Vertex factory dependencies start at this offset in LoadedIncludesCacheShared
uint32 VirtualSharedContentsOffset = INDEX_NONE ; // Virtual shared contents start at this offset in LoadedIncludesCacheShared
2022-12-01 09:27:13 -05:00
2023-10-25 16:01:11 -04:00
// TEXT macro processing state
struct FTextEntry
{
uint32 Index ;
uint32 Hash ;
uint32 Offset ;
bool bIsAssert ;
FString SourceText ;
FString ConvertedText ;
FString EncodedText ;
} ;
TArray < FTextEntry > TextEntries ;
TArray < ANSICHAR > TextMacroSubstituted ;
uint32 TextGlobalCount = 0 ;
uint32 TextAssertCount = 0 ;
uint32 TextPrintfCount = 0 ;
bool bInAssert = false ;
bool HasIncludedHeader ( const FString & Header )
{
// Checks if a given header has been included. Note that the header may be encountered through one of our FShaderPreprocessDependencies structures,
// so if those are valid, we need to check the corresponding elements in the LoadedIncludesCacheShared array to see if the path was encountered.
return
( PreprocessDependencies . IsValid ( ) & & HasDependencyFromResultPath ( * PreprocessDependencies , Header , & LoadedIncludesCacheShared [ 0 ] ) ) | |
( VertexFactoryDependencies . IsValid ( ) & & HasDependencyFromResultPath ( * VertexFactoryDependencies , Header , & LoadedIncludesCacheShared [ VertexFactoryOffset ] ) ) | |
LoadedIncludesCache . Contains ( Header ) ;
}
2022-12-01 09:27:13 -05:00
bool HasIncludedMandatoryHeaders ( )
{
2023-10-25 16:01:11 -04:00
// Check if the mandatory PlatformHeader has been included ("/Engine/Public/Platform.ush")
return HasIncludedHeader ( PlatformHeader ) ;
2022-12-01 09:27:13 -05:00
}
2023-10-25 16:01:11 -04:00
2024-08-06 13:59:01 -04:00
void ShaderPrintGenerate ( char * & PreprocessFile , TArray < FShaderDiagnosticData > * OutDiagnosticDatas ) ;
2022-12-01 09:27:13 -05:00
} ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Trims the 15 bytes of SSE-safety padding from a loaded include, asserting the padding is actually present.
static void StbLoadedIncludeTrimPaddingChecked(FStbLoadedInclude* ContentsCached)
{
	// Need 15 characters beyond null terminator, so an unaligned SSE read at the null terminator can safely read 15 extra unused characters
	// without going out of memory bounds. ShaderConvertAndStripComments adds this padding in the form of extra trailing zeroes. Make sure
	// these zeroes are there.
	static const char SixteenZeroes[16] = { 0 };
	checkf(ContentsCached->DataLength >= 16 && memcmp(&ContentsCached->Data[ContentsCached->DataLength - 16], SixteenZeroes, 16) == 0,
		TEXT("Shader preprocessor ANSI files must include 15 bytes of zero padding past null terminator"));
	ContentsCached->DataLength -= 15;
}
// Unchecked variant of StbLoadedIncludeTrimPaddingChecked, for includes cached at startup.
static FORCEINLINE void StbLoadedIncludeTrimPadding(FStbLoadedInclude* ContentsCached)
{
	// For includes cached at startup, don't bother with the assert, since we know they came from a "safe" source that always adds the padding.
	ContentsCached->DataLength -= 15;
}
2022-12-01 09:27:13 -05:00
static const ANSICHAR * StbLoadFile ( const ANSICHAR * Filename , void * RawContext , size_t * OutLength )
{
FStbPreprocessContext & Context = * reinterpret_cast < FStbPreprocessContext * > ( RawContext ) ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Check if we found this file in our preprocess dependencies (fast path)
if ( Context . SharedIncludeIndex ! = INDEX_NONE )
{
FStbLoadedInclude * ContentsCached = & Context . LoadedIncludesCacheShared [ Context . SharedIncludeIndex ] ;
// Reset this after we consume it (although StbResolveInclude should clear it as well before StbLoadFile is called again)
Context . SharedIncludeIndex = INDEX_NONE ;
* OutLength = ContentsCached - > DataLength ;
return ContentsCached - > Data ;
}
2022-12-01 09:27:13 -05:00
FString FilenameConverted = StringCast < TCHAR > ( Filename ) . Get ( ) ;
2023-09-26 05:29:05 -04:00
uint32 FilenameConvertedHash = GetTypeHash ( FilenameConverted ) ;
2023-05-31 11:08:31 -04:00
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
FStbLoadedInclude & ContentsCached = Context . LoadedIncludesCache . FindOrAddByHash ( FilenameConvertedHash , FilenameConverted ) ;
if ( ! ContentsCached . Data )
{
2023-09-26 05:29:05 -04:00
const FString * InMemorySource = Context . Environment . IncludeVirtualPathToContentsMap . FindByHash ( FilenameConvertedHash , FilenameConverted ) ;
2023-06-01 09:03:52 -04:00
2023-09-26 05:29:05 -04:00
if ( InMemorySource )
2023-06-01 09:03:52 -04:00
{
2023-09-26 05:29:05 -04:00
check ( ! InMemorySource - > IsEmpty ( ) ) ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
ShaderConvertAndStripComments ( * InMemorySource , ContentsCached . LocalData ) ;
2023-09-26 05:29:05 -04:00
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
ContentsCached . Data = ContentsCached . LocalData . GetData ( ) ;
ContentsCached . DataLength = ContentsCached . LocalData . Num ( ) ;
2023-09-26 05:29:05 -04:00
}
else
{
const FThreadSafeSharedAnsiStringPtr * InMemorySourceAnsi = Context . Environment . IncludeVirtualPathToSharedContentsMap . FindByHash ( FilenameConvertedHash , FilenameConverted ) ;
if ( InMemorySourceAnsi )
{
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
ContentsCached . Data = InMemorySourceAnsi - > Get ( ) - > GetData ( ) ;
ContentsCached . DataLength = InMemorySourceAnsi - > Get ( ) - > Num ( ) ;
2023-09-26 05:29:05 -04:00
}
else
{
CheckShaderHashCacheInclude ( FilenameConverted , Context . ShaderInput . Target . GetPlatform ( ) , Context . ShaderInput . ShaderFormat . ToString ( ) ) ;
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
LoadShaderSourceFile ( * FilenameConverted , Context . ShaderInput . Target . GetPlatform ( ) , nullptr , nullptr , nullptr , & ContentsCached . SharedData ) ;
2023-09-26 05:29:05 -04:00
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
ContentsCached . Data = ContentsCached . SharedData - > GetData ( ) ;
ContentsCached . DataLength = ContentsCached . SharedData - > Num ( ) ;
2023-09-26 05:29:05 -04:00
}
2023-06-01 09:03:52 -04:00
}
Shader Preprocessor: Early bloom filter and SSE optimizations. Overall 27.9% improvement to low level preprocessor, or 10.4% to ConditionalPreprocessShader as a whole.
* Moved identifier copy and macro bloom filter from maybe_expand_macro to test into copy_to_action_point / copy_to_action_point_macro_expansion. 13.1% of improvement.
* SSE implementation of scan_to_directive, 10x faster, 5.2%
* SSE implementation of identifier copy, 3x faster, 4.5%
* SSE ShaderConvertAndStripComments, 4x faster, 3.6%
* Fast inline string equality comparison, 4x faster, 1.5%
To make SSE implementations "safe" without needing special cases near the end of a buffer, it's necessary to ensure padding is present in the relevant buffers, anything that goes through a preprocess_string call. This includes the string arena allocator, temporary stbds arrays that hold strings, and file buffers passed in. The latter all pass through ShaderConvertAndStripComments, where we can add padding. (ShaderConvertAndStripComments itself has special cases for end of buffer). Code related to original 1 and 2 character macro filter removed, since I can't see a reason to enable it over the bloom filter.
I also attempted SSE optimization of copy_to_action_point and copy_line_without_comments, but improvement wasn't big enough to be worth the complexity (around 2% for the former, but massive code complexity, 0.5% for the latter). That's pretty much everything SSE friendly that's over 1% on a profile, although I think copy_argument can be made a lot faster, not primarily through SSE.
#jira UE-197212
#rnx
#rb yuriy.odonnell jason.nadro
[CL 28834324 by jason hoerner in ue5-main branch]
2023-10-17 05:18:57 -04:00
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
StbLoadedIncludeTrimPaddingChecked ( & ContentsCached ) ;
2023-06-01 09:03:52 -04:00
}
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
* OutLength = ContentsCached . DataLength ;
return ContentsCached . Data ;
2022-12-01 09:27:13 -05:00
}
// stb preprocessor "free file" callback.
// No-op; stripped/converted shader source will be freed from the cache in FStbPreprocessContext when it's destructed;
// we want to keep it around until that point in case includes are loaded multiple times from different source locations
static void StbFreeFile(const ANSICHAR* Filename, const ANSICHAR* Contents, void* RawContext)
{
}
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
// Looks up a bulk preprocess dependency by the raw include path text exactly as it appears in the
// source file (PathInSource/PathLen, pre-hashed into PathHash). On a hit, lazily populates the
// parallel cache slot in CacheShared with the canonical file name and the pre-stripped source so
// StbLoadFile can return it without further lookups.
// Returns the index into Dependencies.Dependencies, or INDEX_NONE if not found.
static uint32 ResolveDependencyFromPathInSource(const FShaderPreprocessDependencies& Dependencies, const ANSICHAR* PathInSource, uint32 PathLen, FXxHash64 PathHash, const ANSICHAR* ParentPathAnsi, FStbLoadedInclude* CacheShared)
{
	uint32 HashIndex = DependencyHashTableFind(Dependencies, FComparePathInSource(), GetTypeHash(PathHash), PathInSource, PathLen, PathHash, ParentPathAnsi);
	if (HashIndex != INDEX_NONE)
	{
		// Choose the first unique instance of this result path
		HashIndex = Dependencies.Dependencies[HashIndex].ResultPathUniqueIndex;
		const FShaderPreprocessDependency& Dependency = Dependencies.Dependencies[HashIndex];
		FStbLoadedInclude* ContentsCached = &CacheShared[HashIndex];
		if (!ContentsCached->FileName)
		{
			// First time this dependency is touched for the current preprocess; wire the cache
			// entry up to the shared (pre-stripped) source rather than copying it.
			ContentsCached->FileName = Dependency.ResultPath.GetData();
			ContentsCached->Data = Dependency.StrippedSource->GetData();
			ContentsCached->DataLength = Dependency.StrippedSource->Num();
			StbLoadedIncludeTrimPadding(ContentsCached);
		}
	}
	return HashIndex;
}
// Looks up a bulk preprocess dependency by its resolved (canonical) result path. On a hit, lazily
// populates the parallel cache slot in CacheShared, same as ResolveDependencyFromPathInSource.
// Returns the index into Dependencies.Dependencies, or INDEX_NONE if not found.
static uint32 ResolveDependencyFromResultPath(const FShaderPreprocessDependencies& Dependencies, const FString& ResultPath, uint32 ResultPathHash, FStbLoadedInclude* CacheShared)
{
	// ResultPathHash is passed in twice -- once for "Find" function, and again as an argument to the "FCompareResultPath::Equals" function
	uint32 HashIndex = DependencyHashTableFind(Dependencies, FCompareResultPath(), ResultPathHash, ResultPath, ResultPathHash);
	if (HashIndex != INDEX_NONE)
	{
		const FShaderPreprocessDependency& Dependency = Dependencies.Dependencies[HashIndex];
		FStbLoadedInclude* ContentsCached = &CacheShared[HashIndex];
		if (!ContentsCached->FileName)
		{
			// First touch; reference the shared pre-stripped source rather than copying it.
			ContentsCached->FileName = Dependency.ResultPath.GetData();
			ContentsCached->Data = Dependency.StrippedSource->GetData();
			ContentsCached->DataLength = Dependency.StrippedSource->Num();
			StbLoadedIncludeTrimPadding(ContentsCached);
		}
	}
	return HashIndex;
}
// Returns true if the given result path is one of the paths referenced by the dependency
// structure AND was actually encountered during preprocessing (its cache slot was populated).
static bool HasDependencyFromResultPath(const FShaderPreprocessDependencies& Dependencies, const FString& ResultPath, const FStbLoadedInclude* CacheShared)
{
	uint32 ResultPathHash = GetTypeHash(ResultPath);
	uint32 HashIndex = DependencyHashTableFind(Dependencies, FCompareResultPath(), ResultPathHash, ResultPath, ResultPathHash);
	// Entry will have FileName set if it was encountered
	return HashIndex != INDEX_NONE && CacheShared[HashIndex].FileName != nullptr;
}
// Narrows a TCHAR string into Out as ANSI, appending a single null terminator.
// NOTE(review): narrowing is a plain truncating cast, so non-ASCII characters are mangled --
// presumably callers only pass shader paths/source, which are ASCII; confirm at call sites.
static void CopyStringToAnsiCharArray(const TCHAR* Text, int32 TextLen, TArray<ANSICHAR>& Out)
{
	Out.SetNumUninitialized(TextLen + 1);
	ANSICHAR* OutData = Out.GetData();
	for (int32 CharIndex = 0; CharIndex < TextLen; CharIndex++, OutData++, Text++)
	{
		*OutData = (ANSICHAR)*Text;
	}
	*OutData = 0;
}
2023-12-14 16:35:09 -05:00
// Adds 16 bytes of zeroes at end, to allow SSE reads at the end of the buffer without reading past the end of the heap allocation
static void CopyStringToAnsiCharArraySSEPadded ( const TCHAR * Text , int32 TextLen , TArray < ANSICHAR > & Out )
{
constexpr int32 SSEPadding = 16 ;
Out . SetNumUninitialized ( TextLen + SSEPadding ) ;
ANSICHAR * OutData = Out . GetData ( ) ;
for ( int32 CharIndex = 0 ; CharIndex < TextLen ; CharIndex + + , OutData + + , Text + + )
{
* OutData = ( ANSICHAR ) * Text ;
}
FMemory : : Memset ( OutData , 0 , SSEPadding * sizeof ( ANSICHAR ) ) ;
}
2022-12-01 09:27:13 -05:00
// stb preprocessor include-resolution callback. Resolves the path text of an #include directive
// to a canonical file name (stable pointer used by the preprocessor for #pragma once identity),
// checking caches from fastest to slowest:
//   1. bulk preprocess dependencies keyed by the raw directive text,
//   2. vertex factory dependencies likewise,
//   3. the shared-contents hash,
//   4. full path normalization + dependency re-check + per-preprocess include cache + disk/env.
// Returns nullptr if the include cannot be found. The resolved shared-cache index, when any,
// is propagated to StbLoadFile via Context.SharedIncludeIndex.
static const ANSICHAR* StbResolveInclude(const ANSICHAR* PathInSource, uint32 PathLen, const ANSICHAR* ParentPathAnsi, void* RawContext)
{
	FStbPreprocessContext& Context = *reinterpret_cast<FStbPreprocessContext*>(RawContext);

	FXxHash64 PathHash = FXxHash64::HashBuffer(PathInSource, PathLen);

	// Try main shader preprocess dependencies
	Context.SharedIncludeIndex = INDEX_NONE;
	if (Context.PreprocessDependencies.IsValid())
	{
		uint32 DependencyIndex = ResolveDependencyFromPathInSource(*Context.PreprocessDependencies, PathInSource, PathLen, PathHash, ParentPathAnsi, &Context.LoadedIncludesCacheShared[0]);
		if (DependencyIndex != INDEX_NONE)
		{
			// Propagate the found index to StbLoadFile
			uint32 SharedIncludeIndex = DependencyIndex;
			Context.SharedIncludeIndex = SharedIncludeIndex;
			return Context.LoadedIncludesCacheShared[SharedIncludeIndex].FileName;
		}
	}

	// Try vertex factory preprocess dependencies; these occupy the cache range starting at VertexFactoryOffset
	if (Context.VertexFactoryDependencies.IsValid())
	{
		uint32 DependencyIndex = ResolveDependencyFromPathInSource(*Context.VertexFactoryDependencies, PathInSource, PathLen, PathHash, ParentPathAnsi, &Context.LoadedIncludesCacheShared[Context.VertexFactoryOffset]);
		if (DependencyIndex != INDEX_NONE)
		{
			// Propagate the found index to StbLoadFile
			uint32 SharedIncludeIndex = DependencyIndex + Context.VertexFactoryOffset;
			Context.SharedIncludeIndex = SharedIncludeIndex;
			return Context.LoadedIncludesCacheShared[SharedIncludeIndex].FileName;
		}
	}

	// Try SharedContentsHash
	FAnsiStringView RawPathInSourceView(PathInSource, PathLen);
	for (uint32 HashIndex = Context.SharedContentsHash.First(GetTypeHash(RawPathInSourceView)); Context.SharedContentsHash.IsValid(HashIndex); HashIndex = Context.SharedContentsHash.Next(HashIndex))
	{
		if (RawPathInSourceView == Context.LoadedIncludesCacheShared[HashIndex].FileName)
		{
			// Propagate the found index to StbLoadFile
			Context.SharedIncludeIndex = HashIndex;
			return Context.LoadedIncludesCacheShared[HashIndex].FileName;
		}
	}

	// Slow path... Platform specific files and procedurally generated files (/Engine/Generated/Material.ush) -- typically 5% of files.
	FString PathModified = FString::ConstructFromPtrSize(PathInSource, PathLen);
	if (!PathModified.StartsWith(TEXT("/"))) // if path doesn't start with / it's relative, if so append the parent's folder and collapse any relative dirs
	{
		FString ParentFolder(ParentPathAnsi);
		ParentFolder = FPaths::GetPath(ParentFolder);
		PathModified = ParentFolder / PathModified;
		FPaths::CollapseRelativeDirectories(PathModified);
	}

	FixupShaderFilePath(PathModified, Context.ShaderInput.Target.GetPlatform(), &Context.ShaderInput.ShaderPlatformName);

	uint32 PathModifiedHash = GetTypeHash(PathModified);

	// We need to check our preprocess dependencies again with the result path, so we get the canonical capitalization for it from the dependencies, if available.
	// This case can be reached for platform includes (which aren't added to the bulk dependencies).
	if (Context.PreprocessDependencies.IsValid())
	{
		uint32 DependencyIndex = ResolveDependencyFromResultPath(*Context.PreprocessDependencies, PathModified, PathModifiedHash, &Context.LoadedIncludesCacheShared[0]);
		if (DependencyIndex != INDEX_NONE)
		{
			// Propagate the found index to StbLoadFile
			uint32 SharedIncludeIndex = DependencyIndex;
			Context.SharedIncludeIndex = SharedIncludeIndex;
			return Context.LoadedIncludesCacheShared[SharedIncludeIndex].FileName;
		}
	}

	// Try vertex factory preprocess dependencies
	if (Context.VertexFactoryDependencies.IsValid())
	{
		uint32 DependencyIndex = ResolveDependencyFromResultPath(*Context.VertexFactoryDependencies, PathModified, PathModifiedHash, &Context.LoadedIncludesCacheShared[Context.VertexFactoryOffset]);
		if (DependencyIndex != INDEX_NONE)
		{
			// Propagate the found index to StbLoadFile
			uint32 SharedIncludeIndex = DependencyIndex + Context.VertexFactoryOffset;
			Context.SharedIncludeIndex = SharedIncludeIndex;
			return Context.LoadedIncludesCacheShared[SharedIncludeIndex].FileName;
		}
	}

	// If we reach here, the include will be added to the map. Check if it's already in the map.
	FStbLoadedInclude* ContentsCached = Context.LoadedIncludesCache.FindByHash(PathModifiedHash, PathModified);
	if (ContentsCached)
	{
		// We return the same previously resolved path so preprocessor will handle #pragma once with files included with inconsistent casing correctly
		return ContentsCached->FileName;
	}

	bool bExists =
		Context.Environment.IncludeVirtualPathToContentsMap.ContainsByHash(PathModifiedHash, PathModified) ||
		// LoadShaderSourceFile will load the file if it exists, but then cache it internally, so the next call in StbLoadFile will be cheap
		// (and hence this is not wasteful, just performs the loading earlier)
		LoadShaderSourceFile(*PathModified, Context.ShaderInput.Target.GetPlatform(), nullptr, nullptr);

	if (bExists)
	{
		ContentsCached = &Context.LoadedIncludesCache.AddByHash(PathModifiedHash, PathModified);
		// Initialize the ANSI file name in the map entry. The file itself will be loaded in StbLoadFile, but we need the ANSI string
		// as the return value from this function.
		CopyStringToAnsiCharArray(&PathModified[0], PathModified.Len(), ContentsCached->LocalFileName);
		ContentsCached->FileName = ContentsCached->LocalFileName.GetData();
		return ContentsCached->FileName;
	}
	return nullptr;
}
2023-10-25 16:01:11 -04:00
// Macro names intercepted by the custom-macro callbacks below (StbCustomMacroBegin/End).
static const char* ShaderPrintTextIdentifier = "TEXT";
static const char* ShaderPrintAssertIdentifier = "UEReportAssertWithPayload";
static const char * StbCustomMacroBegin ( const char * OriginalText , void * RawContext )
{
FStbPreprocessContext & Context = * reinterpret_cast < FStbPreprocessContext * > ( RawContext ) ;
// Check for assert macro
if ( FCStringAnsi : : Strstr ( OriginalText , ShaderPrintAssertIdentifier ) = = OriginalText )
{
// We only need to track that we're in an assert, we don't need to do any substitution
Context . bInAssert = true ;
return OriginalText ;
}
// TEXT macro
check ( FCStringAnsi : : Strstr ( OriginalText , ShaderPrintTextIdentifier ) = = OriginalText ) ;
const char * TextChar = OriginalText ;
while ( * TextChar ! = ' ( ' )
{
TextChar + + ;
}
TextChar + + ;
while ( * TextChar ! = ' ) ' & & * TextChar ! = ' \" ' )
{
TextChar + + ;
}
// If no quoted text, that's a parse error
if ( * TextChar ! = ' \" ' )
{
return nullptr ;
}
// We found a string, add an entry
const uint32 EntryIndex = Context . TextEntries . Num ( ) ;
FStbPreprocessContext : : FTextEntry & Entry = Context . TextEntries . AddDefaulted_GetRef ( ) ;
Entry . Index = EntryIndex ;
Entry . Offset = Context . TextGlobalCount ;
Entry . bIsAssert = Context . bInAssert ;
if ( Entry . bIsAssert )
{
+ + Context . TextAssertCount ;
}
else
{
+ + Context . TextPrintfCount ;
}
// Parse the string, handling escaped characters. SourceText contains the raw text, ConvertedText removes escape back slashes,
// and EncodedText is an array of integer numeric values as ASCII.
TextChar + + ;
const char * TextStart = TextChar ;
int32 CharCount = 0 ;
for ( ; * TextChar ! = ' \" ' ; TextChar + + )
{
if ( * TextChar = = ' \\ ' )
{
TextChar + + ;
}
CharCount + + ;
}
Entry . SourceText = FString ( FAnsiStringView ( TextStart , TextChar - TextStart ) ) ;
Entry . ConvertedText . GetCharArray ( ) . SetNumUninitialized ( CharCount + 1 ) ;
Entry . EncodedText . Reserve ( CharCount * 4 ) ; // ~3 digits per character + a comma
TCHAR * ConvertedTextData = Entry . ConvertedText . GetCharArray ( ) . GetData ( ) ;
int32 CharIndex = 0 ;
for ( TextChar = TextStart ; * TextChar ! = ' \" ' ; TextChar + + , CharIndex + + )
{
if ( * TextChar = = ' \\ ' )
{
TextChar + + ;
}
ConvertedTextData [ CharIndex ] = * TextChar ;
const char C = * TextChar ;
Entry . EncodedText . AppendInt ( uint8 ( C ) ) ;
if ( CharIndex + 1 ! = CharCount )
{
Entry . EncodedText + = ' , ' ;
}
}
check ( CharIndex = = CharCount ) ;
ConvertedTextData [ CharIndex ] = 0 ;
Entry . Hash = CityHash32 ( ( const char * ) Entry . SourceText . GetCharArray ( ) . GetData ( ) , sizeof ( FString : : ElementType ) * Entry . SourceText . Len ( ) ) ;
Context . TextGlobalCount + = Entry . ConvertedText . Len ( ) ;
2023-12-14 16:35:09 -05:00
// Generate substitution string -- need SSE padding on any text handled by the preprocessor
2023-10-25 16:01:11 -04:00
if ( Entry . bIsAssert )
{
const FString HashString = FString : : Printf ( TEXT ( " %u " ) , Entry . Hash ) ;
2023-12-14 16:35:09 -05:00
CopyStringToAnsiCharArraySSEPadded ( * HashString , HashString . Len ( ) , Context . TextMacroSubstituted ) ;
2023-10-25 16:01:11 -04:00
}
else
{
const FString InitHashBegin ( TEXT ( " InitShaderPrintText( " ) ) ;
const FString InitHashEnd ( TEXT ( " ) " )) ;
const FString HashText = InitHashBegin + FString : : FromInt ( EntryIndex ) + InitHashEnd ;
2023-12-14 16:35:09 -05:00
CopyStringToAnsiCharArraySSEPadded ( * HashText , HashText . Len ( ) , Context . TextMacroSubstituted ) ;
2023-10-25 16:01:11 -04:00
}
return Context . TextMacroSubstituted . GetData ( ) ;
}
static void StbCustomMacroEnd ( const char * OriginalText , void * RawContext , const char * SubstitutionText )
{
FStbPreprocessContext & Context = * reinterpret_cast < FStbPreprocessContext * > ( RawContext ) ;
if ( FCStringAnsi : : Strstr ( OriginalText , ShaderPrintAssertIdentifier ) = = OriginalText )
{
Context . bInAssert = false ;
}
}
2024-08-06 13:59:01 -04:00
void FStbPreprocessContext : : ShaderPrintGenerate ( char * & PreprocessedFile , TArray < FShaderDiagnosticData > * OutDiagnosticDatas )
2023-10-25 16:01:11 -04:00
{
// Check if ShaderPrintCommon.ush was included, to decide whether to add the shader print generated code
static FString ShaderPrintHeader ( " /Engine/Private/ShaderPrintCommon.ush " ) ;
if ( ! HasIncludedHeader ( ShaderPrintHeader ) )
{
return ;
}
// 1. Write a global struct containing all the entries
// 2. Write the function for fetching character for a given entry index
const uint32 EntryCount = TextEntries . Num ( ) ;
FString TextChars ;
if ( TextPrintfCount > 0 & & EntryCount > 0 & & TextGlobalCount > 0 )
{
// 1. Encoded character for each text entry within a single global char array
TextChars = FString : : Printf ( TEXT ( " \n \n static const uint TEXT_CHARS[%d] = { \n " ) , TextGlobalCount ) ;
for ( FTextEntry & Entry : TextEntries )
{
TextChars + = FString : : Printf ( TEXT ( " \t %s%s // %d: \" %s \" \n " ) , * Entry . EncodedText , Entry . Index < EntryCount - 1 ? TEXT ( " , " ) : TEXT ( " " ) , Entry . Index , * Entry . SourceText ) ;
}
TextChars + = TEXT ( " }; \n \n " ) ;
// 2. Offset within the global array
TextChars + = FString : : Printf ( TEXT ( " static const uint TEXT_OFFSETS[%d] = { \n " ) , EntryCount + 1 ) ;
for ( FTextEntry & Entry : TextEntries )
{
TextChars + = FString : : Printf ( TEXT ( " \t %d, // %d: \" %s \" \n " ) , Entry . Offset , Entry . Index , * Entry . SourceText ) ;
}
TextChars + = FString : : Printf ( TEXT ( " \t %d // end \n " ) , TextGlobalCount ) ;
TextChars + = TEXT ( " }; \n \n " ) ;
// 3. Entry hashes
TextChars + = TEXT ( " // Hashes are computed using the CityHash32 function \n " ) ;
TextChars + = FString : : Printf ( TEXT ( " static const uint TEXT_HASHES[%d] = { \n " ) , EntryCount ) ;
for ( FTextEntry & Entry : TextEntries )
{
TextChars + = FString : : Printf ( TEXT ( " \t 0x%x%s // %d: \" %s \" \n " ) , Entry . Hash , Entry . Index < EntryCount - 1 ? TEXT ( " , " ) : TEXT ( " " ) , Entry . Index , * Entry . SourceText ) ;
}
TextChars + = TEXT ( " }; \n \n " ) ;
TextChars + = TEXT ( " uint ShaderPrintGetChar(uint InIndex) { return TEXT_CHARS[InIndex]; } \n " ) ;
TextChars + = TEXT ( " uint ShaderPrintGetOffset(FShaderPrintText InText) { return TEXT_OFFSETS[InText.Index]; } \n " ) ;
TextChars + = TEXT ( " uint ShaderPrintGetHash(FShaderPrintText InText) { return TEXT_HASHES[InText.Index]; } \n " ) ;
}
else
{
TextChars + = TEXT ( " uint ShaderPrintGetChar(uint Index) { return 0; } \n " ) ;
TextChars + = TEXT ( " uint ShaderPrintGetOffset(FShaderPrintText InText) { return 0; } \n " ) ;
TextChars + = TEXT ( " uint ShaderPrintGetHash(FShaderPrintText InText) { return 0; } \n " ) ;
}
// 3. Insert global struct data + print function
TArray < ANSICHAR > TextCharsAnsi ;
CopyStringToAnsiCharArray ( * TextChars , TextChars . Len ( ) , TextCharsAnsi ) ;
2024-08-06 13:59:01 -04:00
PreprocessedFile = preprocessor_file_append ( PreprocessedFile , TextCharsAnsi . GetData ( ) , TextCharsAnsi . Num ( ) - 1 ) ;
2023-10-25 16:01:11 -04:00
// 4. Insert assert data into shader compilation output for runtime CPU lookup
if ( OutDiagnosticDatas & & TextAssertCount > 0 )
{
OutDiagnosticDatas - > Reserve ( OutDiagnosticDatas - > Num ( ) + TextAssertCount ) ;
for ( const FTextEntry & E : TextEntries )
{
if ( E . bIsAssert )
{
FShaderDiagnosticData & Data = OutDiagnosticDatas - > AddDefaulted_GetRef ( ) ;
Data . Hash = E . Hash ;
Data . Message = E . SourceText ;
}
}
}
}
2022-12-01 09:27:13 -05:00
class FShaderPreprocessorModule : public IModuleInterface
{
virtual void StartupModule ( ) override
{
2023-10-25 16:01:11 -04:00
init_preprocessor ( & StbLoadFile , & StbFreeFile , & StbResolveInclude , & StbCustomMacroBegin , & StbCustomMacroEnd ) ;
2022-12-01 09:27:13 -05:00
// disable the "directive not at start of line" error; this allows a few things:
// 1. #define'ing #pragma messages - consumed by the preprocessor (to handle UESHADERMETADATA hackery)
// 2. #define'ing other #pragmas (those not processed explicitly by the preprocessor are copied into the preprocessed code
// 3. handling the HLSL infinity constant (1.#INF); STB preprocessor interprets any use of # as a directive which is not the case here
pp_set_warning_mode ( PP_RESULT_directive_not_at_start_of_line , PP_RESULT_MODE_no_warning ) ;
}
} ;
IMPLEMENT_MODULE ( FShaderPreprocessorModule , ShaderPreprocessor ) ;
// Appends one macro definition to the STB define array.
// The STB preprocessor expects each definition as a single "<name> <value>" string;
// pp_define copies it into MacroArena, so the local builder may go out of scope.
static void AddStbDefine(stb_arena* MacroArena, macro_definition**& StbDefines, const TCHAR* Name, const TCHAR* Value)
{
	TAnsiStringBuilder<256> DefineBuilder;
	DefineBuilder.Append(Name);
	DefineBuilder.AppendChar(' ');
	DefineBuilder.Append(Value);
	arrput(StbDefines, pp_define(MacroArena, DefineBuilder.ToString()));
}
PRAGMA_DISABLE_DEPRECATION_WARNINGS // FShaderCompilerDefinitions will be made internal in the future, marked deprecated until then
2023-06-21 03:26:02 -04:00
static void AddStbDefines ( stb_arena * MacroArena , macro_definition * * & StbDefines , const FShaderCompilerDefinitions & Defines )
2022-12-01 09:27:13 -05:00
{
2023-06-21 03:26:02 -04:00
for ( FShaderCompilerDefinitions : : FConstIterator It ( Defines ) ; It ; + + It )
2022-12-01 09:27:13 -05:00
{
2023-06-21 03:26:02 -04:00
AddStbDefine ( MacroArena , StbDefines , It . Key ( ) , It . Value ( ) ) ;
2022-12-01 09:27:13 -05:00
}
}
2023-12-07 08:55:41 -05:00
PRAGMA_ENABLE_DEPRECATION_WARNINGS
/**
* Preprocess a shader .
* @ param OutPreprocessedShader - Upon return contains the preprocessed source code .
* @ param ShaderOutput - ShaderOutput to which errors can be added .
* @ param ShaderInput - The shader compiler input .
* @ param AdditionalDefines - Additional defines with which to preprocess the shader .
* @ param DefinesPolicy - Whether to add shader definitions as comments .
* @ returns true if the shader is preprocessed without error .
*/
bool PreprocessShader (
2023-05-03 10:17:48 -04:00
FShaderPreprocessOutput & Output ,
const FShaderCompilerInput & Input ,
const FShaderCompilerEnvironment & Environment ,
2023-12-07 08:55:41 -05:00
PRAGMA_DISABLE_DEPRECATION_WARNINGS // FShaderCompilerDefinitions will be made internal in the future, marked deprecated until then
2022-12-01 09:27:13 -05:00
const FShaderCompilerDefinitions & AdditionalDefines
2023-12-07 08:55:41 -05:00
PRAGMA_ENABLE_DEPRECATION_WARNINGS
2022-12-01 09:27:13 -05:00
)
{
2023-12-07 08:55:41 -05:00
TRACE_CPUPROFILER_EVENT_SCOPE ( PreprocessShader ) ;
2022-12-01 09:27:13 -05:00
stb_arena MacroArena = { 0 } ;
macro_definition * * StbDefines = nullptr ;
2023-12-07 08:55:41 -05:00
PRAGMA_DISABLE_DEPRECATION_WARNINGS
2023-05-19 14:50:25 -04:00
FShaderPreprocessorUtilities : : PopulateDefines ( Environment , AdditionalDefines , & MacroArena , StbDefines ) ;
2022-12-01 09:27:13 -05:00
2023-11-07 18:01:37 -05:00
// The substitution text generated by custom macros gets run through the preprocessor afterwards, but in some cases we want to
// run the arguments through the preprocessor before as well. The TEXT macro needs this to handle things like TEXT(__FILE__),
// where the __FILE__ macro needs to be expanded before the custom macro handler is called, so we pass "1" to enable running
// the preprocessor first. By contrast, for shader asserts, we must NOT run the preprocessor on the arguments first, because
// the assert macro sets a state flag which modifies behavior of TEXT macros inside the assert. Asserts store their TEXT tokens
// outside the shader for printing in code when an assert is triggered, while ShaderPrint stores TEXT in the shader itself.
arrput ( StbDefines , pp_define_custom_macro ( & MacroArena , ShaderPrintTextIdentifier , 1 ) ) ;
arrput ( StbDefines , pp_define_custom_macro ( & MacroArena , ShaderPrintAssertIdentifier , 0 ) ) ;
2023-10-25 16:01:11 -04:00
2023-07-22 06:35:10 -04:00
PRAGMA_ENABLE_DEPRECATION_WARNINGS
2023-05-03 10:17:48 -04:00
FStbPreprocessContext Context { Input , Environment } ;
2022-12-01 09:27:13 -05:00
2023-05-03 10:17:48 -04:00
auto InFilename = StringCast < ANSICHAR > ( * Input . VirtualSourceFilePath ) ;
2022-12-01 09:27:13 -05:00
int NumDiagnostics = 0 ;
pp_diagnostic * Diagnostics = nullptr ;
2023-09-26 05:29:05 -04:00
static const int32 ThreadLocalPreprocessBufferSize = CVarShaderCompilerThreadLocalPreprocessBuffer . GetValueOnAnyThread ( ) ;
static thread_local char * ThreadLocalPreprocessBuffer = nullptr ;
// Sanity check the buffer size so it won't OOM if a bad value is entered.
int32 ClampedPreprocessBufferSize = ThreadLocalPreprocessBufferSize ? FMath : : Clamp ( ThreadLocalPreprocessBufferSize , 64 * 1024 , 4 * 1024 * 1024 ) : 0 ;
if ( ClampedPreprocessBufferSize & & ! ThreadLocalPreprocessBuffer )
{
ThreadLocalPreprocessBuffer = new char [ ClampedPreprocessBufferSize ] ;
}
Shader Preprocessor optimization: Support for loading include preprocess dependencies in bulk, and drastically reduced string processing, memory allocation, and map overhead. Roughly 7x faster, saving 12% in low level preprocessor, or 5% overall.
Flattened include dependencies are generated during include scanning at startup, basically for free (perf difference was well below noise). Bulk dependencies reduce round trips to the shader cache (which require mutex locks), and are indexed by the ANSI text exactly as it appears in the include directive in the source files, allowing a faster case sensitive hash, and avoiding the need for expensive path string operations. Anything found in a bulk dependency is stored in an array that parallels the dependency array, rather than a map. Includes stored in IncludeVirtualPathToSharedContentsMap also use an array.
Noting that our string classes (FString) are already case insensitive by default, some unnecessary case conversions were removed. The separate map of "seen" shaders was also removed, as we can just use the LoadedIncludesCache map for the same purpose. Where possible, existing ANSI strings are referenced, avoiding dynamic allocation.
#jira UE-197213
#rnx
#rb yuriy.odonnell jason.nadro
[CL 29095249 by jason hoerner in ue5-main branch]
2023-10-25 13:58:15 -04:00
if ( GetShaderPreprocessDependencies ( * Input . VirtualSourceFilePath , Context . ShaderInput . Target . GetPlatform ( ) , Context . PreprocessDependencies ) )
{
// First item in dependencies is always root file, so set that index
Context . SharedIncludeIndex = 0 ;
}
// Grab vertex factory dependencies if present
const FString * VertexFactoryInclude = Context . Environment . IncludeVirtualPathToContentsMap . Find ( TEXT ( " /Engine/Generated/VertexFactory.ush " ) ) ;
if ( VertexFactoryInclude )
{
int32 VertexFactoryNameStart ;
int32 VertexFactoryNameEnd ;
if ( VertexFactoryInclude - > FindChar ( TEXT ( ' \" ' ) , VertexFactoryNameStart ) & & VertexFactoryInclude - > FindLastChar ( TEXT ( ' \" ' ) , VertexFactoryNameEnd ) )
{
// Should have at least one character in our filename
check ( VertexFactoryNameEnd > VertexFactoryNameStart + 1 ) ;
FString VertexFactoryFilename ( FStringView ( & ( * VertexFactoryInclude ) [ VertexFactoryNameStart + 1 ] , VertexFactoryNameEnd - ( VertexFactoryNameStart + 1 ) ) ) ;
GetShaderPreprocessDependencies ( * VertexFactoryFilename , Context . ShaderInput . Target . GetPlatform ( ) , Context . VertexFactoryDependencies ) ;
}
}
// Initialize array of loaded includes associated with PreprocessDependencies, VertexFactoryDependencies, and Environment.IncludeVirtualPathToSharedContentsMap
Context . VertexFactoryOffset = Context . PreprocessDependencies . IsValid ( ) ? Context . PreprocessDependencies - > Dependencies . Num ( ) : 0 ;
Context . VirtualSharedContentsOffset = Context . VertexFactoryOffset + ( Context . VertexFactoryDependencies . IsValid ( ) ? Context . VertexFactoryDependencies - > Dependencies . Num ( ) : 0 ) ;
Context . LoadedIncludesCacheShared . AddDefaulted ( Context . VirtualSharedContentsOffset + Context . Environment . IncludeVirtualPathToSharedContentsMap . Num ( ) ) ;
// Initialize root file dependency, if present
if ( Context . PreprocessDependencies . IsValid ( ) )
{
const FShaderPreprocessDependency & Dependency = Context . PreprocessDependencies - > Dependencies [ 0 ] ;
FStbLoadedInclude * ContentsCached = & Context . LoadedIncludesCacheShared [ 0 ] ;
ContentsCached - > FileName = InFilename . Get ( ) ;
ContentsCached - > Data = Dependency . StrippedSource - > GetData ( ) ;
ContentsCached - > DataLength = Dependency . StrippedSource - > Num ( ) ;
StbLoadedIncludeTrimPadding ( ContentsCached ) ;
}
// Initialize loaded includes for IncludeVirtualPathToSharedContentsMap, and generate a hash table
uint32 SharedContentsMapIndex = Context . VirtualSharedContentsOffset ;
for ( const auto & SharedContentsMapIt : Context . Environment . IncludeVirtualPathToSharedContentsMap )
{
FStbLoadedInclude & Include = Context . LoadedIncludesCacheShared [ SharedContentsMapIndex ] ;
// Copy name
CopyStringToAnsiCharArray ( & SharedContentsMapIt . Key [ 0 ] , SharedContentsMapIt . Key . Len ( ) , Include . LocalFileName ) ;
Include . FileName = Include . LocalFileName . GetData ( ) ;
// Set data
Include . Data = SharedContentsMapIt . Value - > GetData ( ) ;
Include . DataLength = SharedContentsMapIt . Value - > Num ( ) ;
StbLoadedIncludeTrimPadding ( & Include ) ;
// Add to hash table -- GetTypeHash on string view is case insensitive
Context . SharedContentsHash . Add ( GetTypeHash ( FAnsiStringView ( Include . LocalFileName . GetData ( ) , Include . LocalFileName . Num ( ) - 1 ) ) , SharedContentsMapIndex ) ;
SharedContentsMapIndex + + ;
}
2023-10-06 09:14:35 -04:00
char * OutPreprocessedAnsi = preprocess_file ( InFilename . Get ( ) , & Context , StbDefines , arrlen ( StbDefines ) , & Diagnostics , & NumDiagnostics , ThreadLocalPreprocessBuffer , ClampedPreprocessBufferSize ) ;
2023-09-26 05:29:05 -04:00
2022-12-01 09:27:13 -05:00
bool HasError = false ;
if ( Diagnostics ! = nullptr )
{
for ( int DiagIndex = 0 ; DiagIndex < NumDiagnostics ; + + DiagIndex )
{
pp_diagnostic * Diagnostic = & Diagnostics [ DiagIndex ] ;
HasError | = ( Diagnostic - > error_level = = PP_RESULT_MODE_error ) ;
FString Message = Diagnostic - > message ;
2024-04-03 12:12:03 -04:00
if ( Diagnostic - > error_level = = PP_RESULT_MODE_error | | Diagnostic - > error_level = = PP_RESULT_MODE_warning )
2022-12-01 09:27:13 -05:00
{
2023-05-03 10:17:48 -04:00
FString Filename = Diagnostic - > where - > filename ;
Output . LogError ( MoveTemp ( Filename ) , MoveTemp ( Message ) , Diagnostic - > where - > line_number ) ;
2022-12-01 09:27:13 -05:00
}
else
{
EMessageType Type = FilterPreprocessorError ( Message ) ;
if ( Type = = EMessageType : : ShaderMetaData )
{
FString Directive ;
ExtractDirective ( Directive , Message ) ;
2023-05-03 10:17:48 -04:00
Output . AddDirective ( MoveTemp ( Directive ) ) ;
2022-12-01 09:27:13 -05:00
}
}
}
}
2023-05-03 10:17:48 -04:00
if ( ! HasError )
{
2023-10-25 16:01:11 -04:00
// Append ShaderPrint generated code at the end of the shader if necessary
Context . ShaderPrintGenerate ( OutPreprocessedAnsi , & Output . EditDiagnosticDatas ( ) ) ;
2023-12-15 15:28:27 -05:00
// "preprocessor_file_size" includes null terminator, so subtract one when initializing the FShaderSource (which automatically null terminates)
Output . EditSource ( ) . Set ( { OutPreprocessedAnsi , preprocessor_file_size ( OutPreprocessedAnsi ) - 1 } ) ;
2023-05-03 10:17:48 -04:00
}
2022-12-01 09:27:13 -05:00
if ( ! HasError & & ! Context . HasIncludedMandatoryHeaders ( ) )
{
2023-05-03 10:17:48 -04:00
LogMandatoryHeaderError ( Input , Output ) ;
2022-12-01 09:27:13 -05:00
HasError = true ;
}
preprocessor_file_free ( OutPreprocessedAnsi , Diagnostics ) ;
stbds_arrfree ( StbDefines ) ;
stb_arena_free ( & MacroArena ) ;
return ! HasError ;
}
2023-05-03 10:17:48 -04:00
bool PreprocessShader (
FString & OutPreprocessedShader ,
FShaderCompilerOutput & ShaderOutput ,
const FShaderCompilerInput & ShaderInput ,
2023-07-22 06:35:10 -04:00
PRAGMA_DISABLE_DEPRECATION_WARNINGS // FShaderCompilerDefinitions will be made internal in the future, marked deprecated until then
2023-05-03 10:17:48 -04:00
const FShaderCompilerDefinitions & AdditionalDefines ,
2023-12-06 13:32:32 -05:00
EDumpShaderDefines DefinesPolicy
2023-07-22 06:35:10 -04:00
PRAGMA_ENABLE_DEPRECATION_WARNINGS
2023-12-06 13:32:32 -05:00
)
2023-05-03 10:17:48 -04:00
{
FShaderPreprocessOutput Output ;
// when called via this overload, environment is assumed to be already merged in input struct
const FShaderCompilerEnvironment & Environment = ShaderInput . Environment ;
2023-12-06 13:32:32 -05:00
bool bSucceeded = PreprocessShader ( Output , ShaderInput , Environment , AdditionalDefines ) ;
2023-05-03 10:17:48 -04:00
2023-12-15 15:28:27 -05:00
OutPreprocessedShader = FString ( Output . GetSourceViewWide ( ) ) ;
2023-05-03 10:17:48 -04:00
for ( FShaderCompilerError & Error : Output . EditErrors ( ) )
{
ShaderOutput . Errors . Add ( MoveTemp ( Error ) ) ;
}
return bSucceeded ;
2024-08-06 13:59:01 -04:00
}