Files
UnrealEngineUWP/Engine/Source/Runtime/RenderCore/Private/ShaderCodeArchive.cpp

2451 lines
92 KiB
C++
Raw Normal View History

// Copyright Epic Games, Inc. All Rights Reserved.
#include "ShaderCodeArchive.h"
#include "Async/ParallelFor.h"
#include "Compression/OodleDataCompression.h"
#include "DataDrivenShaderPlatformInfo.h"
#include "Misc/Compression.h"
#include "Misc/FileHelper.h"
#include "Misc/MemStack.h"
#include "Misc/ScopeRWLock.h"
#include "Policies/PrettyJsonPrintPolicy.h"
#include "RHI.h"
#include "RenderUtils.h"
#include "RHICommandList.h"
#include "Serialization/JsonSerializer.h"
#include "Serialization/MemoryReader.h"
#include "Serialization/MemoryWriter.h"
#include "ShaderCodeLibrary.h"
#include "ShaderCore.h"
#include "Stats/Stats.h"
#include "Serialization/StaticMemoryReader.h"
#if WITH_EDITOR
#include "Misc/Optional.h"
#include "Misc/StringBuilder.h"
#include "Serialization/CompactBinarySerialization.h"
#include "Serialization/CompactBinaryWriter.h"
#include "Templates/Greater.h"
#endif
#if UE_SCA_VISUALIZE_SHADER_USAGE
#include "IImageWrapper.h"
#include "IImageWrapperModule.h"
#include "Modules/ModuleManager.h"
#endif // UE_SCA_VISUALIZE_SHADER_USAGE
// Backing storage for the r.ShaderCodeLibrary.DefaultAsyncIOPriority console variable; starts at AIOP_Normal.
// NOTE(review): the variable is registered with an empty help string.
int32 GShaderCodeLibraryAsyncLoadingPriority = int32(AIOP_Normal);
static FAutoConsoleVariableRef CVarShaderCodeLibraryAsyncLoadingPriority(
TEXT("r.ShaderCodeLibrary.DefaultAsyncIOPriority"),
GShaderCodeLibraryAsyncLoadingPriority,
TEXT(""),
ECVF_Default
);
// Backing storage for the r.ShaderCodeLibrary.AsyncIOAllowDontCache console variable; starts at 0.
// NOTE(review): registered with an empty help string; how the value is consumed is not visible in this part of the file.
int32 GShaderCodeLibraryAsyncLoadingAllowDontCache = 0;
static FAutoConsoleVariableRef CVarShaderCodeLibraryAsyncLoadingAllowDontCache(
TEXT("r.ShaderCodeLibrary.AsyncIOAllowDontCache"),
GShaderCodeLibraryAsyncLoadingAllowDontCache,
TEXT(""),
ECVF_Default
);
// Backing storage for r.ShaderCodeLibrary.VisualizeShaderUsage (read-only console variable, off by default).
// FShaderUsageVisualizer::SaveShaderUsageBitmap() does nothing while this is 0.
int32 GShaderCodeLibraryVisualizeShaderUsage = 0;
static FAutoConsoleVariableRef CVarShaderCodeLibraryVisualizeShaderUsage(
TEXT("r.ShaderCodeLibrary.VisualizeShaderUsage"),
GShaderCodeLibraryVisualizeShaderUsage,
TEXT("If 1, a bitmap with the used shaders (for each shader library chunk) will be saved at the exit. Works in standalone games only."),
ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// Backing storage for r.ShaderCodeLibrary.MaxShaderGroupSize (read-only console variable), in bytes of uncompressed shader code.
int32 GShaderCodeLibraryMaxShaderGroupSize = 1024 * 1024; // decompressing 1MB of shaders takes about 0.1ms on PC (TR 3970x, Oodle Mermaid6).
static FAutoConsoleVariableRef CVarShaderCodeLibraryMaxShaderGroupSize(
TEXT("r.ShaderCodeLibrary.MaxShaderGroupSize"),
GShaderCodeLibraryMaxShaderGroupSize,
TEXT("Max (uncompressed) size of a group of shaders to be compressed/decompressed together.")
TEXT("If a group exceeds it, it will be evenly split into subgroups which strive to not exceed it. However, if a shader group is down to one shader and still exceeds the limit, the limit will be ignored."),
ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// Backing storage for r.ShaderCodeLibrary.MaxShaderPreloadWaitTime (read-only console variable), in seconds; defaults to 1 ms.
float GShaderCodeLibraryMaxShaderPreloadWaitTime = 0.001f;
static FAutoConsoleVariableRef CVarShaderCodeLibraryMaxShaderPreloadWaitTime(
TEXT("r.ShaderCodeLibrary.MaxShaderPreloadWaitTime"),
GShaderCodeLibraryMaxShaderPreloadWaitTime,
TEXT("If we wait on shader preloads longer than this amount of seconds, we will log it as a warning."),
ECVF_RenderThreadSafe | ECVF_ReadOnly
);
// Backing storage for r.PreloadShaderPriority; GetShaderCodeArchivePriority() maps it to an IoDispatcherPriority_* value.
// Defaults to 2 (Medium).
int32 GPreloadShaderPriority = 2;
static FAutoConsoleVariableRef CVarPreloadShaderPriority(
TEXT("r.PreloadShaderPriority"),
GPreloadShaderPriority,
TEXT("Change PreloadShaderGroup I/O priority.\n")
TEXT("0-Min\n")
TEXT("1-Low\n")
TEXT("2-Medium (Default)\n")
TEXT("3-High\n")
TEXT("4-Max\n"),
ECVF_Default
);
/**
 * Translates the current r.PreloadShaderPriority setting into an IoDispatcherPriority_* value.
 * 0 -> Min, 1 -> Low, 3 -> High, 4 -> Max; every other value (including the default 2) -> Medium.
 */
int32 GetShaderCodeArchivePriority()
{
	const int32 RequestedPriority = GPreloadShaderPriority;

	if (RequestedPriority == 0)
	{
		return IoDispatcherPriority_Min;
	}
	if (RequestedPriority == 1)
	{
		return IoDispatcherPriority_Low;
	}
	if (RequestedPriority == 3)
	{
		return IoDispatcherPriority_High;
	}
	if (RequestedPriority == 4)
	{
		return IoDispatcherPriority_Max;
	}

	// 2 and any out-of-range setting fall back to the medium priority.
	return IoDispatcherPriority_Medium;
}
#if RHI_RAYTRACING // this function is only needed to check if we need to avoid excluding raytracing shaders
namespace
{
bool IsCreateShadersOnLoadEnabled()
{
static IConsoleVariable* CVar = IConsoleManager::Get().FindConsoleVariable(TEXT("r.CreateShadersOnLoad"));
return CVar && CVar->GetInt() != 0;
}
}
#endif // RHI_RAYTRACING
/**
 * Looks up a shader map by hash, using a hash-table key the caller has already computed.
 *
 * @param Hash	Shader map hash to look for.
 * @param Key	Hash-table key for Hash (callers in this file pass GetTypeHash(Hash)).
 * @return Index into ShaderMapHashes, or INDEX_NONE when no entry in the key's chain matches.
 */
int32 FSerializedShaderArchive::FindShaderMapWithKey(const FSHAHash& Hash, uint32 Key) const
{
	uint32 Candidate = ShaderMapHashTable.First(Key);
	while (ShaderMapHashTable.IsValid(Candidate))
	{
		// Different hashes can share a key, so confirm the full hash before accepting.
		if (ShaderMapHashes[Candidate] == Hash)
		{
			return Candidate;
		}
		Candidate = ShaderMapHashTable.Next(Candidate);
	}
	return INDEX_NONE;
}
/** Returns the index of the shader map with the given hash, or INDEX_NONE if the archive does not contain it. */
int32 FSerializedShaderArchive::FindShaderMap(const FSHAHash& Hash) const
{
	return FindShaderMapWithKey(Hash, GetTypeHash(Hash));
}
#if !USE_MMAPPED_SHADERARCHIVE
bool FSerializedShaderArchive::FindOrAddShaderMap(const FSHAHash& Hash, int32& OutIndex, const FShaderMapAssetPaths* AssociatedAssets)
{
const uint32 Key = GetTypeHash(Hash);
int32 Index = FindShaderMapWithKey(Hash, Key);
bool bAdded = Index == INDEX_NONE;
if (bAdded)
{
Index = ShaderMapHashes.Add(Hash);
ShaderMapEntries.AddDefaulted();
check(ShaderMapEntries.Num() == ShaderMapHashes.Num());
ShaderMapHashTable.Add(Key, Index);
}
#if WITH_EDITOR
if (AssociatedAssets && AssociatedAssets->Num())
{
ShaderCodeToAssets.FindOrAdd(Hash).Append(*AssociatedAssets);
}
#endif
OutIndex = Index;
return bAdded;
}
#endif
/**
 * Looks up a shader by hash, using a hash-table key the caller has already computed.
 *
 * @param Hash	Shader hash to look for.
 * @param Key	Hash-table key for Hash (callers in this file pass GetTypeHash(Hash)).
 * @return Index into ShaderHashes, or INDEX_NONE when no entry in the key's chain matches.
 */
int32 FSerializedShaderArchive::FindShaderWithKey(const FSHAHash& Hash, uint32 Key) const
{
	uint32 Candidate = ShaderHashTable.First(Key);
	while (ShaderHashTable.IsValid(Candidate))
	{
		// Different hashes can share a key, so confirm the full hash before accepting.
		if (ShaderHashes[Candidate] == Hash)
		{
			return Candidate;
		}
		Candidate = ShaderHashTable.Next(Candidate);
	}
	return INDEX_NONE;
}
/** Returns the index of the shader with the given hash, or INDEX_NONE if the archive does not contain it. */
int32 FSerializedShaderArchive::FindShader(const FSHAHash& Hash) const
{
	return FindShaderWithKey(Hash, GetTypeHash(Hash));
}
#if !USE_MMAPPED_SHADERARCHIVE
bool FSerializedShaderArchive::FindOrAddShader(const FSHAHash& Hash, int32& OutIndex)
{
const uint32 Key = GetTypeHash(Hash);
OutIndex = FindShaderWithKey(Hash, Key);
if (OutIndex == INDEX_NONE)
{
OutIndex = ShaderHashes.Add(Hash);
ShaderEntries.AddDefaulted();
check(ShaderEntries.Num() == ShaderHashes.Num());
ShaderHashTable.Add(Key, OutIndex);
return true;
}
return false;
}
void FSerializedShaderArchive::RemoveLastAddedShader()
{
check(!ShaderEntries.IsEmpty() && ShaderEntries.Num() == ShaderHashes.Num());
int32 ShaderIndex = ShaderEntries.Num() - 1;
const uint32 Key = GetTypeHash(ShaderHashes[ShaderIndex]);
ShaderHashTable.Remove(Key, ShaderIndex);
ShaderHashes.RemoveAt(ShaderIndex);
ShaderEntries.RemoveAt(ShaderIndex);
}
#if WITH_EDITOR
/**
 * Writes the archive to compact binary as an object with two fields:
 *  - "SerializedBytes": the output of FSerializedShaderArchive::Serialize() as a binary blob;
 *  - "ShaderCodeToAssets": a flat array in which every shader map hash is followed by an array of its asset names.
 */
FCbWriter& operator<<(FCbWriter& Writer, const FSerializedShaderArchive& Archive)
{
	// Serialize() handles both loading and saving and is therefore non-const; here it is driven by a memory writer.
	TArray64<uint8> RuntimeBytes;
	{
		FMemoryWriter64 RuntimeBytesWriter(RuntimeBytes);
		const_cast<FSerializedShaderArchive&>(Archive).Serialize(RuntimeBytesWriter);
	}

	Writer.BeginObject();

	Writer << "SerializedBytes";
	Writer.AddBinary(FMemoryView(RuntimeBytes.GetData(), RuntimeBytes.Num()));
	// The blob has been handed to the writer; release the temporary buffer before writing the rest.
	RuntimeBytes.Empty();

	// Serialize is meant for runtime fields only, so copy the editor-only fields separately
	Writer.BeginArray("ShaderCodeToAssets");
	for (const TPair<FSHAHash, FShaderMapAssetPaths>& HashAndAssets : Archive.ShaderCodeToAssets)
	{
		// Key and value are written as two successive array elements.
		Writer << HashAndAssets.Key;

		Writer.BeginArray();
		for (FName AssetName : HashAndAssets.Value)
		{
			Writer << AssetName;
		}
		Writer.EndArray();
	}
	Writer.EndArray();

	Writer.EndObject();
	return Writer;
}
/**
 * Loads an archive previously written by operator<<(FCbWriter&, const FSerializedShaderArchive&).
 *
 * @param Field			Compact binary object holding "SerializedBytes" and "ShaderCodeToAssets".
 * @param OutArchive	Receives the loaded archive. It is reset to a default-constructed archive if the
 *						serialized bytes cannot be read.
 * @return true if everything was read without errors. If only the asset mapping is damaged, false is
 *		   returned but whatever could be read is still stored in OutArchive.
 */
bool LoadFromCompactBinary(FCbFieldView Field, FSerializedShaderArchive& OutArchive)
{
	FMemoryView SerializedBytes = Field["SerializedBytes"].AsBinaryView();
	{
		FMemoryReaderView SerializeArchive(SerializedBytes);
		OutArchive.Serialize(SerializeArchive);
		if (SerializeArchive.IsError())
		{
			OutArchive = FSerializedShaderArchive();
			return false;
		}
	}

	FCbFieldView ShaderCodeToAssetsField = Field["ShaderCodeToAssets"];
	// Map size is array size divided by two because pairs are written as successive elements
	int32 NumShaderCodeToAssets = ShaderCodeToAssetsField.AsArrayView().Num()/2;
	bool bOk = !ShaderCodeToAssetsField.HasError();
	OutArchive.ShaderCodeToAssets.Empty(NumShaderCodeToAssets);

	FCbFieldViewIterator It = ShaderCodeToAssetsField.CreateViewIterator();
	while (It)
	{
		// First element of the pair: the shader map hash.
		FSHAHash ShaderMapHash;
		if (!LoadFromCompactBinary(*It++, ShaderMapHash))
		{
			bOk = false;
			continue;
		}

		FShaderMapAssetPaths& Paths = OutArchive.ShaderCodeToAssets.FindOrAdd(ShaderMapHash);

		// Second element of the pair: the array of asset names.
		FCbFieldView AssetNameArrayField = *It++;
		// Size the set from the asset array itself. The iterator has already moved on to the next pair,
		// so dereferencing it here would look at the wrong field. Accessing the field as an array also
		// makes the HasError() check below report a value that is not an array.
		Paths.Reserve(AssetNameArrayField.AsArrayView().Num());
		bOk = (!AssetNameArrayField.HasError()) & bOk;

		for (FCbFieldView AssetNameField : AssetNameArrayField)
		{
			FName AssetName;
			bOk = LoadFromCompactBinary(AssetNameField, AssetName) & bOk;
			Paths.Add(AssetName);
		}
	}
	return bOk;
}
#endif //WITH_EDITOR
#endif //!USE_MMAPPED_SHADERARCHIVE
#if UE_SCA_VISUALIZE_SHADER_USAGE
/** Records, under VisualizeLock, how many shaders the visualized archive contains. */
void FShaderUsageVisualizer::Initialize(const int32 InNumShaders)
{
	FScopeLock ScopedVisualizeLock(&VisualizeLock);
	NumShaders = InNumShaders;
}
void FShaderUsageVisualizer::SaveShaderUsageBitmap(const FString& Name, EShaderPlatform ShaderPlatform)
{
if (GShaderCodeLibraryVisualizeShaderUsage)
{
if (NumShaders)
{
if (IImageWrapperModule* ImageWrapperModule = FModuleManager::Get().GetModulePtr<IImageWrapperModule>(TEXT("ImageWrapper")))
{
if (TSharedPtr<IImageWrapper> PNGImageWrapper = ImageWrapperModule->CreateImageWrapper(EImageFormat::PNG))
{
UE_LOG(LogShaderLibrary, Log, TEXT("Creating shader usage bitmap for archive %s (NumShaders: %d, preloaded %d, created %d)"), *Name, NumShaders, PreloadedShaders.Num(), CreatedShaders.Num());
// find a value close to sqrt(NumShaders)
int32 ImageDimension = static_cast<int32>(FMath::Sqrt(static_cast<float>(NumShaders))) + 1;
TArray<FColor> ShaderUsageBitmap;
ShaderUsageBitmap.Reserve(ImageDimension * ImageDimension);
// map legend:
FColor UnusedShaderColor(128, 128, 128); // unused shaders - this is the majority of the bitmap content
FColor PreloadedShaderColor(192, 192, 192); // preloaded shaders - including those that weren't explicitly so, but they happened to be grouped with shaders we needed
FColor ExplicitlyPreloadedShaderColor(0, 255, 0); // shaders we explicitly wanted to preload - they can become the majority under certain circumstances. Pure white can blend with some viewer's background
FColor PreloadedAndDecompressedShaderColor(0, 0, 255); // shaders that we wanted to preload and that got decompressed (as part of the creating them or their neighbor in group)
FColor NotPreloadedButDecompressedShaderColor(255, 0, 0); // shaders that we decompressed just because they were grouped together with others. We did not want to preload them at all.
FColor CreatedShaderColor(255, 255, 255); // created shaders - in practice, always few and far between. Blue is more noticeable on a largely bright background than magenta
for (int32 Idx = 0; Idx < NumShaders; ++Idx)
{
ShaderUsageBitmap.Add(UnusedShaderColor);
}
// the rest can be zero/transparent
ShaderUsageBitmap.AddZeroed(ImageDimension * ImageDimension - NumShaders);
check(ShaderUsageBitmap.Num() == ImageDimension * ImageDimension);
{
// in case this ever gets called runtime
FScopeLock Lock(&VisualizeLock);
// fill preloaded ones first
for (int32 ShaderIdx : PreloadedShaders)
{
ShaderUsageBitmap[ShaderIdx] = PreloadedShaderColor;
}
// explicitly preloaded shaders
for (int32 ShaderIdx : ExplicitlyPreloadedShaders)
{
ShaderUsageBitmap[ShaderIdx] = ExplicitlyPreloadedShaderColor;
}
// fill decompressed ones, but mark up those that we didn't ask to preload differently
for (int32 ShaderIdx : DecompressedShaders)
{
bool bShaderWasRequestedToBePreloaded = ExplicitlyPreloadedShaders.Contains(ShaderIdx);
ShaderUsageBitmap[ShaderIdx] = bShaderWasRequestedToBePreloaded ? PreloadedAndDecompressedShaderColor : NotPreloadedButDecompressedShaderColor;
}
for (int32 ShaderIdx : CreatedShaders)
{
ShaderUsageBitmap[ShaderIdx] = CreatedShaderColor;
}
}
bool bSet = PNGImageWrapper->SetRaw(ShaderUsageBitmap.GetData(), ShaderUsageBitmap.Num() * sizeof(FColor),
ImageDimension, ImageDimension, ERGBFormat::BGRA, 8);
if (bSet)
{
TArray64<uint8> CompressedData = PNGImageWrapper->GetCompressed(100);
const FString Filename = FString::Printf(TEXT("%s_%s_RuntimeShaderUsage_%s.png"), *Name, *LexToString(ShaderPlatform), *FDateTime::Now().ToString());
const FString SaveDir = FPaths::Combine(FPaths::ProjectSavedDir(), TEXT("Profiling"));
const FString FilePath = FPaths::Combine(SaveDir, Filename);
if (!FFileHelper::SaveArrayToFile(CompressedData, *FilePath))
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Couldn't write shader usage bitmap %s"), *FilePath);
}
else
{
UE_LOG(LogShaderLibrary, Log, TEXT("Saved shader usage bitmap %s. Legend: shaders not loaded from disk - dark grey, loaded to RAM explicitly - green, loaded as part of their compressed group - bright grey, decompressed: blue if they were loaded explicitly, red if just because they were a part of the group. Actually created shaders - white"),
*FilePath);
}
}
else
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Error creating shader usage bitmap for archive %s (NumShaders: %d, preloaded %d, created %d) - cannot create a PNG image"), *Name, NumShaders, PreloadedShaders.Num(), CreatedShaders.Num());
}
}
else
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Couldn't create shader usage bitmap for archive %s (NumShaders: %d, preloaded %d, created %d) - cannot create ImageWrapper for PNG format"), *Name, NumShaders, PreloadedShaders.Num(), CreatedShaders.Num());
}
}
else
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Couldn't create shader usage bitmap for archive %s (NumShaders: %d, preloaded %d, created %d) - no ImageWrapper module"), *Name, NumShaders, PreloadedShaders.Num(), CreatedShaders.Num());
}
}
}
}
#endif
/**
 * Builds the hint appended to fatal decompression errors: it names the CPU brand and points at the
 * Oodle page about unstable Intel CPUs. The brand is always reported; the filter below is compiled out.
 */
static FString GetDecompressionFailureExtraMessage()
{
	const FString CpuBrand = FPlatformMisc::GetCPUBrand();

	// most common offender is 13900K
	// also seems to be a problem on 14900K and maybe 12900K
	// not sure if 700K is affected
	// don't try to filter the brand string, just always log it if we hit a fatal error
#if 0
	const bool bIs900K =
		( CpuBrand.Find(TEXT("900K")) != INDEX_NONE ) ||
		( CpuBrand.Find(TEXT("900-K")) != INDEX_NONE ) ||
		( CpuBrand.Find(TEXT("900 K")) != INDEX_NONE );
	if ( ! bIs900K )
	{
		return FString();
	}
#endif

	return FString::Printf(TEXT("The CPU (%s) may be unstable; for details see http://www.radgametools.com/oodleintel.htm"),*CpuBrand);
}
/**
 * Decompresses Oodle-compressed shader code into a caller-provided buffer.
 * A decompression failure is logged as Fatal together with the CPU hint.
 *
 * @param OutDecompressedShader	Destination buffer.
 * @param UncompressedSize		Size passed to the decompressor for the destination.
 * @param CompressedShaderCode	Compressed input.
 * @param CompressedSize		Size of the compressed input.
 */
void ShaderCodeArchive::DecompressShaderWithOodle(uint8* OutDecompressedShader, int64 UncompressedSize, const uint8* CompressedShaderCode, int64 CompressedSize)
{
	// Iostore always compresses with Oodle.
	if (FCompression::UncompressMemory(NAME_Oodle, OutDecompressedShader, UncompressedSize, CompressedShaderCode, CompressedSize))
	{
		return;
	}

	UE_LOG(LogShaderLibrary, Fatal, TEXT("ShaderCodeArchive::DecompressShader(): Could not decompress shader with Oodle. %s"),
		*GetDecompressionFailureExtraMessage());
}
namespace
{
	/**
	 * Decompresses an Oodle-compressed shader group into a caller-provided buffer. On failure it logs a
	 * Fatal error that identifies the group and the shader that triggered the decompression.
	 *
	 * @param GroupIndex			Index of the shader group (logging only).
	 * @param GroupHash				IoStore chunk id of the group (logging only).
	 * @param Entry					Group entry; only its NumShaders is read, for logging.
	 * @param ShaderIndex			Index of the requested shader (logging only).
	 * @param ShaderInGroupIndex	Index of the requested shader within the group (logging only).
	 * @param ShaderHash			Hash of the requested shader (logging only).
	 * @param OutDecompressedShader	Destination buffer.
	 * @param UncompressedSize		Size passed to the decompressor for the destination.
	 * @param CompressedShaderCode	Compressed input.
	 * @param CompressedSize		Size of the compressed input.
	 */
	void DecompressShadergroupWithOodleAndExtraLogging(
		const int32 GroupIndex, const FIoChunkId& GroupHash, FIoStoreShaderGroupEntry& Entry, const int32 ShaderIndex, const uint64 ShaderInGroupIndex, const FSHAHash& ShaderHash,
		uint8* OutDecompressedShader, int64 UncompressedSize, const uint8* CompressedShaderCode, int64 CompressedSize)
	{
		bool bSucceed = FCompression::UncompressMemory(NAME_Oodle, OutDecompressedShader, UncompressedSize, CompressedShaderCode, CompressedSize);
		if (!bSucceed)
		{
			// ShaderInGroupIndex is a uint64, so it is formatted with %llu; a 32-bit %d specifier would not
			// match the size of the argument that is actually passed.
			UE_LOG(LogShaderLibrary, Fatal, TEXT("DecompressShaderWithOodleAndExtraLogging(): Could not decompress shader group with Oodle. Group Index: %d Group IoStoreHash:%s Group NumShaders: %d Shader Index: %d Shader In-group Index: %llu Shader Hash: %s. %s"),
				GroupIndex,
				*LexToString(GroupHash),
				Entry.NumShaders,
				ShaderIndex,
				ShaderInGroupIndex,
				*LexToString(ShaderHash),
				*GetDecompressionFailureExtraMessage()
				);
		}
	}
}
/**
 * Compresses shader code with Oodle, or reports how large the output buffer needs to be.
 *
 * @param OutCompressedShader		Destination buffer. Pass nullptr to only query the required buffer size.
 * @param OutCompressedSize			In: passed to the compressor as the destination size. Out: the compressed size,
 *									or the required buffer size when OutCompressedShader is nullptr.
 * @param InUncompressedShaderCode	Data to compress.
 * @param InUncompressedSize		Size of the data to compress.
 * @param InOodleCompressor			Oodle compressor to use.
 * @param InOodleLevel				Oodle compression level to use.
 * @return true if data was compressed to a non-zero size; false when only an estimate was requested
 *		   (or when the compressor produced a zero size, which also trips the check).
 */
bool ShaderCodeArchive::CompressShaderWithOodle(uint8* OutCompressedShader, int64& OutCompressedSize, const uint8* InUncompressedShaderCode, int64 InUncompressedSize, FOodleDataCompression::ECompressor InOodleCompressor, FOodleDataCompression::ECompressionLevel InOodleLevel)
{
	if (OutCompressedShader == nullptr)
	{
		// Just requesting an estimate.
		OutCompressedSize = FOodleDataCompression::CompressedBufferSizeNeeded(InUncompressedSize);
		return false;
	}

	OutCompressedSize = FOodleDataCompression::Compress(OutCompressedShader, OutCompressedSize, InUncompressedShaderCode, InUncompressedSize, InOodleCompressor, InOodleLevel);
	check(OutCompressedSize != 0);
	return OutCompressedSize != 0;
}
/**
 * Produces the uncompressed code of one shader.
 *
 * @param Index					Index of the shader; used for both ShaderEntries and ShaderCode.
 * @param ShaderCode			Stored shader code buffers, indexed by shader.
 * @param OutDecompressedShader	Resized (without shrinking its allocation) to the uncompressed size and filled.
 */
void FSerializedShaderArchive::DecompressShader(int32 Index, const TArray<FSharedBuffer>& ShaderCode, TArray<uint8>& OutDecompressedShader) const
{
	const FShaderCodeEntry& ShaderEntry = ShaderEntries[Index];
	OutDecompressedShader.SetNum(ShaderEntry.UncompressedSize, EAllowShrinking::No);

	// Equal stored and uncompressed sizes are treated as "stored uncompressed": copy the bytes as they are.
	const bool bNeedsDecompression = (ShaderEntry.Size != ShaderEntry.UncompressedSize);
	if (bNeedsDecompression)
	{
		ShaderCodeArchive::DecompressShaderWithOodle(OutDecompressedShader.GetData(), ShaderEntry.UncompressedSize, reinterpret_cast<const uint8*>(ShaderCode[Index].GetData()), ShaderEntry.Size);
	}
	else
	{
		FMemory::Memcpy(OutDecompressedShader.GetData(), ShaderCode[Index].GetData(), ShaderEntry.UncompressedSize);
	}
}
#if !USE_MMAPPED_SHADERARCHIVE
/**
 * Computes the final layout of the archive:
 *  - assigns every shader entry its offset, packing the entries back to back in ShaderEntries order;
 *  - rebuilds PreloadEntries from scratch and, for every shader map, sets FirstPreloadIndex and
 *    NumPreloadEntries. Reads of a shader map that are at most MaxByteGapAllowedInAPreload bytes
 *    apart are merged into a single preload entry.
 *
 * Every shader map must reference at least one shader.
 */
void FSerializedShaderArchive::Finalize()
{
	// Set the correct offsets
	{
		uint64 Offset = 0u;
		for (FShaderCodeEntry& Entry : ShaderEntries)
		{
			Entry.Offset = Offset;
			Offset += Entry.Size;
		}
	}

	constexpr int32 MaxByteGapAllowedInAPreload = 1024;

	PreloadEntries.Empty();
	for (FShaderMapEntry& ShaderMapEntry : ShaderMapEntries)
	{
		check(ShaderMapEntry.NumShaders > 0u);

		// Collect the byte range of every shader of this shader map and order the ranges by offset.
		TArray<FFileCachePreloadEntry> SortedPreloadEntries;
		SortedPreloadEntries.Empty(ShaderMapEntry.NumShaders + 1);
		for (uint32 i = 0; i < ShaderMapEntry.NumShaders; ++i)
		{
			const int32 ShaderIndex = ShaderIndices[ShaderMapEntry.ShaderIndicesOffset + i];
			const FShaderCodeEntry& ShaderEntry = ShaderEntries[ShaderIndex];
			SortedPreloadEntries.Add(FFileCachePreloadEntry(ShaderEntry.Offset, ShaderEntry.Size));
		}
		SortedPreloadEntries.Sort([](const FFileCachePreloadEntry& Lhs, const FFileCachePreloadEntry& Rhs) { return Lhs.Offset < Rhs.Offset; });
		// Zero-sized sentinel far past any real range: its gap always exceeds the limit, which flushes the last real entry.
		SortedPreloadEntries.Add(FFileCachePreloadEntry(INT64_MAX, 0));

		ShaderMapEntry.FirstPreloadIndex = PreloadEntries.Num();
		// PreloadEntries is rebuilt from scratch, so the per-shader-map count has to restart as well;
		// otherwise a repeated Finalize() would keep adding to the count of the previous run.
		ShaderMapEntry.NumPreloadEntries = 0u;

		FFileCachePreloadEntry CurrentPreloadEntry = SortedPreloadEntries[0];
		for (uint32 PreloadIndex = 1; PreloadIndex <= ShaderMapEntry.NumShaders; ++PreloadIndex)
		{
			const FFileCachePreloadEntry& PreloadEntry = SortedPreloadEntries[PreloadIndex];
			const int64 Gap = PreloadEntry.Offset - CurrentPreloadEntry.Offset - CurrentPreloadEntry.Size;
			checkf(Gap >= 0, TEXT("Overlapping preload entries, [%lld-%lld), [%lld-%lld)"),
				CurrentPreloadEntry.Offset, CurrentPreloadEntry.Offset + CurrentPreloadEntry.Size, PreloadEntry.Offset, PreloadEntry.Offset + PreloadEntry.Size);
			if (Gap > MaxByteGapAllowedInAPreload)
			{
				// Too far apart: emit the accumulated range and start a new one.
				++ShaderMapEntry.NumPreloadEntries;
				PreloadEntries.Add(CurrentPreloadEntry);
				CurrentPreloadEntry = PreloadEntry;
			}
			else
			{
				// Close enough: grow the accumulated range so that it also covers this entry.
				CurrentPreloadEntry.Size = PreloadEntry.Offset + PreloadEntry.Size - CurrentPreloadEntry.Offset;
			}
		}
		check(ShaderMapEntry.NumPreloadEntries > 0u);
		// Only the sentinel may be left over.
		check(CurrentPreloadEntry.Size == 0);
	}
}
#endif
/**
 * Serializes the runtime part of the archive: shader map hashes, shader hashes, shader map entries,
 * shader entries, preload entries and shader indices, in that order. When loading, the two lookup
 * hash tables are rebuilt afterwards.
 *
 * With USE_MMAPPED_SHADERARCHIVE the arrays are not copied: each member becomes a view that points into
 * the memory behind the reader, so the archive has to be an FStaticMemoryReader (anything else is Fatal).
 */
void FSerializedShaderArchive::Serialize(FArchive& Ar)
{
#if USE_MMAPPED_SHADERARCHIVE
	// Reads the element count, points the view at the payload that follows it and skips over the payload.
	auto MapArrayViewInPlace = [](auto& OutView, FStaticMemoryReader& Reader)
	{
		using ViewType = std::remove_cvref_t <decltype(OutView)>;
		typename ViewType::SizeType NumElements;
		Reader << NumElements;

		const uint64 PayloadBytes = NumElements * sizeof(typename ViewType::ElementType);
		const uint64 PayloadStart = Reader.Tell();
		OutView = TArrayView<typename ViewType::ElementType >((typename ViewType::ElementType*)(Reader.GetData() + PayloadStart), NumElements);
		Reader.Seek(PayloadStart + PayloadBytes);
	};

	if (Ar.GetArchiveName() != TEXT("FStaticMemoryReader"))
	{
		UE_LOG(LogShaderLibrary, Fatal, TEXT("mmapped shader archive must be serialized via FStaticMemoryReader"));
	}
	else
	{
		// The archive name was verified above, so the downcast is to the actual type.
		FStaticMemoryReader& MemReaderAr = static_cast<FStaticMemoryReader&>(Ar);
		MapArrayViewInPlace(ShaderMapHashes, MemReaderAr);
		MapArrayViewInPlace(ShaderHashes, MemReaderAr);
		MapArrayViewInPlace(ShaderMapEntries, MemReaderAr);
		MapArrayViewInPlace(ShaderEntries, MemReaderAr);
		MapArrayViewInPlace(PreloadEntries, MemReaderAr);
		MapArrayViewInPlace(ShaderIndices, MemReaderAr);
	}
#else
	Ar << ShaderMapHashes;
	Ar << ShaderHashes;
	Ar << ShaderMapEntries;
	Ar << ShaderEntries;
	Ar << PreloadEntries;
	Ar << ShaderIndices;
#endif

	// Hashes and entries are parallel arrays.
	check(ShaderHashes.Num() == ShaderEntries.Num());
	check(ShaderMapHashes.Num() == ShaderMapEntries.Num());

	if (!Ar.IsLoading())
	{
		return;
	}

	// The hash tables are not part of the serialized data; rebuild them from the hashes just loaded.
	auto RebuildHashTable = [](auto& HashTable, const auto& Hashes)
	{
		// Bucket count: element count rounded up to a power of two, capped at 0x10000.
		const uint32 HashSize = FMath::Min<uint32>(0x10000, 1u << FMath::CeilLogTwo(Hashes.Num()));
		HashTable.Clear(HashSize, Hashes.Num());
		for (int32 Index = 0; Index < Hashes.Num(); ++Index)
		{
			const uint32 Key = GetTypeHash(Hashes[Index]);
			HashTable.Add(Key, Index);
		}
	};

	RebuildHashTable(ShaderMapHashTable, ShaderMapHashes);
	RebuildHashTable(ShaderHashTable, ShaderHashes);
}
#if WITH_EDITOR
/**
 * Writes the shader map -> assets mapping to the archive as pretty-printed JSON encoded as UTF-8.
 * Does nothing unless the archive is saving.
 *
 * Layout: { "AssetInfoVersion": <int>, "ShaderCodeToAssets": [ { "ShaderMapHash": "...", "Assets": [ ... ] }, ... ] }.
 * As a side effect, the asset set of every shader map is sorted lexically in place.
 */
void FSerializedShaderArchive::SaveAssetInfo(FArchive& Ar)
{
	if (!Ar.IsSaving())
	{
		return;
	}

	using FHashAndAssets = TPair<FSHAHash, FShaderMapAssetPaths>;

	FString JsonTcharText;
	{
		TSharedRef<TJsonWriter<TCHAR, TPrettyJsonPrintPolicy<TCHAR>>> Writer = TJsonWriterFactory<TCHAR, TPrettyJsonPrintPolicy<TCHAR>>::Create(&JsonTcharText);

		Writer->WriteObjectStart();
		Writer->WriteValue(TEXT("AssetInfoVersion"), static_cast<int32>(EAssetInfoVersion::CurrentVersion));

		// Order both the asset names and the shader maps so that the output does not depend on map iteration order.
		TArray<const FHashAndAssets*> SortedData;
		SortedData.Reserve(ShaderCodeToAssets.Num());
		for (FHashAndAssets& HashAndAssets : ShaderCodeToAssets)
		{
			HashAndAssets.Value.Sort(FNameLexicalLess());
			SortedData.Add(&HashAndAssets);
		}
		Algo::Sort(SortedData, [](const FHashAndAssets* const a, const FHashAndAssets* const b)
			{
				return GetTypeHash(a->Key) < GetTypeHash(b->Key);
			});

		Writer->WriteArrayStart(TEXT("ShaderCodeToAssets"));
		for (const FHashAndAssets* HashAndAssets : SortedData)
		{
			Writer->WriteObjectStart();
			Writer->WriteValue(TEXT("ShaderMapHash"), HashAndAssets->Key.ToString());

			Writer->WriteArrayStart(TEXT("Assets"));
			for (const FName& AssetPath : HashAndAssets->Value)
			{
				Writer->WriteValue(AssetPath.ToString());
			}
			Writer->WriteArrayEnd();

			Writer->WriteObjectEnd();
		}
		Writer->WriteArrayEnd();

		Writer->WriteObjectEnd();
		Writer->Close();
	}

	// FArchive::Serialize takes a non-const pointer even when saving, hence the const_cast.
	FTCHARToUTF8 JsonUtf8(*JsonTcharText);
	Ar.Serialize(const_cast<void *>(reinterpret_cast<const void*>(JsonUtf8.Get())), JsonUtf8.Length() * sizeof(UTF8CHAR));
}
bool FSerializedShaderArchive::LoadAssetInfo(const FString& Filename)
{
TUniquePtr<FArchive> Reader(IFileManager::Get().CreateFileReader(*Filename));
return LoadAssetInfo(Reader.Get());
}
/**
 * Loads the shader map -> assets mapping from a JSON asset info file (the format written by SaveAssetInfo).
 *
 * Each mapping found in the file is stored in ShaderCodeToAssets under its shader map hash, replacing any
 * value already stored for that hash. Loading stops at the first malformed mapping; mappings read before
 * that point stay in ShaderCodeToAssets.
 *
 * @param Ar	Archive to read the JSON text from; may be null.
 * @return true if the whole file was read; false if Ar is null, the file cannot be read or parsed,
 *		   its version is not the current one, or a mapping is malformed.
 */
bool FSerializedShaderArchive::LoadAssetInfo(FArchive* Ar)
{
	if (!Ar)
	{
		return false;
	}

	FString JsonText;
	if (!FFileHelper::LoadFileToString(JsonText, *Ar))
	{
		UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: could not read its contents"),
			*Ar->GetArchiveName());
		return false;
	}

	TSharedPtr<FJsonObject> JsonObject;
	TSharedRef<TJsonReader<TCHAR>> Reader = TJsonReaderFactory<TCHAR>::Create(JsonText);

	// Attempt to deserialize JSON
	if (!FJsonSerializer::Deserialize(Reader, JsonObject) || !JsonObject.IsValid())
	{
		return false;
	}

	TSharedPtr<FJsonValue> AssetInfoVersion = JsonObject->Values.FindRef(TEXT("AssetInfoVersion"));
	if (!AssetInfoVersion.IsValid())
	{
		UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: missing AssetInfoVersion (damaged file?)"),
			*Ar->GetArchiveName());
		return false;
	}

	// Only files written with the current version are accepted.
	const EAssetInfoVersion FileVersion = static_cast<EAssetInfoVersion>(static_cast<int64>(AssetInfoVersion->AsNumber()));
	if (FileVersion != EAssetInfoVersion::CurrentVersion)
	{
		UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: expected version %d, got unsupported version %d."),
			*Ar->GetArchiveName(), static_cast<int32>(EAssetInfoVersion::CurrentVersion), static_cast<int32>(FileVersion));
		return false;
	}

	TSharedPtr<FJsonValue> AssetInfoArrayValue = JsonObject->Values.FindRef(TEXT("ShaderCodeToAssets"));
	if (!AssetInfoArrayValue.IsValid())
	{
		UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: missing ShaderCodeToAssets array (damaged file?)"),
			*Ar->GetArchiveName());
		return false;
	}

	// Bound by reference: the array is only read, so there is no need to copy every element.
	const TArray<TSharedPtr<FJsonValue>>& AssetInfoArray = AssetInfoArrayValue->AsArray();
	UE_LOG(LogShaderLibrary, Display, TEXT("Reading asset info file %s: found %d existing mappings"),
		*Ar->GetArchiveName(), AssetInfoArray.Num());

	for (int32 IdxPair = 0, NumPairs = AssetInfoArray.Num(); IdxPair < NumPairs; ++IdxPair)
	{
		TSharedPtr<FJsonObject> Pair = AssetInfoArray[IdxPair]->AsObject();
		if (UNLIKELY(!Pair.IsValid()))
		{
			UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: ShaderCodeToAssets array contains unreadable mapping #%d (damaged file?)"),
				*Ar->GetArchiveName(),
				IdxPair
				);
			return false;
		}

		TSharedPtr<FJsonValue> ShaderMapHashJson = Pair->Values.FindRef(TEXT("ShaderMapHash"));
		if (UNLIKELY(!ShaderMapHashJson.IsValid()))
		{
			UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: ShaderCodeToAssets array contains unreadable ShaderMapHash for mapping %d (damaged file?)"),
				*Ar->GetArchiveName(),
				IdxPair
				);
			return false;
		}

		FSHAHash ShaderMapHash;
		ShaderMapHash.FromString(ShaderMapHashJson->AsString());

		TSharedPtr<FJsonValue> AssetPathsArrayValue = Pair->Values.FindRef(TEXT("Assets"));
		if (UNLIKELY(!AssetPathsArrayValue.IsValid()))
		{
			UE_LOG(LogShaderLibrary, Warning, TEXT("Rejecting asset info file %s: ShaderCodeToAssets array contains unreadable Assets array for mapping %d (damaged file?)"),
				*Ar->GetArchiveName(),
				IdxPair
				);
			return false;
		}

		const TArray<TSharedPtr<FJsonValue>>& AssetPathsArray = AssetPathsArrayValue->AsArray();
		const int32 NumAssets = AssetPathsArray.Num();

		FShaderMapAssetPaths Paths;
		Paths.Reserve(NumAssets);
		for (int32 IdxAsset = 0; IdxAsset < NumAssets; ++IdxAsset)
		{
			Paths.Add(FName(*AssetPathsArray[IdxAsset]->AsString()));
		}

		// Paths is not used after this point, so hand it over instead of copying it.
		ShaderCodeToAssets.Add(ShaderMapHash, MoveTemp(Paths));
	}

	return true;
}
void FSerializedShaderArchive::CreateAsChunkFrom(const FSerializedShaderArchive& Parent, const TSet<FName>& PackagesInChunk, TArray<int32>& OutShaderCodeEntriesNeeded)
{
// we should begin with a clean slate
checkf(ShaderMapHashes.Num() == 0 && ShaderHashes.Num() == 0 && ShaderMapEntries.Num() == 0 && ShaderEntries.Num() == 0 && PreloadEntries.Num() == 0 && ShaderIndices.Num() == 0,
TEXT("Expecting a new, uninitialized FSerializedShaderArchive instance for creating a chunk."));
// go through parent's shadermap hashes in the order of their addition
for (int32 IdxSM = 0, NumSMs = Parent.ShaderMapHashes.Num(); IdxSM < NumSMs; ++IdxSM)
{
const FSHAHash& ShaderMapHash = Parent.ShaderMapHashes[IdxSM];
const FShaderMapAssetPaths* Assets = Parent.ShaderCodeToAssets.Find(ShaderMapHash);
bool bIncludeSM = false;
if (UNLIKELY(Assets == nullptr))
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Shadermap %s is not associated with any asset. Including it in every chunk"), *ShaderMapHash.ToString());
bIncludeSM = true;
}
else
{
// if any asset is in the chunk, include
for (const FName& Asset : *Assets)
{
if (PackagesInChunk.Contains(Asset))
{
bIncludeSM = true;
break;
}
}
}
if (bIncludeSM)
{
// add this shader map
int32 ShaderMapIndex = INDEX_NONE;
if (FindOrAddShaderMap(ShaderMapHash, ShaderMapIndex, Assets))
{
// if we're in this scope, it means it's a new shadermap for the chunk and we need more information about it from the parent
int32 ParentShaderMapIndex = IdxSM;
const FShaderMapEntry& ParentShaderMapDescriptor = Parent.ShaderMapEntries[ParentShaderMapIndex];
const int32 NumShaders = ParentShaderMapDescriptor.NumShaders;
FShaderMapEntry& ShaderMapDescriptor = ShaderMapEntries[ShaderMapIndex];
ShaderMapDescriptor.NumShaders = NumShaders;
ShaderMapDescriptor.ShaderIndicesOffset = ShaderIndices.AddZeroed(NumShaders);
// add shader by shader
for (int32 ShaderIdx = 0; ShaderIdx < NumShaders; ++ShaderIdx)
{
int32 ParentShaderIndex = Parent.ShaderIndices[ParentShaderMapDescriptor.ShaderIndicesOffset + ShaderIdx];
int32 ShaderIndex = INDEX_NONE;
if (FindOrAddShader(Parent.ShaderHashes[ParentShaderIndex], ShaderIndex))
{
// new shader! add it to the mapping of parent shadercode entries to ours. and check the integrity of the mapping
checkf(OutShaderCodeEntriesNeeded.Num() == ShaderIndex, TEXT("Mapping between the shader indices in a chunk and the whole archive is inconsistent"));
OutShaderCodeEntriesNeeded.Add(ParentShaderIndex);
// copy the entry as is
ShaderEntries[ShaderIndex] = Parent.ShaderEntries[ParentShaderIndex];
}
ShaderIndices[ShaderMapDescriptor.ShaderIndicesOffset + ShaderIdx] = ShaderIndex;
}
}
}
}
}
/**
 * Gathers statistics about the archive.
 *
 * @param OutDebugStats			Always filled (it is zeroed first): number of unique shaders, shader maps,
 *								shader references and assets, plus the total size of the referenced shaders
 *								(a shader used by several shader maps is counted once per use) and of the
 *								unique shaders.
 * @param OutExtendedDebugStats	Optional. When not null, also receives the heavier stats: plain-text dump,
 *								min/median/max number of shaders per shader map, the usage counts of the
 *								10 most referenced shaders and per-frequency counts and sizes.
 */
void FSerializedShaderArchive::CollectStatsAndDebugInfo(FDebugStats& OutDebugStats, FExtendedDebugStats* OutExtendedDebugStats)
{
	// collect the light-weight stats first
	FMemory::Memzero(OutDebugStats);
	OutDebugStats.NumUniqueShaders = ShaderHashes.Num();
	OutDebugStats.NumShaderMaps = ShaderMapHashes.Num();

	int32 TotalShaders = 0;
	int64 TotalShaderSize = 0;
	uint32 MinSMSizeInShaders = UINT_MAX;
	uint32 MaxSMSizeInShaders = 0;
	for (const FShaderMapEntry& SMEntry : ShaderMapEntries)
	{
		MinSMSizeInShaders = FMath::Min(MinSMSizeInShaders, SMEntry.NumShaders);
		MaxSMSizeInShaders = FMath::Max(MaxSMSizeInShaders, SMEntry.NumShaders);
		TotalShaders += SMEntry.NumShaders;

		const int32 ThisSMShaders = SMEntry.NumShaders;
		for (int32 ShaderIdx = 0; ShaderIdx < ThisSMShaders; ++ShaderIdx)
		{
			TotalShaderSize += ShaderEntries[ShaderIndices[SMEntry.ShaderIndicesOffset + ShaderIdx]].Size;
		}
	}
	OutDebugStats.NumShaders = TotalShaders;
	OutDebugStats.ShadersSize = TotalShaderSize;

	// this is moderately expensive, consider moving to ExtendedStats?
	{
		TSet<FName> AllAssets;
		for (TMap<FSHAHash, FShaderMapAssetPaths>::TConstIterator Iter(ShaderCodeToAssets); Iter; ++Iter)
		{
			for (const FName& AssetName : Iter.Value())
			{
				AllAssets.Add(AssetName);
			}
		}
		OutDebugStats.NumAssets = AllAssets.Num();
	}

	int64 ActuallySavedShaderSize = 0;
	for (const FShaderCodeEntry& ShaderEntry : ShaderEntries)
	{
		ActuallySavedShaderSize += ShaderEntry.Size;
	}
	OutDebugStats.ShadersUniqueSize = ActuallySavedShaderSize;

	// If OutExtendedDebugStats pointer is passed, we're asked to fill out a heavy-weight stats.
	if (OutExtendedDebugStats)
	{
		// textual rep
		DumpContentsInPlaintext(OutExtendedDebugStats->TextualRepresentation);

		// With no shader maps the running minimum is still the UINT_MAX seed; report 0 instead,
		// the same way the median below falls back to 0.
		OutExtendedDebugStats->MinNumberOfShadersPerSM = ShaderMapEntries.Num() ? MinSMSizeInShaders : 0u;
		OutExtendedDebugStats->MaxNumberofShadersPerSM = MaxSMSizeInShaders;

		// median SM size in shaders
		TArray<int32> ShadersInSM;

		// shader usage
		TMap<int32, int32> ShaderToUsageMap;

		for (const FShaderMapEntry& SMEntry : ShaderMapEntries)
		{
			const int32 ThisSMShaders = SMEntry.NumShaders;
			ShadersInSM.Add(ThisSMShaders);

			for (int32 ShaderIdx = 0; ShaderIdx < ThisSMShaders; ++ShaderIdx)
			{
				int ShaderIndex = ShaderIndices[SMEntry.ShaderIndicesOffset + ShaderIdx];
				int32& Usage = ShaderToUsageMap.FindOrAdd(ShaderIndex, 0);
				++Usage;
			}
		}

		ShadersInSM.Sort();
		OutExtendedDebugStats->MedianNumberOfShadersPerSM = ShadersInSM.Num() ? ShadersInSM[ShadersInSM.Num() / 2] : 0;

		// Most used shaders first.
		ShaderToUsageMap.ValueSort(TGreater<int32>());
		// add top 10 shaders
		for (const TTuple<int32, int32>& UsagePair : ShaderToUsageMap)
		{
			OutExtendedDebugStats->TopShaderUsages.Add(UsagePair.Value);
			if (OutExtendedDebugStats->TopShaderUsages.Num() >= 10)
			{
				break;
			}
		}

		// calculate per-frequency stats
		FMemory::Memzero(OutExtendedDebugStats->NumShadersPerFrequency);
		FMemory::Memzero(OutExtendedDebugStats->UncompressedSizePerFrequency);
		FMemory::Memzero(OutExtendedDebugStats->CompressedSizePerFrequency);
		for (const FShaderCodeEntry& ShaderEntry : ShaderEntries)
		{
			// Each array is bounds-checked against its own size before it is written.
			check(ShaderEntry.Frequency < UE_ARRAY_COUNT(OutExtendedDebugStats->NumShadersPerFrequency));
			++OutExtendedDebugStats->NumShadersPerFrequency[ShaderEntry.Frequency];

			check(ShaderEntry.Frequency < UE_ARRAY_COUNT(OutExtendedDebugStats->UncompressedSizePerFrequency));
			OutExtendedDebugStats->UncompressedSizePerFrequency[ShaderEntry.Frequency] += ShaderEntry.UncompressedSize;

			check(ShaderEntry.Frequency < UE_ARRAY_COUNT(OutExtendedDebugStats->CompressedSizePerFrequency));
			OutExtendedDebugStats->CompressedSizePerFrequency[ShaderEntry.Frequency] += ShaderEntry.Size;
		}
	}

#if 0 // graph visualization - maybe one day we'll return to this
	// enumerate all shaders first (so they can be identified by people looking them up in other debug output)
	int32 IdxShaderNum = 0;
	for (const FSHAHash& ShaderHash : ShaderHashes)
	{
		FString Numeral = FString::Printf(TEXT("Shd_%d"), IdxShaderNum);
		OutRelationshipGraph->Add(TTuple<FString, FString>(Numeral, FString("Hash_") + ShaderHash.ToString()));
		++IdxShaderNum;
	}

	// add all assets if any
	for (TMap<FName, FSHAHash>::TConstIterator Iter(AssetToShaderCode); Iter; ++Iter)
	{
		int32 SMIndex = FindShaderMap(Iter.Value());
		OutRelationshipGraph->Add(TTuple<FString, FString>(Iter.Key().ToString(), FString::Printf(TEXT("SM_%d"), SMIndex)));
	}

	// shadermaps to shaders
	int NumSMs = ShaderMapHashes.Num();
	for (int32 IdxSM = 0; IdxSM < NumSMs; ++IdxSM)
	{
		FString SMId = FString::Printf(TEXT("SM_%d"), IdxSM);
		const FShaderMapEntry& SMEntry = ShaderMapEntries[IdxSM];
		const int32 ThisSMShaders = SMEntry.NumShaders;
		for (int32 ShaderIdx = 0; ShaderIdx < ThisSMShaders; ++ShaderIdx)
		{
			FString ReferencedShader = FString::Printf(TEXT("Shd_%d"), ShaderIndices[SMEntry.ShaderIndicesOffset + ShaderIdx]);
			OutRelationshipGraph->Add(TTuple<FString, FString>(SMId, ReferencedShader));
		}
	}
#endif // 0
}
/**
 * Produces a brace-structured plain-text listing of the archive tables and stores it in OutText.
 *
 * Sections are emitted in this order: ShaderMapHashes, ShaderHashes, ShaderMapEntries, ShaderEntries,
 * PreloadEntries and ShaderIndices. The ShaderIndices section is broken into lines by walking the
 * NumShaders counts of consecutive shadermap entries.
 *
 * @param OutText	Receives the complete dump; any previous contents are replaced.
 */
void FSerializedShaderArchive::DumpContentsInPlaintext(FString& OutText) const
{
	TStringBuilder<256> Out;
	Out << TEXT("FSerializedShaderArchive\n{\n");

	// Hashes of all shadermaps, one per line.
	Out << TEXT("\tShaderMapHashes\n\t{\n");
	for (const auto& MapHash : ShaderMapHashes)
	{
		Out << TEXT("\t\t") << MapHash.ToString() << TEXT("\n");
	}
	Out << TEXT("\t}\n");

	// Hashes of all individual shaders, one per line.
	Out << TEXT("\tShaderHashes\n\t{\n");
	for (const auto& CodeHash : ShaderHashes)
	{
		Out << TEXT("\t\t") << CodeHash.ToString() << TEXT("\n");
	}
	Out << TEXT("\t}\n");

	// Per-shadermap bookkeeping.
	Out << TEXT("\tShaderMapEntries\n\t{\n");
	for (const auto& MapEntry : ShaderMapEntries)
	{
		Out << TEXT("\t\tFShaderMapEntry\n\t\t{\n");
		Out << TEXT("\t\t\tShaderIndicesOffset : ") << MapEntry.ShaderIndicesOffset << TEXT("\n");
		Out << TEXT("\t\t\tNumShaders : ") << MapEntry.NumShaders << TEXT("\n");
		Out << TEXT("\t\t\tFirstPreloadIndex : ") << MapEntry.FirstPreloadIndex << TEXT("\n");
		Out << TEXT("\t\t\tNumPreloadEntries : ") << MapEntry.NumPreloadEntries << TEXT("\n");
		Out << TEXT("\t\t}\n");
	}
	Out << TEXT("\t}\n");

	// Per-shader bookkeeping.
	Out << TEXT("\tShaderEntries\n\t{\n");
	for (const auto& CodeEntry : ShaderEntries)
	{
		Out << TEXT("\t\tFShaderCodeEntry\n\t\t{\n");
		Out << TEXT("\t\t\tOffset : ") << CodeEntry.Offset << TEXT("\n");
		Out << TEXT("\t\t\tSize : ") << CodeEntry.Size << TEXT("\n");
		Out << TEXT("\t\t\tUncompressedSize : ") << CodeEntry.UncompressedSize << TEXT("\n");
		Out << TEXT("\t\t\tFrequency : ") << CodeEntry.Frequency << TEXT("\n");
		Out << TEXT("\t\t}\n");
	}
	Out << TEXT("\t}\n");

	// File ranges recorded for preloading.
	Out << TEXT("\tPreloadEntries\n\t{\n");
	for (const auto& PreloadEntry : PreloadEntries)
	{
		Out << TEXT("\t\tFFileCachePreloadEntry\n\t\t{\n");
		Out << TEXT("\t\t\tOffset : ") << PreloadEntry.Offset << TEXT("\n");
		Out << TEXT("\t\t\tSize : ") << PreloadEntry.Size << TEXT("\n");
		Out << TEXT("\t\t}\n");
	}
	Out << TEXT("\t}\n");

	// Shader indices, with a line break each time the running shadermap's shader count is used up.
	Out << TEXT("\tShaderIndices\n\t{\n");
	{
		const int32 NumShaderMaps = ShaderMapEntries.Num();
		int32 ShaderMapCursor = 0;
		int32 RemainingInShaderMap = NumShaderMaps ? ShaderMapEntries[0].NumShaders : 0;
		bool bAtLineStart = true;

		for (const auto& StoredIndex : ShaderIndices)
		{
			if (UNLIKELY(bAtLineStart))
			{
				Out << TEXT("\t\t");
				bAtLineStart = false;
			}
			else
			{
				Out << TEXT(", ");
			}
			Out << StoredIndex;

			--RemainingInShaderMap;
			// Advance past the exhausted shadermap; the loop form also skips over shadermaps that hold no shaders.
			while (RemainingInShaderMap == 0)
			{
				bAtLineStart = true;
				if (++ShaderMapCursor >= NumShaderMaps)
				{
					break;
				}
				RemainingInShaderMap = ShaderMapEntries[ShaderMapCursor].NumShaders;
			}

			if (bAtLineStart)
			{
				Out << TEXT("\n");
			}
		}
	}
	Out << TEXT("\t}\n");

	Out << TEXT("}\n");
	OutText = FStringView(Out);
}
#endif // WITH_EDITOR
/**
 * Allocates a shader code archive and initializes it from an already opened archive stream.
 *
 * The serialized shader tables are read from Ar, and the stream position immediately after them is
 * remembered as the base offset for shader code reads. A file cache handle is then opened on
 * InDestFilePath for asynchronous reads.
 *
 * @param InPlatform		Shader platform passed to the archive constructor.
 * @param Ar				Stream the serialized shader tables are read from.
 * @param InDestFilePath	Path of the library file; opened through IFileCacheHandle.
 * @param InLibraryDir		Library directory passed to the archive constructor.
 * @param InLibraryName		Library name passed to the archive constructor.
 * @return Newly allocated archive (raw pointer created with new).
 */
FShaderCodeArchive* FShaderCodeArchive::Create(EShaderPlatform InPlatform, FArchive& Ar, const FString& InDestFilePath, const FString& InLibraryDir, const FString& InLibraryName)
{
	FShaderCodeArchive* const NewArchive = new FShaderCodeArchive(InPlatform, InLibraryDir, InLibraryName);

	// Read the tables first; whatever follows them in the stream is the shader code itself.
	Ar << NewArchive->SerializedShaders;
	NewArchive->ShaderPreloads.SetNum(NewArchive->SerializedShaders.GetNumShaders());
	NewArchive->LibraryCodeOffset = Ar.Tell();

	// Handle used for the async reads issued by the preload functions.
	NewArchive->FileCacheHandle = IFileCacheHandle::CreateFileCacheHandle(*InDestFilePath);

	const auto NumUniqueShaders = NewArchive->SerializedShaders.GetShaderEntries().Num();
	NewArchive->DebugVisualizer.Initialize(NumUniqueShaders);

	UE_LOG(LogShaderLibrary, Display, TEXT("Using %s for material shader code. Total %d unique shaders."), *InDestFilePath, NumUniqueShaders);

	INC_DWORD_STAT_BY(STAT_Shaders_ShaderResourceMemory, NewArchive->GetSizeBytes());
	return NewArchive;
}
/**
 * Constructs an empty archive: forwards the platform and library name to FRHIShaderLibrary and stores the library directory.
 * The code offset starts at 0 and no file cache handle is open yet; FShaderCodeArchive::Create() fills those in.
 */
FShaderCodeArchive::FShaderCodeArchive(EShaderPlatform InPlatform, const FString& InLibraryDir, const FString& InLibraryName)
: FRHIShaderLibrary(InPlatform, InLibraryName)
, LibraryDir(InLibraryDir)
, LibraryCodeOffset(0)
, FileCacheHandle(nullptr)
{
}
/** Removes this archive's contribution from the shader resource memory stat, then releases its resources via Teardown(). */
FShaderCodeArchive::~FShaderCodeArchive()
{
// The stat is decremented by GetSizeBytes() before Teardown() runs.
// NOTE(review): presumably so the size matches what Create() added - confirm GetSizeBytes() does not depend on state Teardown() frees.
DEC_DWORD_STAT_BY(STAT_Shaders_ShaderResourceMemory, GetSizeBytes());
Teardown();
}
void FShaderCodeArchive::Teardown()
{
if (FileCacheHandle)
{
delete FileCacheHandle;
FileCacheHandle = nullptr;
}
for (int32 ShaderIndex = 0; ShaderIndex < SerializedShaders.GetNumShaders(); ++ShaderIndex)
{
FShaderPreloadEntry& ShaderPreloadEntry = ShaderPreloads[ShaderIndex];
if (ShaderPreloadEntry.Code)
{
const FShaderCodeEntry& ShaderEntry = SerializedShaders.GetShaderEntries()[ShaderIndex];
FMemory::Free(ShaderPreloadEntry.Code);
ShaderPreloadEntry.Code = nullptr;
DEC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, ShaderEntry.Size);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
CsvStatPreloadedShaderMB->Sub((float)ShaderEntry.Size / (1024.0f * 1024.0f));
#endif
}
}
DebugVisualizer.SaveShaderUsageBitmap(GetName(), GetPlatform());
}
/**
 * Completion step of a shader preload: copies the bytes that were read into the shader's preload
 * buffer and drops the entry's reference to the preload event.
 *
 * @param ShaderIndex	Index of the shader whose read has finished.
 * @param PreloadData	Stream holding the bytes that were read for this shader.
 */
void FShaderCodeArchive::OnShaderPreloadFinished(int32 ShaderIndex, const IMemoryReadStreamRef& PreloadData)
{
	// Make sure the data is ready before the lock is taken.
	PreloadData->EnsureReadNonBlocking();

	const FShaderCodeEntry& CodeEntry = SerializedShaders.GetShaderEntries()[ShaderIndex];

	FWriteScopeLock ScopedWriteLock(ShaderPreloadLock);
	FShaderPreloadEntry& PreloadEntry = ShaderPreloads[ShaderIndex];
	PreloadData->CopyTo(PreloadEntry.Code, 0, CodeEntry.Size);
	PreloadEntry.PreloadEvent.SafeRelease();
}
/**
 * Task-graph task that hands a shader's read stream to FShaderCodeArchive::OnShaderPreloadFinished().
 * The preload functions create it through TGraphTask with the read completion events as prerequisites.
 */
struct FPreloadShaderTask
{
explicit FPreloadShaderTask(FShaderCodeArchive* InArchive, int32 InShaderIndex, const IMemoryReadStreamRef& InData)
: Archive(InArchive), Data(InData), ShaderIndex(InShaderIndex)
{}
// Archive that owns the preload entry (raw pointer, not reference counted here).
FShaderCodeArchive* Archive;
// Stream with the bytes read for the shader.
IMemoryReadStreamRef Data;
// Index of the shader being preloaded.
int32 ShaderIndex;
/** Forwards the data to the archive, then releases this task's reference to the stream. */
void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
{
Archive->OnShaderPreloadFinished(ShaderIndex, Data);
Data.SafeRelease();
}
// Subsequents are tracked: the task's completion event is stored and handed out as the preload event.
FORCEINLINE static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
FORCEINLINE ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyNormalThreadNormalTask; }
// Returns a default-constructed stat id.
FORCEINLINE TStatId GetStatId() const { return TStatId(); }
};
/**
 * Adds a reference to a shader's preloaded code, starting the asynchronous read when this is the
 * first reference.
 *
 * @param ShaderIndex			Index of the shader to preload.
 * @param OutCompletionEvents	Receives the shader's preload event if one is currently set on the entry.
 * @return Always true.
 */
bool FShaderCodeArchive::PreloadShader(int32 ShaderIndex, FGraphEventArray& OutCompletionEvents)
{
	LLM_SCOPE(ELLMTag::Shaders);

	FWriteScopeLock ScopedWriteLock(ShaderPreloadLock);

	FShaderPreloadEntry& PreloadEntry = ShaderPreloads[ShaderIndex];
	checkf(!PreloadEntry.bNeverToBePreloaded, TEXT("We are preloading a shader that shouldn't be preloaded in this run (e.g. raytracing shader on D3D11)."));

	// Post-increment: the comparison sees the reference count from before this call.
	const bool bIsFirstReference = (PreloadEntry.NumRefs++ == 0u);
	if (bIsFirstReference)
	{
		check(!PreloadEntry.PreloadEvent);

		const FShaderCodeEntry& CodeEntry = SerializedShaders.GetShaderEntries()[ShaderIndex];

		// The buffer is sized for the stored (possibly compressed) bytes.
		PreloadEntry.Code = FMemory::Malloc(CodeEntry.Size);
		PreloadEntry.FramePreloadStarted = GFrameNumber;
		DebugVisualizer.MarkExplicitlyPreloadedForVisualization(ShaderIndex);

		const EAsyncIOPriorityAndFlags BasePriority = static_cast<EAsyncIOPriorityAndFlags>(GShaderCodeLibraryAsyncLoadingPriority);
		FGraphEventArray ReadCompletionEvents;
		const EAsyncIOPriorityAndFlags CacheFlag = GShaderCodeLibraryAsyncLoadingAllowDontCache ? AIOP_FLAG_DONTCACHE : AIOP_MIN;
		IMemoryReadStreamRef PreloadData = FileCacheHandle->ReadData(ReadCompletionEvents, LibraryCodeOffset + CodeEntry.Offset, CodeEntry.Size, BasePriority | CacheFlag);

		// The copy into the buffer runs as a task that has the read events as prerequisites.
		auto CopyTask = TGraphTask<FPreloadShaderTask>::CreateTask(&ReadCompletionEvents).ConstructAndHold(this, ShaderIndex, MoveTemp(PreloadData));
		PreloadEntry.PreloadEvent = CopyTask->GetCompletionEvent();
		CopyTask->Unlock();

		INC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, CodeEntry.Size);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
		TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
		CsvStatPreloadedShaderMB->Add((float)CodeEntry.Size / (1024.0f * 1024.0f));
#endif
	}

	if (PreloadEntry.PreloadEvent)
	{
		OutCompletionEvents.Add(PreloadEntry.PreloadEvent);
	}
	return true;
}
/**
 * Adds a preload reference to every shader of a shadermap, starting asynchronous reads for the
 * shaders that had no reference before.
 *
 * When RHI_RAYTRACING is compiled in, ray tracing shaders are flagged as never-to-be-preloaded and
 * skipped if ray tracing is not allowed and shaders are not created on load.
 *
 * @param ShaderMapIndex		Index of the shadermap whose shaders should be preloaded.
 * @param OutCompletionEvents	Receives the preload events of the shaders that are still being read.
 * @return Always true.
 */
bool FShaderCodeArchive::PreloadShaderMap(int32 ShaderMapIndex, FGraphEventArray& OutCompletionEvents)
{
	LLM_SCOPE(ELLMTag::Shaders);

	const FShaderMapEntry& MapEntry = SerializedShaders.GetShaderMapEntries()[ShaderMapIndex];
	const EAsyncIOPriorityAndFlags BasePriority = static_cast<EAsyncIOPriorityAndFlags>(GShaderCodeLibraryAsyncLoadingPriority);
	const uint32 StartFrame = GFrameNumber;
	uint32 NewlyPreloadedBytes = 0u;

	FWriteScopeLock ScopedWriteLock(ShaderPreloadLock);

	TArrayView MapShaderIndices = SerializedShaders.GetShaderIndices();
	for (uint32 Slot = 0u; Slot < MapEntry.NumShaders; ++Slot)
	{
		const int32 ShaderIndex = MapShaderIndices[MapEntry.ShaderIndicesOffset + Slot];
		FShaderPreloadEntry& PreloadEntry = ShaderPreloads[ShaderIndex];
		const FShaderCodeEntry& CodeEntry = SerializedShaders.GetShaderEntries()[ShaderIndex];

#if RHI_RAYTRACING
		if (!(IsRayTracingAllowed() || IsCreateShadersOnLoadEnabled()) && IsRayTracingShaderFrequency(static_cast<EShaderFrequency>(CodeEntry.Frequency)))
		{
			// No reference is taken for this shader; the flag lets ReleasePreloadedShader() skip it as well.
			PreloadEntry.bNeverToBePreloaded = 1;
			continue;
		}
#endif

		// Post-increment: a non-zero previous count means some earlier request already owns the read.
		if (PreloadEntry.NumRefs++ != 0u)
		{
			if (PreloadEntry.PreloadEvent)
			{
				OutCompletionEvents.Add(PreloadEntry.PreloadEvent);
			}
			continue;
		}

		check(!PreloadEntry.PreloadEvent);

		PreloadEntry.Code = FMemory::Malloc(CodeEntry.Size);
		PreloadEntry.FramePreloadStarted = StartFrame;
		DebugVisualizer.MarkExplicitlyPreloadedForVisualization(ShaderIndex);
		NewlyPreloadedBytes += CodeEntry.Size;

		FGraphEventArray ReadCompletionEvents;
		const EAsyncIOPriorityAndFlags CacheFlag = GShaderCodeLibraryAsyncLoadingAllowDontCache ? AIOP_FLAG_DONTCACHE : AIOP_MIN;
		IMemoryReadStreamRef PreloadData = FileCacheHandle->ReadData(ReadCompletionEvents, LibraryCodeOffset + CodeEntry.Offset, CodeEntry.Size, BasePriority | CacheFlag);

		// The copy into the buffer runs as a task that has the read events as prerequisites.
		auto CopyTask = TGraphTask<FPreloadShaderTask>::CreateTask(&ReadCompletionEvents).ConstructAndHold(this, ShaderIndex, MoveTemp(PreloadData));
		PreloadEntry.PreloadEvent = CopyTask->GetCompletionEvent();
		CopyTask->Unlock();

		OutCompletionEvents.Add(PreloadEntry.PreloadEvent);
	}

	// Stats are updated once, with the total of everything allocated by this call.
	INC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, NewlyPreloadedBytes);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
	TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
	CsvStatPreloadedShaderMB->Add((float)NewlyPreloadedBytes / (1024.0f * 1024.0f));
#endif
	return true;
}
/**
 * Blocks until the preload of the given entry has finished, if it is still in flight.
 *
 * @param ShaderPreloadEntry	Entry whose preload event should be waited on.
 * @return True if the call actually had to wait, false if there was nothing pending.
 */
bool FShaderCodeArchive::WaitForPreload(FShaderPreloadEntry& ShaderPreloadEntry)
{
	// Take a copy of the event under the read lock; the wait itself happens without the lock held.
	FGraphEventRef PendingEvent;
	{
		FReadScopeLock ScopedReadLock(ShaderPreloadLock);
		if (ShaderPreloadEntry.NumRefs == 0u)
		{
			// An unreferenced entry must not carry an event.
			check(!ShaderPreloadEntry.PreloadEvent);
		}
		else
		{
			PendingEvent = ShaderPreloadEntry.PreloadEvent;
		}
	}

	if (!PendingEvent || PendingEvent->IsComplete())
	{
		return false;
	}

	FTaskGraphInterface::Get().WaitUntilTaskCompletes(PendingEvent);
	return true;
}
/**
 * Drops one preload reference of a shader and frees its preload buffer when that was the last one.
 * Entries flagged as never-to-be-preloaded are ignored.
 *
 * @param ShaderIndex	Index of the shader whose reference is released.
 */
void FShaderCodeArchive::ReleasePreloadedShader(int32 ShaderIndex)
{
	FShaderPreloadEntry& PreloadEntry = ShaderPreloads[ShaderIndex];
	if (PreloadEntry.bNeverToBePreloaded)
	{
		return;
	}

	// Let any read still in flight finish before the entry is touched.
	WaitForPreload(PreloadEntry);

	FWriteScopeLock ScopedWriteLock(ShaderPreloadLock);
	PreloadEntry.PreloadEvent.SafeRelease();

	// Post-decrement: RefsBeforeRelease is the count that included the reference being released.
	const uint32 RefsBeforeRelease = PreloadEntry.NumRefs--;
	check(PreloadEntry.Code);
	check(RefsBeforeRelease > 0u);
	if (RefsBeforeRelease != 1u)
	{
		// Other references remain; the buffer stays alive.
		return;
	}

	FMemory::Free(PreloadEntry.Code);
	PreloadEntry.Code = nullptr;

	const FShaderCodeEntry& CodeEntry = SerializedShaders.GetShaderEntries()[ShaderIndex];
	DEC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, CodeEntry.Size);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
	TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
	CsvStatPreloadedShaderMB->Sub((float)CodeEntry.Size / (1024.0f * 1024.0f));
#endif
}
/**
 * Creates the RHI shader for the given shader index.
 *
 * The function takes its own preload reference (starting the read if needed), blocks until the shader
 * bytes are in memory, decompresses them when the stored size differs from the uncompressed size,
 * creates the RHI shader matching the entry's frequency and finally releases the preload reference.
 *
 * @param Index		Index of the shader within this archive.
 * @param bRequired	Not referenced anywhere in this function body.
 * @return The created shader. The reference stays null for ray tracing frequencies when RHI_RAYTRACING is
 *         not compiled in or the RHI reports no ray tracing (shader) support.
 */
TRefCountPtr<FRHIShader> FShaderCodeArchive::CreateShader(int32 Index, bool bRequired)
{
LLM_SCOPE(ELLMTag::Shaders);
TRefCountPtr<FRHIShader> Shader;
// Scratch memory for decompression comes from this mem stack.
// NOTE(review): the FMemMark presumably releases it when the function returns - confirm.
FMemStackBase& MemStack = FMemStack::Get();
FMemMark Mark(MemStack);
const FShaderCodeEntry& ShaderEntry = SerializedShaders.GetShaderEntries()[Index];
FShaderPreloadEntry& ShaderPreloadEntry = ShaderPreloads[Index];
checkf(!ShaderPreloadEntry.bNeverToBePreloaded, TEXT("We are creating a shader that shouldn't be preloaded in this run (e.g. raytracing shader on D3D11)."));
// Take our own preload reference; the completion events are not needed because we wait explicitly below.
{
FGraphEventArray Dummy;
PreloadShader(Index, Dummy);
}
{
TRACE_CPUPROFILER_EVENT_SCOPE(BlockingShaderLoad);
double TimeStarted = FPlatformTime::Seconds();
const bool bNeededToWait = WaitForPreload(ShaderPreloadEntry);
if (bNeededToWait)
{
const double WaitDuration = FPlatformTime::Seconds() - TimeStarted;
// only complain if the wait (in seconds) exceeded the GShaderCodeLibraryMaxShaderPreloadWaitTime threshold
if (WaitDuration > GShaderCodeLibraryMaxShaderPreloadWaitTime)
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Spent %.2f ms in a blocking wait for shader preload, NumRefs: %d, FramePreloadStarted: %d, CurrentFrame: %d"), WaitDuration * 1000.0, ShaderPreloadEntry.NumRefs, ShaderPreloadEntry.FramePreloadStarted, GFrameNumber);
}
}
}
const uint8* ShaderCode = (uint8*)ShaderPreloadEntry.Code;
// Differing sizes are treated as "stored compressed": decompress into mem stack scratch memory.
if (ShaderEntry.UncompressedSize != ShaderEntry.Size)
{
uint8* UncompressedCode = reinterpret_cast<uint8*>(MemStack.Alloc(ShaderEntry.UncompressedSize, 16));
ShaderCodeArchive::DecompressShaderWithOodle(UncompressedCode, ShaderEntry.UncompressedSize, ShaderCode, ShaderEntry.Size);
ShaderCode = (uint8*)UncompressedCode;
}
// detect the breach of contract early
ensureAlwaysMsgf(IsInRenderingThread() || GRHISupportsMultithreadedShaderCreation, TEXT("More than one thread is creating shaders, but GRHISupportsMultithreadedShaderCreation is false."));
const auto ShaderCodeView = MakeArrayView(ShaderCode, ShaderEntry.UncompressedSize);
const FSHAHash& ShaderHash = SerializedShaders.GetShaderHashes()[Index];
// Dispatch on the shader frequency; CheckShaderCreation() is only invoked on paths that attempted a creation.
switch (ShaderEntry.Frequency)
{
case SF_Vertex: Shader = RHICreateVertexShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_Mesh: Shader = RHICreateMeshShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_Amplification: Shader = RHICreateAmplificationShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_Pixel: Shader = RHICreatePixelShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_Geometry: Shader = RHICreateGeometryShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_Compute: Shader = RHICreateComputeShader(ShaderCodeView, ShaderHash); CheckShaderCreation(Shader, Index); break;
case SF_WorkGraphRoot: Shader = RHICreateWorkGraphShader(ShaderCodeView, ShaderHash, SF_WorkGraphRoot); CheckShaderCreation(Shader, Index); break;
case SF_WorkGraphComputeNode: Shader = RHICreateWorkGraphShader(ShaderCodeView, ShaderHash, SF_WorkGraphComputeNode); CheckShaderCreation(Shader, Index); break;
case SF_RayGen: case SF_RayMiss: case SF_RayHitGroup: case SF_RayCallable:
#if RHI_RAYTRACING
// Without ray tracing support nothing is created and Shader stays null.
if (GRHISupportsRayTracing && GRHISupportsRayTracingShaders)
{
Shader = RHICreateRayTracingShader(ShaderCodeView, ShaderHash, ShaderEntry.GetFrequency());
CheckShaderCreation(Shader, Index);
}
#endif // RHI_RAYTRACING
break;
default: checkNoEntry(); break;
}
DebugVisualizer.MarkCreatedForVisualization(Index);
// Release the reference we were holding
ReleasePreloadedShader(Index);
// Stats and the hash are only applied when a shader object was actually produced.
if (Shader)
{
INC_DWORD_STAT(STAT_Shaders_NumShadersCreated);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
TCsvPersistentCustomStat<int>* CsvStatNumShadersCreated = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatInt(TEXT("NumShadersCreated"), CSV_CATEGORY_INDEX(Shaders));
CsvStatNumShadersCreated->Add(1);
#endif
Shader->SetHash(ShaderHash);
}
return Shader;
}
/**
 * Derives the I/O chunk id of a shader code library from its name and shader format.
 *
 * The id is built from a CityHash64 over the lower-cased string "<LibraryName>-<FormatName>". The hash
 * covers the raw TCHAR buffer (Len() * sizeof(TCHAR) bytes), not a re-encoded copy of the string.
 *
 * @param LibraryName	Name of the shader library.
 * @param FormatName	Shader format name.
 * @return Chunk id of type EIoChunkType::ShaderCodeLibrary.
 */
FIoChunkId FIoStoreShaderCodeArchive::GetShaderCodeArchiveChunkId(const FString& LibraryName, FName FormatName)
{
	FString LoweredKey = FString::Printf(TEXT("%s-%s"), *LibraryName, *FormatName.ToString());
	LoweredKey.ToLowerInline();

	const char* const KeyBytes = reinterpret_cast<const char*>(*LoweredKey);
	const uint64 KeyHash = CityHash64(KeyBytes, LoweredKey.Len() * sizeof(TCHAR));

	return CreateIoChunkId(KeyHash, 0, EIoChunkType::ShaderCodeLibrary);
}
/**
 * Builds the I/O chunk id for shader code identified by a SHA hash.
 *
 * The 12-byte id consists of the first 11 bytes of the hash followed by one byte holding
 * EIoChunkType::ShaderCode.
 *
 * @param ShaderHash	Hash identifying the shader code.
 * @return The assembled chunk id.
 */
FIoChunkId FIoStoreShaderCodeArchive::GetShaderCodeChunkId(const FSHAHash& ShaderHash)
{
	constexpr int32 HashPrefixBytes = 11;
	constexpr int32 ChunkIdBytes = HashPrefixBytes + 1;

	uint8 RawId[ChunkIdBytes];
	FMemory::Memcpy(RawId, ShaderHash.Hash, HashPrefixBytes);
	// The trailing byte carries the chunk type.
	RawId[HashPrefixBytes] = static_cast<uint8>(EIoChunkType::ShaderCode);

	FIoChunkId Result;
	Result.Set(RawId, ChunkIdBytes);
	return Result;
}
void FIoStoreShaderCodeArchive::CreateIoStoreShaderCodeArchiveHeader(const FName& Format, const FSerializedShaderArchive& SerializedShaders, FIoStoreShaderCodeArchiveHeader& OutHeader)
{
OutHeader.ShaderMapHashes = SerializedShaders.GetShaderMapHashes();
OutHeader.ShaderHashes = SerializedShaders.GetShaderHashes();
// shader group hashes will be populated later
OutHeader.ShaderMapEntries.Empty(SerializedShaders.GetShaderMapEntries().Num());
for (const FShaderMapEntry& ShaderMapEntry : SerializedShaders.GetShaderMapEntries())
{
FIoStoreShaderMapEntry& IoStoreShaderMapEntry = OutHeader.ShaderMapEntries.AddDefaulted_GetRef();
IoStoreShaderMapEntry.ShaderIndicesOffset = ShaderMapEntry.ShaderIndicesOffset;
IoStoreShaderMapEntry.NumShaders = ShaderMapEntry.NumShaders;
}
// indices should be copied before grouping as the groups will append to the array
OutHeader.ShaderIndices = SerializedShaders.GetShaderIndices();
// shader entries are copied, the remainder of the field will be assigned when splitting into groups
OutHeader.ShaderEntries.Empty(SerializedShaders.GetShaderEntries().Num());
for (const FShaderCodeEntry& ShaderEntry : SerializedShaders.GetShaderEntries())
{
FIoStoreShaderCodeEntry& IoStoreShaderEntry = OutHeader.ShaderEntries.AddDefaulted_GetRef();
IoStoreShaderEntry.Frequency = ShaderEntry.Frequency;
}
// Higher level description of the group splitting algo that follows:
// We compress together shaders that are loaded together (all other strategies, like grouping similar shaders, were found to compress better but not reduce RAM usage).
// For that, we find for each shader which shadermaps are referencing it (often times it will be just one, but for some simple and shared shaders it can be thousands).
// We group the shaders by those sets of shadermaps - all shaders referenced by the same shadermap(s) are a candidate for being a single group. Then we potentially split this candidate group
// into raytracing and non-raytracing groups (so we can avoid preloading RTX shaders run-time if RTX is disabled), and then each of those is potentially split further by size
// (to avoid too large groups that will take too much time to decompress - this is regulated by r.ShaderCodeLibrary.MaxShaderGroupSize). The results of that process (note, it can still be
// a single group) is added to the header.
// Each group's indices, like in case of shadermaps, are stored in ShaderIndices array. Before we append a new group's indices however, we look if we can find an existing range that we can reuse.
const bool bSeparateRaytracingShaders = (Format == FName("PCD3D_SM5"));
TArray<TPair<uint32, TArray<int32>>> ShaderToShadermapsArray;
{
ShaderToShadermapsArray.AddDefaulted(OutHeader.ShaderEntries.Num());
{
FCriticalSection ShaderLocks[1024];
// for each shader, find all the shadermaps it belongs to
ParallelFor(OutHeader.ShaderMapEntries.Num(),
[&ShaderToShadermapsArray, &OutHeader, &ShaderLocks](int32 ShaderMapIndex)
{
FIoStoreShaderMapEntry& ShaderMapEntry = OutHeader.ShaderMapEntries[ShaderMapIndex];
for (int32 ShaderIdxIdx = ShaderMapEntry.ShaderIndicesOffset, StopBeforeIdxIdx = ShaderMapEntry.ShaderIndicesOffset + ShaderMapEntry.NumShaders; ShaderIdxIdx < StopBeforeIdxIdx; ++ShaderIdxIdx)
{
int32 ShaderIndex = OutHeader.ShaderIndices[ShaderIdxIdx];
// add this shadermap as a dependency.
int32 ShaderLockNumber = ShaderIndex % UE_ARRAY_COUNT(ShaderLocks);
FScopeLock Locker(&ShaderLocks[ShaderLockNumber]);
ShaderToShadermapsArray[ShaderIndex].Value.Add(ShaderMapIndex);
}
}
);
}
// sort shadermaps entries in shaders
{
const int32 kShaderSortedPerThread = 1024;
const int32 NumThreads = (ShaderToShadermapsArray.Num() / kShaderSortedPerThread) + 1;
ParallelFor(NumThreads,
[&ShaderToShadermapsArray, kShaderSortedPerThread](int ThreadIndex)
{
int32 StartingShader = ThreadIndex * kShaderSortedPerThread;
int32 EndingShader = FMath::Min(StartingShader + kShaderSortedPerThread, ShaderToShadermapsArray.Num());
for (int32 Idx = StartingShader; Idx < EndingShader; ++Idx)
{
ShaderToShadermapsArray[Idx].Value.Sort();
}
},
EParallelForFlags::Unbalanced
);
}
// now assigning the indices in the array so we can sort it
for (int32 Idx = 0, Num = ShaderToShadermapsArray.Num(); Idx < Num; ++Idx)
{
// check that no shader is unreferenced
checkf(!ShaderToShadermapsArray[Idx].Value.IsEmpty(), TEXT("Error converting to IoStore archive: shader (index=%d) is not referenced by any of the shadermaps!"), Idx);
ShaderToShadermapsArray[Idx].Key = Idx;
}
}
// sort the mapping so the first are shaders that are referenced by a smaller number of shadermaps, then by index for determinism
Algo::Sort(ShaderToShadermapsArray,
[](const TPair<uint32, TArray<int32>>& EntryA, const TPair<uint32, TArray<int32>>& EntryB)
{
const TArray<int32>& A = EntryA.Value;
const TArray<int32>& B = EntryB.Value;
// if the number of shadermaps is the same, we need to sort "alphabetically"
if (A.Num() == B.Num())
{
for (int32 Idx = 0, Num = A.Num(); Idx < Num; ++Idx)
{
if (A[Idx] != B[Idx])
{
return A[Idx] < B[Idx];
}
}
return EntryA.Key < EntryB.Key;
}
return A.Num() < B.Num();
}
);
// get the effective maximum uncompressed group size
uint32 MaxUncompressedShaderGroupSize = FMath::Max(static_cast<uint32>(GShaderCodeLibraryMaxShaderGroupSize), 1U); // cannot be lower than 1
// for statistics
uint64 Stats_TotalUncompressedMemory = 0;
uint32 Stats_MinGroupSize = MAX_uint32;
uint32 Stats_MaxGroupSize = 0;
// We want to avoid adding group indices to ShaderIndices, however looking them up sequentially is too slow. Store them here for a future ParallelFor lookup.
TArray<TArray<uint32>> StoredGroupShaderIndices;
/** Third and last stage of processing the shader group. We actually add it here, and do the book-keeping. */
auto ProcessShaderGroup_AddNewGroup = [&Format, &OutHeader, &SerializedShaders, &StoredGroupShaderIndices, &Stats_TotalUncompressedMemory, &Stats_MinGroupSize, &Stats_MaxGroupSize](TArray<uint32>& ShaderIndicesInGroup)
{
// first, sort the shaders by uncompressed size, as this was found to compress better
ShaderIndicesInGroup.Sort(
[&SerializedShaders](const int32 ShaderIndexA, const int32 ShaderIndexB)
{
const FShaderCodeEntry& ShaderEntryA = SerializedShaders.GetShaderEntries()[ShaderIndexA];
const FShaderCodeEntry& ShaderEntryB = SerializedShaders.GetShaderEntries()[ShaderIndexB];
if (ShaderEntryA.UncompressedSize != ShaderEntryB.UncompressedSize)
{
return ShaderEntryA.UncompressedSize < ShaderEntryB.UncompressedSize;
}
if (ShaderEntryA.Size != ShaderEntryB.Size)
{
return ShaderEntryA.Size < ShaderEntryB.Size;
}
if (ShaderEntryA.Frequency != ShaderEntryB.Frequency)
{
return ShaderEntryA.Frequency < ShaderEntryB.Frequency;
}
return ShaderEntryA.Offset < ShaderEntryB.Offset;
}
);
// add a new group entry
const int32 CurrentGroupIdx = OutHeader.ShaderGroupEntries.Num();
FIoStoreShaderGroupEntry& GroupEntry = OutHeader.ShaderGroupEntries.AddDefaulted_GetRef();
StoredGroupShaderIndices.Add(ShaderIndicesInGroup);
GroupEntry.NumShaders = ShaderIndicesInGroup.Num();
// ShaderIndicesOffset will be filled later, once we know all the groups (see comment about StoredGroupShaderIndices above).
const TArrayView<const FSHAHash> ShaderHashes = SerializedShaders.GetShaderHashes();
const TArrayView<const FShaderCodeEntry> ShaderEntries = SerializedShaders.GetShaderEntries();
// update shader entries both with the group number and their uncompressed offset in the group
FSHA1 GroupHasher;
uint32 CurrentGroupSize = 0;
for (int32 ShaderIdxIdx = 0, NumIdxIdx = ShaderIndicesInGroup.Num(); ShaderIdxIdx < NumIdxIdx; ++ShaderIdxIdx)
{
int32 ShaderIndex = ShaderIndicesInGroup[ShaderIdxIdx];
FIoStoreShaderCodeEntry& IoStoreShaderEntry = OutHeader.ShaderEntries[ShaderIndex];
IoStoreShaderEntry.ShaderGroupIndex = CurrentGroupIdx;
IoStoreShaderEntry.UncompressedOffsetInGroup = CurrentGroupSize;
// group hash is constructed from hashing the shaders in the group.
GroupHasher.Update(ShaderHashes[ShaderIndex].Hash, sizeof(FSHAHash));
// shader hash as of now excludes optional data, so we cannot rely on it, especially across the shader formats. Make the group hash a bit more robust by including the shader size in it.
GroupHasher.Update(reinterpret_cast<const uint8*>(&ShaderEntries[ShaderIndex].UncompressedSize), sizeof(FShaderCodeEntry::UncompressedSize));
CurrentGroupSize += ShaderEntries[ShaderIndex].UncompressedSize;
}
// Shader hashes cannot be used to uniquely identify across shader formats due to aforementioned exclusion of optional data from it.
// Include the shader format (in a platform-agnostic way) into the group hash to lower the risk of collision of shaders of different formats.
const TStringConversion<TStringConvert<TCHAR, UTF8CHAR>> Utf8Name(Format.ToString());
GroupHasher.Update(reinterpret_cast<const uint8*>(Utf8Name.Get()), Utf8Name.Length());
static_assert(sizeof(uint8) == sizeof(UTF8CHAR), "Unexpected UTF-8 char size.");
GroupEntry.UncompressedSize = CurrentGroupSize;
OutHeader.ShaderGroupIoHashes.Add(FIoStoreShaderCodeArchive::GetShaderCodeChunkId(GroupHasher.Finalize()));
Stats_TotalUncompressedMemory += CurrentGroupSize;
Stats_MinGroupSize = FMath::Min(Stats_MinGroupSize, CurrentGroupSize);
Stats_MaxGroupSize = FMath::Max(Stats_MaxGroupSize, CurrentGroupSize);
};
/** Second stage of processing shader group. Here we potentially split the group into smaller ones (as equally as possible), striving to meet limit imposed by r.ShaderCodeLibrary.MaxShaderGroupSize */
auto ProcessShaderGroup_SplitBySize = [&OutHeader, &SerializedShaders, &ProcessShaderGroup_AddNewGroup, &MaxUncompressedShaderGroupSize](TArray<uint32>& CurrentShaderGroup)
{
// calculate current group size
uint32 GroupSize = 0;
const TArrayView<const FShaderCodeEntry> ShaderEntries = SerializedShaders.GetShaderEntries();
for (uint32 ShaderIdx : CurrentShaderGroup)
{
GroupSize += ShaderEntries[ShaderIdx].UncompressedSize;
}
if (LIKELY(GroupSize <= MaxUncompressedShaderGroupSize || CurrentShaderGroup.Num() == 1))
{
ProcessShaderGroup_AddNewGroup(CurrentShaderGroup);
}
else
{
// split the shaders evenly into N new groups (don't allow more new groups than there are shaders)
int32 NumNewGroups = FMath::Min(static_cast<int32>(GroupSize / MaxUncompressedShaderGroupSize + 1), CurrentShaderGroup.Num());
checkf(NumNewGroups > 1, TEXT("Off by one error in group count calculation? NumNewGroups=%d, GroupSize=%u, MaxUncompressedShaderGroupSize=%u, CurrentShaderGroup.Num()=%d"), NumNewGroups, GroupSize, MaxUncompressedShaderGroupSize, CurrentShaderGroup.Num());
TArray<TArray<uint32>> NewGroups;
TArray<uint32> NewGroupSizes;
NewGroups.AddDefaulted(NumNewGroups);
NewGroupSizes.AddZeroed(NumNewGroups);
// sort the shaders descending, as this is easier to split (greedy algorithm)
CurrentShaderGroup.Sort(
[&SerializedShaders](const int32 ShaderIndexA, const int32 ShaderIndexB)
{
const TArrayView<const FShaderCodeEntry> ShaderEntries = SerializedShaders.GetShaderEntries();
const FShaderCodeEntry& ShaderEntryA = ShaderEntries[ShaderIndexA];
const FShaderCodeEntry& ShaderEntryB = ShaderEntries[ShaderIndexB];
if (ShaderEntryA.UncompressedSize != ShaderEntryB.UncompressedSize)
{
return ShaderEntryA.UncompressedSize > ShaderEntryB.UncompressedSize;
}
if (ShaderEntryA.Size != ShaderEntryB.Size)
{
return ShaderEntryA.Size > ShaderEntryB.Size;
}
if (ShaderEntryA.Frequency != ShaderEntryB.Frequency)
{
return ShaderEntryA.Frequency > ShaderEntryB.Frequency;
}
return ShaderEntryA.Offset > ShaderEntryB.Offset;
}
);
for (int32 ShaderIdx : CurrentShaderGroup)
{
// add the shader to the group of smallest size
int32 SmallestNewGroupIdx = 0;
for (int32 IdxNewGroup = 1; IdxNewGroup < NumNewGroups; ++IdxNewGroup)
{
if (NewGroupSizes[IdxNewGroup] < NewGroupSizes[SmallestNewGroupIdx])
{
SmallestNewGroupIdx = IdxNewGroup;
}
}
NewGroups[SmallestNewGroupIdx].Add(ShaderIdx);
NewGroupSizes[SmallestNewGroupIdx] += ShaderEntries[ShaderIdx].UncompressedSize;
}
#if DO_CHECK // sanity checks
uint32 NewGroupTotalSize = 0;
for (uint32 NewGroupSize : NewGroupSizes)
{
NewGroupTotalSize += NewGroupSize;
}
checkf(NewGroupTotalSize == GroupSize, TEXT("Original shader group was of size %u bytes, which was larger than limit %u, but it was split into %d group of total size %u, which is not %u - sizes must agree"),
GroupSize,
MaxUncompressedShaderGroupSize,
NumNewGroups,
NewGroupTotalSize, GroupSize
);
#endif
for (TArray<uint32>& NewGroup : NewGroups)
{
// note there can be empty groups (take a very edge case of MaxUncompressedShaderGroupSize = 2 bytes and a shader group of 1 shader)
if (!NewGroup.IsEmpty())
{
ProcessShaderGroup_AddNewGroup(NewGroup);
}
}
}
};
/** First stage of processing a streak of shaders all referenced by the same set of shadermaps. We begin with separating raytracing and non-raytracing shaders, so we can avoid preloading RTX in non-RT runs. */
auto ProcessShaderGroup_SplitRaytracing = [&bSeparateRaytracingShaders, &OutHeader, &ProcessShaderGroup_SplitBySize](TArray<uint32>& CurrentShaderGroup)
{
if (!bSeparateRaytracingShaders)
{
ProcessShaderGroup_SplitBySize(CurrentShaderGroup);
}
else
{
// The streak changed. Create the group, but first, determine if the group needs to be split in two because of the raytracing shaders.
// We want to isolate them into separate groups so their preload can be skipped if raytracing is off.
TArray<uint32> RaytracingShaders;
TArray<uint32> NonraytracingShaders;
for (int32 ShaderIndex : CurrentShaderGroup)
{
if (LIKELY(!IsRayTracingShaderFrequency(static_cast<EShaderFrequency>(OutHeader.ShaderEntries[ShaderIndex].Frequency))))
{
NonraytracingShaders.Add(ShaderIndex);
}
else
{
RaytracingShaders.Add(ShaderIndex);
}
}
check(CurrentShaderGroup.Num() == NonraytracingShaders.Num() + RaytracingShaders.Num());
if (LIKELY(!NonraytracingShaders.IsEmpty()))
{
ProcessShaderGroup_SplitBySize(NonraytracingShaders);
}
if (UNLIKELY(!RaytracingShaders.IsEmpty()))
{
ProcessShaderGroup_SplitBySize(RaytracingShaders);
}
}
};
// now split this into streaks of shaders that are referenced by the same set of shadermaps and compress
TArray<uint32> CurrentShaderGroup;
TArray<int32> LastShadermapSetSeen;
for (TPair<uint32, TArray<int32>>& Iter : ShaderToShadermapsArray)
{
// if we have just started the group, we don't check the last seen
if (UNLIKELY(CurrentShaderGroup.IsEmpty()))
{
CurrentShaderGroup.Add(Iter.Key);
LastShadermapSetSeen = Iter.Value;
}
else if (UNLIKELY(LastShadermapSetSeen != Iter.Value))
{
ProcessShaderGroup_SplitRaytracing(CurrentShaderGroup);
// reset the collection, but don't forget to add to it the current element
CurrentShaderGroup.SetNum(1);
CurrentShaderGroup[0] = Iter.Key;
LastShadermapSetSeen = Iter.Value;
}
else
{
// keep adding to the same group
CurrentShaderGroup.Add(Iter.Key);
}
}
// add the last group
if (!CurrentShaderGroup.IsEmpty())
{
ProcessShaderGroup_SplitRaytracing(CurrentShaderGroup);
}
/** Tries to find whether NewIndices exist as a contiguous subsequence in ExistingIndices. Returns the offset of the first match, or INDEX_NONE (-1) if not found. */
auto FindSequenceInArray = [](const TArrayView<const uint32> ExistingIndices, const TArrayView<const uint32> NewIndices) -> int32
{
check(NewIndices.Num() > 0);
uint32 FirstNew = NewIndices[0];
int32 NumNew = NewIndices.Num();
for (int32 IdxExisting = 0, NumExisting = ExistingIndices.Num(); IdxExisting < NumExisting - NumNew + 1; ++IdxExisting)
{
if (LIKELY(ExistingIndices[IdxExisting] != FirstNew))
{
continue;
}
// check the rest
bool bFoundSequence = true;
for (int32 IdxNew = 1; IdxNew < NumNew; ++IdxNew)
{
checkSlow(IdxExisting + IdxNew < NumExisting);
if (LIKELY(ExistingIndices[IdxExisting + IdxNew] != NewIndices[IdxNew]))
{
bFoundSequence = false;
break;
}
}
if (UNLIKELY(bFoundSequence))
{
return IdxExisting;
}
}
return INDEX_NONE;
};
// now, try to see if we can look up group's indices in the existing ShaderIndicesArray to avoid storing them there.
checkf(StoredGroupShaderIndices.Num() == OutHeader.ShaderGroupEntries.Num(), TEXT("We should have stored shader indices for all groups."));
ParallelFor(StoredGroupShaderIndices.Num(),
[&OutHeader, &SerializedShaders, &StoredGroupShaderIndices, &FindSequenceInArray](int ShaderGroupIndex)
{
const TArray<uint32>& ShaderIndicesInGroup = StoredGroupShaderIndices[ShaderGroupIndex];
FIoStoreShaderGroupEntry& GroupEntry = OutHeader.ShaderGroupEntries[ShaderGroupIndex];
// See if we can find indices in that order somewhere in the ShaderIndices array already, to avoid adding new indices.
// We are looking in the read-only original array, because there's no sense to look in OutHeader.ShaderIndices - groups don't overlap,
// so we know that newly added (by some previous group) indices aren't useful for us.
int32 ExistingOffset = FindSequenceInArray(SerializedShaders.GetShaderIndices(), ShaderIndicesInGroup);
if (ExistingOffset != INDEX_NONE)
{
GroupEntry.ShaderIndicesOffset = static_cast<uint32>(ExistingOffset);
}
else
{
GroupEntry.ShaderIndicesOffset = MAX_uint32;
}
}
);
// Now append all the groups that weren't found to the end of ShaderIndices, slow (we could have done that in above ParallelFor, with a lock), but good for determinism
int32 Stats_GroupsThatAppendedToShaderIndices = 0;
for (int32 ShaderGroupIndex = 0, NumGroups = OutHeader.ShaderGroupEntries.Num(); ShaderGroupIndex < NumGroups; ++ShaderGroupIndex)
{
FIoStoreShaderGroupEntry& GroupEntry = OutHeader.ShaderGroupEntries[ShaderGroupIndex];
if (GroupEntry.ShaderIndicesOffset == MAX_uint32)
{
const TArray<uint32>& ShaderIndicesInGroup = StoredGroupShaderIndices[ShaderGroupIndex];
GroupEntry.ShaderIndicesOffset = OutHeader.ShaderIndices.Num();
OutHeader.ShaderIndices.Append(ShaderIndicesInGroup);
++Stats_GroupsThatAppendedToShaderIndices;
}
}
checkf(OutHeader.ShaderEntries.Num() == SerializedShaders.GetShaderEntries().Num(), TEXT("Error creating IoStoreShaderArchive header - shader entries differ (%d in IoStore, %d original). Bug in grouping logic?"),
OutHeader.ShaderEntries.Num(), SerializedShaders.GetShaderEntries().Num());
checkf(OutHeader.ShaderGroupIoHashes.Num() == OutHeader.ShaderGroupEntries.Num(), TEXT("Error creating IoStoreShaderArchive header - mismatch between shader group hashes and descriptors (%d descriptors, %d hashes). Bug in grouping logic?"),
OutHeader.ShaderGroupEntries.Num(), OutHeader.ShaderGroupIoHashes.Num());
checkf(OutHeader.ShaderGroupEntries.Num() != 0, TEXT("At least one group must have been created"));
UE_LOG(LogShaderLibrary, Display, TEXT("Created IoStoreShaderArchive header: shaders grouped in %d groups (%d of them didn't need new indices), average uncompressed size %llu bytes, min %u bytes, max %u bytes (r.ShaderCodeLibrary.MaxShaderGroupSize=%u)"),
OutHeader.ShaderGroupEntries.Num(), OutHeader.ShaderGroupEntries.Num() - Stats_GroupsThatAppendedToShaderIndices, Stats_TotalUncompressedMemory / static_cast<uint64>(OutHeader.ShaderGroupEntries.Num()), Stats_MinGroupSize, Stats_MaxGroupSize, MaxUncompressedShaderGroupSize);
}
/**
 * Serializes all arrays of the IoStore shader code archive header through the given archive.
 *
 * The fields are always visited in the same order, which is what keeps saving and loading in sync.
 */
FArchive& operator <<(FArchive& Ar, FIoStoreShaderCodeArchiveHeader& Ref)
{
	return Ar
		<< Ref.ShaderMapHashes
		<< Ref.ShaderHashes
		<< Ref.ShaderGroupIoHashes
		<< Ref.ShaderMapEntries
		<< Ref.ShaderEntries
		<< Ref.ShaderGroupEntries
		<< Ref.ShaderIndices;
}
void FIoStoreShaderCodeArchive::SaveIoStoreShaderCodeArchive(const FIoStoreShaderCodeArchiveHeader& Header, FArchive& OutLibraryAr)
{
uint32 Version = CurrentVersion;
OutLibraryAr << Version;
OutLibraryAr << const_cast<FIoStoreShaderCodeArchiveHeader &>(Header);
}
/**
 * Tries to open the shader code library with the given name for the given platform from IoStore.
 *
 * The header chunk is read with a blocking wait. If its stored version matches CurrentVersion, the header is
 * deserialized and the hash tables used for looking up shadermaps and shaders by hash are built.
 *
 * @param InPlatform		shader platform the library was built for
 * @param InLibraryName		name of the library
 * @param InIoDispatcher	dispatcher used for reading the header (and handed to the created archive)
 * @return a newly allocated archive, or nullptr if the chunk doesn't exist or has a different version
 */
FIoStoreShaderCodeArchive* FIoStoreShaderCodeArchive::Create(EShaderPlatform InPlatform, const FString& InLibraryName, FIoDispatcher& InIoDispatcher)
{
	const FName PlatformName = FName(FDataDrivenShaderPlatformInfo::GetShaderFormat(InPlatform).ToString() + TEXT("-") + FDataDrivenShaderPlatformInfo::GetName(InPlatform).ToString());
	FIoChunkId ChunkId = GetShaderCodeArchiveChunkId(InLibraryName, PlatformName);
	if (!InIoDispatcher.DoesChunkExist(ChunkId))
	{
		return nullptr;
	}

	// request the header chunk at the highest priority and block until the read is done
	FIoBatch IoBatch = InIoDispatcher.NewBatch();
	FIoRequest IoRequest = IoBatch.Read(ChunkId, FIoReadOptions(), IoDispatcherPriority_Max);
	FEvent* ReadDoneEvent = FPlatformProcess::GetSynchEventFromPool();
	IoBatch.IssueAndTriggerEvent(ReadDoneEvent);
	ReadDoneEvent->Wait();
	FPlatformProcess::ReturnSynchEventToPool(ReadDoneEvent);

	const FIoBuffer& IoBuffer = IoRequest.GetResultOrDie();
	FMemoryReaderView Ar(MakeArrayView(IoBuffer.Data(), IoBuffer.DataSize()));
	uint32 Version = 0;
	Ar << Version;
	if (Version != CurrentVersion)
	{
		return nullptr;
	}

	FIoStoreShaderCodeArchive* Library = new FIoStoreShaderCodeArchive(InPlatform, InLibraryName, InIoDispatcher);
	Ar << Library->Header;

	// Registers every element of Hashes in HashTable under its type hash, so it can be found by index later.
	auto BuildHashTable = [](auto& HashTable, const auto& Hashes)
	{
		const uint32 HashSize = FMath::Min<uint32>(0x10000, 1u << FMath::CeilLogTwo(Hashes.Num()));
		HashTable.Clear(HashSize, Hashes.Num());
		for (int32 Index = 0, Num = Hashes.Num(); Index < Num; ++Index)
		{
			const uint32 Key = GetTypeHash(Hashes[Index]);
			HashTable.Add(Key, Index);
		}
	};
	BuildHashTable(Library->ShaderMapHashTable, Library->Header.ShaderMapHashes);
	BuildHashTable(Library->ShaderHashTable, Library->Header.ShaderHashes);

	Library->DebugVisualizer.Initialize(Library->Header.ShaderEntries.Num());

	UE_LOG(LogShaderLibrary, Display, TEXT("Using IoDispatcher for shader code library %s. Total %d unique shaders."), *InLibraryName, Library->Header.ShaderEntries.Num());
	INC_DWORD_STAT_BY(STAT_Shaders_ShaderResourceMemory, Library->GetSizeBytes());
	return Library;
}
/**
 * Constructs an archive object for the given platform and library name and stores the IoDispatcher to use.
 * The header is not touched here; Create() deserializes it right after construction.
 */
FIoStoreShaderCodeArchive::FIoStoreShaderCodeArchive(EShaderPlatform InPlatform, const FString& InLibraryName, FIoDispatcher& InIoDispatcher)
: FRHIShaderLibrary(InPlatform, InLibraryName)
, IoDispatcher(InIoDispatcher)
{
}
/** Removes the library's size from the shader resource memory stat (it was added in Create()) and tears down the preload entries. */
FIoStoreShaderCodeArchive::~FIoStoreShaderCodeArchive()
{
// NOTE(review): the stat is decremented before Teardown(); presumably GetSizeBytes() should report the same value that was added in Create() - confirm
DEC_DWORD_STAT_BY(STAT_Shaders_ShaderResourceMemory, GetSizeBytes());
Teardown();
}
void FIoStoreShaderCodeArchive::Teardown()
{
DebugVisualizer.SaveShaderUsageBitmap(GetName(), GetPlatform());
uint32 DeletedPreloadEntryBytes = 0;
FWriteScopeLock Lock(PreloadedShaderGroupsLock);
for (TMap<int32, FShaderGroupPreloadEntry*>::TIterator Iter(PreloadedShaderGroups); Iter; ++Iter)
{
FShaderGroupPreloadEntry* PreloadEntry = Iter.Value();
#if UE_SCA_DEBUG_PRELOADING
checkf(PreloadEntry->NumRefs == 0, TEXT("Group %d has still %d references on deletion. Group extended debug info: \n%s"), Iter.Key(), PreloadEntry->NumRefs,
*PreloadEntry->DebugInfo);
#else
checkf(PreloadEntry->NumRefs == 0, TEXT("Group %d has still %d references on deletion. Group extended debug info: \n%s"), Iter.Key(), PreloadEntry->NumRefs,
TEXT("Not compiled in (set UE_SCA_DEBUG_PRELOADING to 1 in ShaderCodeArchive.h and recompile the game binary)"));
#endif
const FIoStoreShaderGroupEntry& GroupEntry = Header.ShaderGroupEntries[Iter.Key()];
DeletedPreloadEntryBytes += (GroupEntry.CompressedSize + sizeof(FShaderGroupPreloadEntry));
delete PreloadEntry;
}
PreloadedShaderGroups.Empty();
DEC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, DeletedPreloadEntryBytes);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
CsvStatPreloadedShaderMB->Sub((float)DeletedPreloadEntryBytes / (1024.0f * 1024.0f));
#endif
}
/**
 * Prepares a preload entry right before its read is issued: remembers the frame number the preload started on and
 * creates the graph event for the read. The entry must not have a preload event yet.
 *
 * When shader usage visualization is compiled in, all shaders of the group are also marked as preloaded.
 */
void FIoStoreShaderCodeArchive::SetupPreloadEntryForLoading(int32 ShaderGroupIndex, FShaderGroupPreloadEntry& PreloadEntry)
{
	PreloadEntry.FramePreloadStarted = GFrameNumber;

	check(!PreloadEntry.PreloadEvent);
	PreloadEntry.PreloadEvent = FGraphEvent::CreateGraphEvent();

#if UE_SCA_VISUALIZE_SHADER_USAGE
	const FIoStoreShaderGroupEntry& Group = Header.ShaderGroupEntries[ShaderGroupIndex];
	const uint32 FirstIdxIdx = Group.ShaderIndicesOffset;
	const uint32 EndIdxIdx = Group.ShaderIndicesOffset + Group.NumShaders;
	for (uint32 IdxIdx = FirstIdxIdx; IdxIdx < EndIdxIdx; ++IdxIdx)
	{
		DebugVisualizer.MarkPreloadedForVisualization(Header.ShaderIndices[IdxIdx]);
	}
#endif // UE_SCA_VISUALIZE_SHADER_USAGE
}
/**
 * Adds a reference to the given shader group and starts reading its data if no read request exists for it yet.
 *
 * @param ShaderGroupIndex					index of the group in Header.ShaderGroupEntries
 * @param OutCompletionEvents				receives the group's preload event if it is still pending (only when no attach function is given)
 * @param AttachShaderReadRequestFuncPtr	if not null, this function is used to create the read request instead of issuing an own batch
 * @return always true
 */
bool FIoStoreShaderCodeArchive::PreloadShaderGroup(int32 ShaderGroupIndex, FGraphEventArray& OutCompletionEvents,
#if UE_SCA_DEBUG_PRELOADING
	const FString& CallsiteInfo,
#endif
	FCoreDelegates::FAttachShaderReadRequestFunc* AttachShaderReadRequestFuncPtr)
{
	// should be called within LLMTag::Shaders scope
	FWriteScopeLock Lock(PreloadedShaderGroupsLock);

	FShaderGroupPreloadEntry& PreloadEntry = *FindOrAddPreloadEntry(ShaderGroupIndex);
	checkf(!PreloadEntry.bNeverToBePreloaded, TEXT("We are preloading a shader group (index=%d) that shouldn't be preloaded in this run (e.g. raytracing shaders on D3D11)."), ShaderGroupIndex);

	const uint32 NumRefs = PreloadEntry.NumRefs++;
#if UE_SCA_DEBUG_PRELOADING
	FString RefHistoryLine = FString::Printf(TEXT("PreloadShaderGroup: NumRefs %d -> %d CallsiteInfo: %s\n"),
		NumRefs, PreloadEntry.NumRefs, *CallsiteInfo
	);
	PreloadEntry.DebugInfo.Append(RefHistoryLine);
#endif

	// an invalid status means that no read has been requested for this entry so far
	const bool bNeedsReadRequest = (PreloadEntry.IoRequest.Status() == FIoStatus::Invalid);
	if (bNeedsReadRequest)
	{
		SetupPreloadEntryForLoading(ShaderGroupIndex, PreloadEntry);

		if (LIKELY(AttachShaderReadRequestFuncPtr != nullptr))
		{
			PreloadEntry.IoRequest = (*AttachShaderReadRequestFuncPtr)(Header.ShaderGroupIoHashes[ShaderGroupIndex], PreloadEntry.PreloadEvent);
		}
		else
		{
			// only global shaders are going to hit this path, all other shaders will be preloaded with the package
			FIoBatch IoBatch = IoDispatcher.NewBatch();
			PreloadEntry.IoRequest = IoBatch.Read(Header.ShaderGroupIoHashes[ShaderGroupIndex], FIoReadOptions(), GetShaderCodeArchivePriority());
			IoBatch.IssueAndDispatchSubsequents(PreloadEntry.PreloadEvent);
		}

		uint32 ShaderGroupSize = Header.ShaderGroupEntries[ShaderGroupIndex].CompressedSize + sizeof(FShaderGroupPreloadEntry);
		INC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, ShaderGroupSize);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
		TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
		CsvStatPreloadedShaderMB->Add((float)ShaderGroupSize / (1024.0f * 1024.0f));
#endif
	}

	// the pending event is only handed out when the caller didn't supply an attach function
	const bool bNoAttachFunc = (AttachShaderReadRequestFuncPtr == nullptr);
	if (bNoAttachFunc && PreloadEntry.PreloadEvent && !PreloadEntry.PreloadEvent->IsComplete())
	{
		OutCompletionEvents.Add(PreloadEntry.PreloadEvent);
	}

	return true;
}
/**
 * Adds a reference to the given shader group without loading its data.
 *
 * On the first reference the entry is flagged as never to be preloaded, and only the size of the entry itself
 * is added to the preload memory stats.
 */
void FIoStoreShaderCodeArchive::MarkPreloadEntrySkipped(int32 ShaderGroupIndex
#if UE_SCA_DEBUG_PRELOADING
	, const FString& CallsiteInfo
#endif
)
{
	// should be called within LLMTag::Shaders scope
	FWriteScopeLock Lock(PreloadedShaderGroupsLock);

	FShaderGroupPreloadEntry& SkippedEntry = *FindOrAddPreloadEntry(ShaderGroupIndex);
	const uint32 RefsBefore = SkippedEntry.NumRefs++;
#if UE_SCA_DEBUG_PRELOADING
	FString RefHistoryLine = FString::Printf(TEXT("MarkPreloadEntrySkipped: NumRefs %d -> %d CallsiteInfo: %s\n"),
		RefsBefore, SkippedEntry.NumRefs, *CallsiteInfo
	);
	SkippedEntry.DebugInfo.Append(RefHistoryLine);
#endif

	if (RefsBefore != 0u)
	{
		// not the first reference, the entry has been accounted for already
		return;
	}

	SkippedEntry.bNeverToBePreloaded = 1;

	uint32 EntryOverheadBytes = sizeof(FShaderGroupPreloadEntry);
	INC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, EntryOverheadBytes);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
	TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
	CsvStatPreloadedShaderMB->Add((float)EntryOverheadBytes / (1024.0f * 1024.0f));
#endif
}
/**
 * Adds a reference to the given shader group (creating its preload entry if it doesn't exist yet) without
 * starting a read. Each call should be matched by ReleasePreloadedShaderGroup().
 */
void FIoStoreShaderCodeArchive::AddRefPreloadedShaderGroup(int32 ShaderGroupIndex)
{
	FWriteScopeLock Lock(PreloadedShaderGroupsLock);
	FShaderGroupPreloadEntry& PreloadEntry = *FindOrAddPreloadEntry(ShaderGroupIndex);
#if UE_SCA_DEBUG_PRELOADING
	const uint32 NumRefsBefore = PreloadEntry.NumRefs;
#endif
	PreloadEntry.NumRefs++;
#if UE_SCA_DEBUG_PRELOADING
	// Record this reference in the entry's history like the other ref-count changes do, otherwise the debug info
	// printed for leaked groups doesn't add up.
	FString AppendInfo = FString::Printf(TEXT("AddRefPreloadedShaderGroup: NumRefs %d -> %d\n"),
		NumRefsBefore, PreloadEntry.NumRefs
	);
	PreloadEntry.DebugInfo.Append(AppendInfo);
#endif
}
/** Releases one reference to the given shader group; the group's preload entry is deleted when the last reference goes away. */
void FIoStoreShaderCodeArchive::ReleasePreloadedShaderGroup(int32 ShaderGroupIndex)
{
	// ReleasePreloadEntry() takes an additional callsite argument when UE_SCA_DEBUG_PRELOADING is enabled,
	// so it has to be supplied here just like the other callers do.
	ReleasePreloadEntry(ShaderGroupIndex
#if UE_SCA_DEBUG_PRELOADING
		, FString::Printf(TEXT("ReleasePreloadedShaderGroup %d"), ShaderGroupIndex)
#endif
	);
}
/**
 * Checks whether the group containing the given shader has a preload that hasn't completed yet.
 *
 * @param ShaderIndex			index of the shader
 * @param OutCompletionEvents	receives the pending preload event, if there is one
 * @return true if a preload is still in flight (and its event was added to OutCompletionEvents), false otherwise
 */
bool FIoStoreShaderCodeArchive::IsPreloading(int32 ShaderIndex, FGraphEventArray& OutCompletionEvents)
{
	LLM_SCOPE(ELLMTag::Shaders);

	const int32 OwningGroupIndex = GetGroupIndexForShader(ShaderIndex);

	FReadScopeLock Lock(PreloadedShaderGroupsLock);
	FShaderGroupPreloadEntry** FoundEntry = PreloadedShaderGroups.Find(OwningGroupIndex);
	if (!FoundEntry)
	{
		// the group has no preload entry at all
		return false;
	}

	FShaderGroupPreloadEntry& Entry = **FoundEntry;
	if (!(Entry.PreloadEvent && !Entry.PreloadEvent->IsComplete()))
	{
		return false;
	}

	OutCompletionEvents.Add(Entry.PreloadEvent);
	return true;
}
/**
 * Preloads the group that contains the given shader (this adds a reference to the group).
 *
 * @param ShaderIndex			index of the shader to preload
 * @param OutCompletionEvents	receives the group's preload event if the read is still pending
 * @return the result of PreloadShaderGroup()
 */
bool FIoStoreShaderCodeArchive::PreloadShader(int32 ShaderIndex, FGraphEventArray& OutCompletionEvents)
{
	LLM_SCOPE(ELLMTag::Shaders);

	DebugVisualizer.MarkExplicitlyPreloadedForVisualization(ShaderIndex);

	const int32 OwningGroupIndex = GetGroupIndexForShader(ShaderIndex);
	const bool bPreloadResult = PreloadShaderGroup(OwningGroupIndex, OutCompletionEvents
#if UE_SCA_DEBUG_PRELOADING
		, FString::Printf(TEXT("PreloadShader %d"), ShaderIndex)
#endif
	);
	return bPreloadResult;
}
/**
 * Tells whether every shader of the given group has a raytracing frequency.
 *
 * @return false as soon as a shader with a non-raytracing frequency is found, true otherwise (also for a group without shaders)
 */
bool FIoStoreShaderCodeArchive::GroupOnlyContainsRaytracingShaders(int32 ShaderGroupIndex)
{
	const FIoStoreShaderGroupEntry& Group = Header.ShaderGroupEntries[ShaderGroupIndex];

	// the group's shaders are listed in Header.ShaderIndices in the range [FirstIdxIdx, EndIdxIdx)
	const uint32 FirstIdxIdx = Group.ShaderIndicesOffset;
	const uint32 EndIdxIdx = Group.ShaderIndicesOffset + Group.NumShaders;
	for (uint32 IdxIdx = FirstIdxIdx; IdxIdx < EndIdxIdx; ++IdxIdx)
	{
		const int32 ShaderInGroup = Header.ShaderIndices[IdxIdx];
		const EShaderFrequency Frequency = static_cast<EShaderFrequency>(Header.ShaderEntries[ShaderInGroup].Frequency);
		if (!IsRayTracingShaderFrequency(Frequency))
		{
			return false;
		}
	}

	return true;
}
/**
 * Preloads the groups of all shaders referenced by the given shadermap. A group gets one reference per shader
 * of the shadermap that lives in it.
 *
 * With RHI_RAYTRACING, groups that only contain raytracing shaders are merely marked as skipped (not loaded)
 * when raytracing isn't allowed and shaders aren't created on load.
 *
 * @param ShaderMapIndex		index of the shadermap in Header.ShaderMapEntries
 * @param OutCompletionEvents	receives the preload events of the reads that are still pending
 * @return always true
 */
bool FIoStoreShaderCodeArchive::PreloadShaderMap(int32 ShaderMapIndex, FGraphEventArray& OutCompletionEvents)
{
	LLM_SCOPE(ELLMTag::Shaders);

	const FIoStoreShaderMapEntry& MapEntry = Header.ShaderMapEntries[ShaderMapIndex];
#if UE_SCA_DEBUG_PRELOADING
	FString Callsite = FString::Printf(TEXT("PreloadShaderMap %d"), ShaderMapIndex);
#endif

	for (uint32 ShaderInMap = 0u; ShaderInMap < MapEntry.NumShaders; ++ShaderInMap)
	{
		const int32 ShaderIndex = Header.ShaderIndices[MapEntry.ShaderIndicesOffset + ShaderInMap];
		const int32 OwningGroupIndex = GetGroupIndexForShader(ShaderIndex);

#if RHI_RAYTRACING
		if (!(IsRayTracingAllowed() || IsCreateShadersOnLoadEnabled()) && GroupOnlyContainsRaytracingShaders(OwningGroupIndex))
		{
			MarkPreloadEntrySkipped(OwningGroupIndex
#if UE_SCA_DEBUG_PRELOADING
				, Callsite
#endif
			);
			continue;
		}
#endif

		// only shaders we actually want to preload should be marked as such, not just everything in the group
		DebugVisualizer.MarkExplicitlyPreloadedForVisualization(ShaderIndex);

		PreloadShaderGroup(OwningGroupIndex, OutCompletionEvents
#if UE_SCA_DEBUG_PRELOADING
			, Callsite
#endif
		);
	}

	return true;
}
/**
 * Preloads the groups of all shaders referenced by the given shadermap, creating the read requests through the
 * supplied attach function. A group gets one reference per shader of the shadermap that lives in it.
 *
 * With RHI_RAYTRACING, groups that only contain raytracing shaders are merely marked as skipped (not loaded)
 * when raytracing isn't allowed and shaders aren't created on load.
 *
 * @param ShaderMapIndex				index of the shadermap in Header.ShaderMapEntries
 * @param AttachShaderReadRequestFunc	function used for creating the read request of each group
 * @return always true
 */
bool FIoStoreShaderCodeArchive::PreloadShaderMap(int32 ShaderMapIndex, FCoreDelegates::FAttachShaderReadRequestFunc AttachShaderReadRequestFunc)
{
	LLM_SCOPE(ELLMTag::Shaders);

	const FIoStoreShaderMapEntry& MapEntry = Header.ShaderMapEntries[ShaderMapIndex];
	// PreloadShaderGroup() wants an event array, but nothing is added to it when an attach function is passed
	FGraphEventArray UnusedEvents;
#if UE_SCA_DEBUG_PRELOADING
	FString Callsite = FString::Printf(TEXT("PreloadShaderMap(AttachShaderReadRequestFunc) %d"), ShaderMapIndex);
#endif

	for (uint32 ShaderInMap = 0u; ShaderInMap < MapEntry.NumShaders; ++ShaderInMap)
	{
		const int32 ShaderIndex = Header.ShaderIndices[MapEntry.ShaderIndicesOffset + ShaderInMap];
		const int32 OwningGroupIndex = GetGroupIndexForShader(ShaderIndex);

#if RHI_RAYTRACING
		if (!(IsRayTracingAllowed() || IsCreateShadersOnLoadEnabled()) && GroupOnlyContainsRaytracingShaders(OwningGroupIndex))
		{
			MarkPreloadEntrySkipped(OwningGroupIndex
#if UE_SCA_DEBUG_PRELOADING
				, Callsite
#endif
			);
			continue;
		}
#endif

		// only shaders we actually want to preload should be marked as such, not just everything in the group
		DebugVisualizer.MarkExplicitlyPreloadedForVisualization(ShaderIndex);

		PreloadShaderGroup(OwningGroupIndex, UnusedEvents,
#if UE_SCA_DEBUG_PRELOADING
			Callsite,
#endif
			&AttachShaderReadRequestFunc);
	}

	return true;
}
/**
 * Releases one reference to the preload entry of the given shader group.
 *
 * When the last reference is released, a read request that may still be in flight is cancelled, the entry is
 * deleted and its memory is removed from the preload stats. Releasing a group without an entry triggers an ensure.
 */
void FIoStoreShaderCodeArchive::ReleasePreloadEntry(int32 ShaderGroupIndex
#if UE_SCA_DEBUG_PRELOADING
	, const FString& CallsiteInfo
#endif
)
{
	FWriteScopeLock Lock(PreloadedShaderGroupsLock);

	FShaderGroupPreloadEntry** ExistingEntry = PreloadedShaderGroups.Find(ShaderGroupIndex);
	ensureMsgf(ExistingEntry, TEXT("Preload entry for shader group %d should exist if we're asked to release it"), ShaderGroupIndex);
	if (!ExistingEntry)
	{
		return;
	}

	FShaderGroupPreloadEntry* PreloadEntry = *ExistingEntry;
	const uint32 RefsBeforeRelease = PreloadEntry->NumRefs--;
	check(RefsBeforeRelease > 0u);
#if UE_SCA_DEBUG_PRELOADING
	FString RefHistoryLine = FString::Printf(TEXT("ReleasePreloadEntry: NumRefs %d -> %d CallsiteInfo: %s\n"),
		RefsBeforeRelease, PreloadEntry->NumRefs, *CallsiteInfo
	);
	PreloadEntry->DebugInfo.Append(RefHistoryLine);
#endif

	if (RefsBeforeRelease != 1u)
	{
		// this wasn't the last reference, the entry stays
		return;
	}

	// the entry itself is always accounted for; the compressed data only if the group was actually (pre)loaded
	uint32 ReleasedBytes = sizeof(FShaderGroupPreloadEntry);
	if (!PreloadEntry->bNeverToBePreloaded)
	{
		PreloadEntry->IoRequest.Cancel();
		PreloadEntry->IoRequest = FIoRequest();
		PreloadEntry->PreloadEvent.SafeRelease();

		ReleasedBytes += Header.ShaderGroupEntries[ShaderGroupIndex].CompressedSize;
	}

#if UE_SCA_DEBUG_PRELOADING
	if (0) // use this if you need comparison with all other groups
	{
		UE_LOG(LogInit, Log, TEXT("Group %d has still %d references on deletion. Group extended debug info: \n%s"), ShaderGroupIndex, PreloadEntry->NumRefs,
			*PreloadEntry->DebugInfo);
	}
#endif

	delete PreloadEntry;
	PreloadedShaderGroups.Remove(ShaderGroupIndex);

	DEC_DWORD_STAT_BY(STAT_Shaders_ShaderPreloadMemory, ReleasedBytes);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
	TCsvPersistentCustomStat<float>* CsvStatPreloadedShaderMB = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatFloat(TEXT("PreloadedShaderMB"), CSV_CATEGORY_INDEX(Shaders));
	CsvStatPreloadedShaderMB->Sub((float)ReleasedBytes / (1024.0f * 1024.0f));
#endif
}
/** Releases one reference to the group that contains the given shader. */
void FIoStoreShaderCodeArchive::ReleasePreloadedShader(int32 ShaderIndex)
{
	const int32 OwningGroupIndex = GetGroupIndexForShader(ShaderIndex);
	ReleasePreloadEntry(OwningGroupIndex
#if UE_SCA_DEBUG_PRELOADING
		, FString::Printf(TEXT("ReleasePreloadedShader %d"), ShaderIndex)
#endif
	);
}
/**
 * Looks up a shadermap by its hash.
 *
 * @return index of the shadermap in Header.ShaderMapHashes, or INDEX_NONE if the hash isn't in this library
 */
int32 FIoStoreShaderCodeArchive::FindShaderMapIndex(const FSHAHash& Hash)
{
	const uint32 HashKey = GetTypeHash(Hash);

	// walk all candidates registered under the same key and compare the full hashes
	uint32 Candidate = ShaderMapHashTable.First(HashKey);
	while (ShaderMapHashTable.IsValid(Candidate))
	{
		if (Header.ShaderMapHashes[Candidate] == Hash)
		{
			return Candidate;
		}
		Candidate = ShaderMapHashTable.Next(Candidate);
	}

	return INDEX_NONE;
}
/**
 * Looks up a shader by its hash.
 *
 * @return index of the shader in Header.ShaderHashes, or INDEX_NONE if the hash isn't in this library
 */
int32 FIoStoreShaderCodeArchive::FindShaderIndex(const FSHAHash& Hash)
{
	const uint32 HashKey = GetTypeHash(Hash);

	// walk all candidates registered under the same key and compare the full hashes
	uint32 Candidate = ShaderHashTable.First(HashKey);
	while (ShaderHashTable.IsValid(Candidate))
	{
		if (Header.ShaderHashes[Candidate] == Hash)
		{
			return Candidate;
		}
		Candidate = ShaderHashTable.Next(Candidate);
	}

	return INDEX_NONE;
}
/**
 * Creates the RHI shader for the given shader index from the data of its shader group.
 *
 * The function holds a reference on the shader's group for the duration of the call (starting the preload if it
 * wasn't started yet), waits for the group's data if needed, decompresses the group if it is stored compressed and
 * passes the shader's code to the RHI creation function that matches the shader's frequency.
 *
 * @param ShaderIndex	index of the shader in Header.ShaderEntries
 * @param bRequired		if false and the group's data isn't available yet, no blocking wait happens and a null reference is returned
 * @return the created shader; a null reference if a non-required shader wasn't ready, or if no shader was created
 *         for its frequency (e.g. raytracing shaders when the RHI doesn't support them)
 */
TRefCountPtr<FRHIShader> FIoStoreShaderCodeArchive::CreateShader(int32 ShaderIndex, bool bRequired)
{
LLM_SCOPE(ELLMTag::Shaders);
TRACE_CPUPROFILER_EVENT_SCOPE(FIoStoreShaderCodeArchive::CreateShader);
TRefCountPtr<FRHIShader> Shader;
const FIoStoreShaderCodeEntry& ShaderEntry = Header.ShaderEntries[ShaderIndex];
int32 GroupIndex = GetGroupIndexForShader(ShaderIndex);
// Preload shader group if it wasn't yet. This will also addref it so we can be sure it will exist.
FGraphEventArray Dummy;
#if UE_SCA_DEBUG_PRELOADING
FString Callsite = FString::Printf(TEXT("CreateShader %d"), ShaderIndex);
#endif
PreloadShaderGroup(GroupIndex, Dummy
#if UE_SCA_DEBUG_PRELOADING
, Callsite
#endif
);
// The lock is only held for the lookup; the entry stays alive afterwards because of the reference added above.
FShaderGroupPreloadEntry* PreloadEntryPtr;
{
FReadScopeLock Lock(PreloadedShaderGroupsLock);
PreloadEntryPtr = FindExistingPreloadEntry(GroupIndex);
}
// raise the prio if still ongoing
if (!PreloadEntryPtr->IoRequest.Status().IsCompleted())
{
PreloadEntryPtr->IoRequest.UpdatePriority(IoDispatcherPriority_Max);
}
FGraphEventRef Event = PreloadEntryPtr->PreloadEvent;
// NOTE(review): bMissedPreLoaded is set below but never read within this function - confirm whether it is still needed
bool bMissedPreLoaded = false;
const bool bNeededToWait = Event.IsValid() && !Event->IsComplete();
if (bNeededToWait)
{
// the data isn't there yet: a non-required shader gives up (dropping the reference taken above) instead of blocking
if (!bRequired)
{
PreloadEntryPtr = nullptr;
ReleasePreloadEntry(GroupIndex
#if UE_SCA_DEBUG_PRELOADING
, Callsite
#endif
);
return Shader;
}
bMissedPreLoaded = true;
TRACE_CPUPROFILER_EVENT_SCOPE(BlockingShaderLoad);
const double TimeStarted = FPlatformTime::Seconds();
FTaskGraphInterface::Get().WaitUntilTaskCompletes(Event);
const double WaitDuration = FPlatformTime::Seconds() - TimeStarted;
// only complain if the wait took longer than GShaderCodeLibraryMaxShaderPreloadWaitTime (both values are in seconds)
if (WaitDuration > GShaderCodeLibraryMaxShaderPreloadWaitTime)
{
UE_LOG(LogShaderLibrary, Warning, TEXT("Spent %.2f ms in a blocking wait for shader preload, NumRefs: %d, FramePreloadStarted: %d, CurrentFrame: %d"), WaitDuration * 1000.0, PreloadEntryPtr->NumRefs, PreloadEntryPtr->FramePreloadStarted, GFrameNumber);
}
CSV_CUSTOM_STAT(Shaders, PreloadShaderMiss, 1, ECsvCustomStatOp::Accumulate);
CSV_CUSTOM_STAT(Shaders, PreloadShaderWaitTime, WaitDuration * 1000.0f, ECsvCustomStatOp::Accumulate);
}
// ShaderCode initially points at the start of the group's data as it was read from IoStore
const uint8* ShaderCode = PreloadEntryPtr->IoRequest.GetResultOrDie().Data();
// the buffer for the decompressed group is allocated from the mem stack; presumably it is released when Mark goes out of scope
FMemStackBase& MemStack = FMemStack::Get();
FMemMark Mark(MemStack);
FIoStoreShaderGroupEntry& GroupEntry = Header.ShaderGroupEntries[GroupIndex];
uint32 CompressedSize = PreloadEntryPtr->IoRequest.GetResultOrDie().DataSize();
if (GroupEntry.IsGroupCompressed())
{
// decompress the whole group, then point ShaderCode at this shader's offset within the uncompressed data
uint8* UncompressedCode = reinterpret_cast<uint8*>(MemStack.Alloc(GroupEntry.UncompressedSize, 16));
DecompressShadergroupWithOodleAndExtraLogging(GroupIndex, Header.ShaderGroupIoHashes[GroupIndex], GroupEntry, ShaderIndex, ShaderEntry.ShaderGroupIndex, Header.ShaderHashes[ShaderIndex], UncompressedCode, GroupEntry.UncompressedSize, ShaderCode, CompressedSize);
ShaderCode = reinterpret_cast<uint8*>(UncompressedCode) + ShaderEntry.UncompressedOffsetInGroup;
#if UE_SCA_VISUALIZE_SHADER_USAGE
for (uint32 ShaderIdxIdx = GroupEntry.ShaderIndicesOffset, StopBeforeIdxIdx = GroupEntry.ShaderIndicesOffset + GroupEntry.NumShaders; ShaderIdxIdx < StopBeforeIdxIdx; ++ShaderIdxIdx)
{
DebugVisualizer.MarkDecompressedForVisualization(Header.ShaderIndices[ShaderIdxIdx]);
}
#endif // UE_SCA_VISUALIZE_SHADER_USAGE
}
else
{
// the group is stored uncompressed, so the shader's code can be used in place
ShaderCode += ShaderEntry.UncompressedOffsetInGroup;
}
const auto ShaderCodeView = MakeArrayView(ShaderCode, Header.GetShaderUncompressedSize(ShaderIndex));
const FSHAHash& ShaderHash = Header.ShaderHashes[ShaderIndex];
// pick the RHI creation function according to the shader's frequency
switch (ShaderEntry.Frequency)
{
case SF_Vertex: Shader = RHICreateVertexShader(ShaderCodeView, ShaderHash); break;
case SF_Mesh: Shader = RHICreateMeshShader(ShaderCodeView, ShaderHash); break;
case SF_Amplification: Shader = RHICreateAmplificationShader(ShaderCodeView, ShaderHash); break;
case SF_Pixel: Shader = RHICreatePixelShader(ShaderCodeView, ShaderHash); break;
case SF_Geometry: Shader = RHICreateGeometryShader(ShaderCodeView, ShaderHash); break;
case SF_Compute: Shader = RHICreateComputeShader(ShaderCodeView, ShaderHash); break;
case SF_WorkGraphRoot: Shader = RHICreateWorkGraphShader(ShaderCodeView, ShaderHash, SF_WorkGraphRoot); break;
case SF_WorkGraphComputeNode: Shader = RHICreateWorkGraphShader(ShaderCodeView, ShaderHash, SF_WorkGraphComputeNode); break;
// raytracing shaders are only created if raytracing is compiled in and supported by the RHI, otherwise Shader stays null
case SF_RayGen: case SF_RayMiss: case SF_RayHitGroup: case SF_RayCallable:
#if RHI_RAYTRACING
if (GRHISupportsRayTracing && GRHISupportsRayTracingShaders)
{
Shader = RHICreateRayTracingShader(ShaderCodeView, ShaderHash, ShaderEntry.GetFrequency());
}
#endif // RHI_RAYTRACING
break;
default: checkNoEntry(); break;
}
DebugVisualizer.MarkCreatedForVisualization(ShaderIndex);
// drop the reference taken by PreloadShaderGroup() above; the entry may get deleted by this, so the pointer is cleared first
PreloadEntryPtr = nullptr;
ReleasePreloadEntry(GroupIndex
#if UE_SCA_DEBUG_PRELOADING
, Callsite
#endif
);
if (Shader)
{
INC_DWORD_STAT(STAT_Shaders_NumShadersCreated);
#if (CSV_PROFILER_STATS && !UE_BUILD_SHIPPING)
TCsvPersistentCustomStat<int>* CsvStatNumShadersCreated = FCsvProfiler::Get()->GetOrCreatePersistentCustomStatInt(TEXT("NumShadersCreated"), CSV_CATEGORY_INDEX(Shaders));
CsvStatNumShadersCreated->Add(1);
#endif
Shader->SetHash(ShaderHash);
}
return Shader;
}