// Copyright Epic Games, Inc. All Rights Reserved. #pragma once #include "CoreMinimal.h" #include "DerivedDataBackendInterface.h" #include "ProfilingDebugging/CookStats.h" #include "DerivedDataCacheUsageStats.h" #include "Misc/SecureHash.h" namespace UE::DerivedData::Backends { /** * A backend wrapper that limits the key size and uses hashing...in this case it wraps the payload and the payload contains the full key to verify the integrity of the hash **/ class FDerivedDataLimitKeyLengthWrapper : public FDerivedDataBackendInterface { public: /** * Constructor * * @param InInnerBackend Backend to use for storage, my responsibilities are about key length */ FDerivedDataLimitKeyLengthWrapper(FDerivedDataBackendInterface* InInnerBackend, int32 InMaxKeyLength) : InnerBackend(InInnerBackend) , MaxKeyLength(InMaxKeyLength) { check(InnerBackend); } /** Return a type for this interface */ virtual FString GetDisplayName() const override { return FString(TEXT("LimitKeyLengthWrapper")); } /** Return a name for this interface */ virtual FString GetName() const override { return FString::Printf(TEXT("LimitKeyLengthWrapper (%s)"), *InnerBackend->GetName()); } /** return true if this cache is writable **/ virtual bool IsWritable() const override { return InnerBackend->IsWritable(); } /** This is a wrapepr type **/ virtual bool IsWrapper() const override { return true; } /** Returns a class of speed for this interface **/ virtual ESpeedClass GetSpeedClass() const override { return InnerBackend->GetSpeedClass(); } /** * Synchronous test for the existence of a cache item * * @param CacheKey Alphanumeric+underscore key of this cache item * @return true if the data probably will be found, this can't be guaranteed because of concurrency in the backends, corruption, etc */ virtual bool CachedDataProbablyExists(const TCHAR* CacheKey) override { COOK_STAT(auto Timer = UsageStats.TimeProbablyExists()); FString NewKey; ShortenKey(CacheKey, NewKey); bool Result = InnerBackend->CachedDataProbablyExists(*NewKey); if (Result) { COOK_STAT(Timer.AddHit(0)); } return Result; } /** * Synchronous test for the existence of multiple cache items * * @param CacheKeys Alphanumeric+underscore key of the cache items * @return A bit array with bits indicating whether the data for the corresponding key will probably be found */ virtual TBitArray<> CachedDataProbablyExistsBatch(TConstArrayView CacheKeys) override { COOK_STAT(auto Timer = UsageStats.TimeProbablyExists()); TArray NewKeys; NewKeys.Reserve(CacheKeys.Num()); for (const FString& CacheKey : CacheKeys) { ShortenKey(*CacheKey, NewKeys.Emplace_GetRef()); } TBitArray<> Result = InnerBackend->CachedDataProbablyExistsBatch(NewKeys); if (Result.CountSetBits() == CacheKeys.Num()) { COOK_STAT(Timer.AddHit(0)); } return Result; } /** * Attempts to make sure the cached data will be available as optimally as possible. This is left up to the implementation to do * @param CacheKey Alphanumeric+underscore key of this cache item * @return true if any steps were performed to optimize future retrieval */ virtual bool TryToPrefetch(TConstArrayView CacheKeys) override { COOK_STAT(auto Timer = UsageStats.TimePrefetch()); TArray NewKeys; NewKeys.Reserve(CacheKeys.Num()); for (const FString& CacheKey : CacheKeys) { ShortenKey(*CacheKey, NewKeys.Emplace_GetRef()); } bool Result = InnerBackend->TryToPrefetch(NewKeys); if (Result) { COOK_STAT(Timer.AddHit(0)); } return Result; } /* Determines if we have any interest in caching this data */ virtual bool WouldCache(const TCHAR* CacheKey, TArrayView InData) override { return InnerBackend->WouldCache(CacheKey, InData); } virtual bool ApplyDebugOptions(FBackendDebugOptions& InOptions) override { return InnerBackend->ApplyDebugOptions(InOptions); } /** * Synchronous retrieve of a cache item * * @param CacheKey Alphanumeric+underscore key of this cache item * @param OutData Buffer to receive the results, if any were found * @return true if any data was found, and in this case OutData is non-empty */ virtual bool GetCachedData(const TCHAR* CacheKey, TArray& OutData) override { COOK_STAT(auto Timer = UsageStats.TimeGet()); int64 InnerGetCycles = 0; FString NewKey; bool bOk; if (!ShortenKey(CacheKey, NewKey)) { // no shortening needed bOk = InnerBackend->GetCachedData(CacheKey, OutData); } else { bOk = InnerBackend->GetCachedData(*NewKey, OutData); if (bOk) { int32 KeyLen = FCString::Strlen(CacheKey) + 1; if (OutData.Num() < KeyLen) { UE_LOG(LogDerivedDataCache, Display, TEXT("FDerivedDataLimitKeyLengthWrapper: Short file or Hash Collision, ignoring and deleting %s."), CacheKey); bOk = false; } else { int32 Compare = FCStringAnsi::Strcmp(TCHAR_TO_ANSI(CacheKey), (char*)&OutData[OutData.Num() - KeyLen]); OutData.RemoveAt(OutData.Num() - KeyLen, KeyLen); if (Compare == 0) { UE_LOG(LogDerivedDataCache, VeryVerbose, TEXT("FDerivedDataLimitKeyLengthWrapper: cache hit, key match is ok %s"), CacheKey); } else { UE_LOG(LogDerivedDataCache, Warning, TEXT("FDerivedDataLimitKeyLengthWrapper: HASH COLLISION, ignoring and deleting %s."), CacheKey); bOk = false; } } if (!bOk) { // _we_ detected corruption, so _we_ will force a flush of the corrupted data InnerBackend->RemoveCachedData(*NewKey, /*bTransient=*/ false); } } } if (!bOk) { OutData.Empty(); } else { COOK_STAT(Timer.AddHit(OutData.Num())); } return bOk; } /** * Asynchronous, fire-and-forget placement of a cache item * * @param CacheKey Alphanumeric+underscore key of this cache item * @param InData Buffer containing the data to cache, can be destroyed after the call returns, immediately * @param bPutEvenIfExists If true, then do not attempt skip the put even if CachedDataProbablyExists returns true */ virtual EPutStatus PutCachedData(const TCHAR* CacheKey, TArrayView InData, bool bPutEvenIfExists) override { COOK_STAT(auto Timer = UsageStats.TimePut()); if (!InnerBackend->IsWritable()) { return EPutStatus::NotCached; // no point in continuing down the chain } COOK_STAT(Timer.AddHit(InData.Num())); FString NewKey; if (!ShortenKey(CacheKey, NewKey)) { return InnerBackend->PutCachedData(CacheKey, InData, bPutEvenIfExists); } TArray Data(InData.GetData(), InData.Num()); check(Data.Num()); int32 KeyLen = FCString::Strlen(CacheKey) + 1; checkf(int64(Data.Num()) + KeyLen <= INT32_MAX, TEXT("FDerivedDataLimitKeyLengthWrapper: shortening the key makes the data exceed 2 GiB for %s"), CacheKey); Data.AddUninitialized(KeyLen); FCStringAnsi::Strcpy((char*)&Data[Data.Num() - KeyLen], KeyLen, TCHAR_TO_ANSI(CacheKey)); check(Data.Last()==0); return InnerBackend->PutCachedData(*NewKey, Data, bPutEvenIfExists); } virtual void RemoveCachedData(const TCHAR* CacheKey, bool bTransient) override { if (!InnerBackend->IsWritable()) { return; // no point in continuing down the chain } FString NewKey; ShortenKey(CacheKey, NewKey); return InnerBackend->RemoveCachedData(*NewKey, bTransient); } virtual TSharedRef GatherUsageStats() const override { TSharedRef Usage = MakeShared(this, TEXT("LimitKeyLength")); Usage->Stats.Add(TEXT(""), UsageStats); if (InnerBackend) { Usage->Children.Add(InnerBackend->GatherUsageStats()); } return Usage; } virtual void Put( TConstArrayView Records, FStringView Context, ECachePolicy Policy, IRequestOwner& Owner, FOnCachePutComplete&& OnComplete) override { return InnerBackend->Put(Records, Context, Policy, Owner, MoveTemp(OnComplete)); } virtual void Get( TConstArrayView Keys, FStringView Context, FCacheRecordPolicy Policy, IRequestOwner& Owner, FOnCacheGetComplete&& OnComplete) override { return InnerBackend->Get(Keys, Context, Policy, Owner, MoveTemp(OnComplete)); } virtual void GetChunks( TConstArrayView Chunks, FStringView Context, IRequestOwner& Owner, FOnCacheGetChunkComplete&& OnComplete) override { return InnerBackend->GetChunks(Chunks, Context, Owner, MoveTemp(OnComplete)); } private: FDerivedDataCacheUsageStats UsageStats; /** Shorten the cache key and return true if shortening was required **/ bool ShortenKey(const TCHAR* CacheKey, FString& Result) { Result = FString(CacheKey); if (Result.Len() <= MaxKeyLength) { return false; } FSHA1 HashState; int32 Length = Result.Len(); HashState.Update((const uint8*)&Length, sizeof(int32)); auto ResultSrc = StringCast(*Result); // This is pretty redundant. Incorporating the CRC of the name into the hash // which also ends up computing SHA1 of the name is not really going to make // any meaningful difference to the strength of the key so it's just a waste // of CPU. However it's difficult to get rid of without invalidating the DDC // contents so here we are. const uint32 CRCofPayload(FCrc::MemCrc32(ResultSrc.Get(), Length * sizeof(UCS2CHAR))); HashState.Update((const uint8*)&CRCofPayload, sizeof(uint32)); HashState.Update((const uint8*)ResultSrc.Get(), Length * sizeof(UCS2CHAR)); HashState.Final(); uint8 Hash[FSHA1::DigestSize]; HashState.GetHash(Hash); const FString HashString = BytesToHex(Hash, FSHA1::DigestSize); int32 HashStringSize = HashString.Len(); int32 OriginalPart = MaxKeyLength - HashStringSize - 2; Result = Result.Left(OriginalPart) + TEXT("__") + HashString; check(Result.Len() == MaxKeyLength && Result.Len() > 0); UE_LOG(LogDerivedDataCache, VeryVerbose, TEXT("ShortenKey %s -> %s"), CacheKey, *Result); return true; } /** Backend to use for storage, my responsibilities are about key length **/ FDerivedDataBackendInterface* InnerBackend; /** Maximum size of the backend key length **/ int32 MaxKeyLength; }; } // UE::DerivedData::Backends