Files
UnrealEngineUWP/Engine/Source/Developer/DerivedDataCache/Private/DerivedDataLimitKeyLengthWrapper.h
Devin Doucette e42bbbedab DDC: Implement ICacheStore on FDerivedDataBackendInterface
This allows the new cache to be implemented more efficiently using the old cache backends because functionality like the corruption wrapper and key length limiter can be bypassed and the individual backends can store cache records in whatever way is most efficient for them.

The hierarchical backend may request payloads when they are not required due to incomplete tracking of backend state, and GetPayload will never fill other backends due to the inefficiency of the existing backend framework.

The filesystem backend does not cache any state in memory, which makes requests for individual payloads less efficient than if it cached a mapping of cache payload key to raw hash after the first request for a cache key.

The HTTP, S3, and pak file backends are not implemented for the new interface.

The backends do not implement ICacheStore::CancelAll() because the existing backend framework provides WaitForQuiescence to wait for completion of async requests, and the implementation of ICacheStore by those backends works with that mechanism.

The non-leaf backends (hierarchical, async put, etc.) do not update stats from the ICacheStore functions.

#rb Zousar.Shaker
#rnx
#preflight 60899f35d324590001b47517

[CL 16148296 by Devin Doucette in ue5-main branch]
2021-04-28 16:22:18 -04:00

329 lines
9.9 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#pragma once
#include "CoreMinimal.h"
#include "DerivedDataBackendInterface.h"
#include "ProfilingDebugging/CookStats.h"
#include "DerivedDataCacheUsageStats.h"
#include "Misc/SecureHash.h"
namespace UE::DerivedData::Backends
{
/**
* A backend wrapper that limits the key size and uses hashing...in this case it wraps the payload and the payload contains the full key to verify the integrity of the hash
**/
class FDerivedDataLimitKeyLengthWrapper : public FDerivedDataBackendInterface
{
public:
/**
* Constructor
*
* @param InInnerBackend Backend to use for storage, my responsibilities are about key length
*/
FDerivedDataLimitKeyLengthWrapper(FDerivedDataBackendInterface* InInnerBackend, int32 InMaxKeyLength)
: InnerBackend(InInnerBackend)
, MaxKeyLength(InMaxKeyLength)
{
check(InnerBackend);
}
/** Return a name for this interface */
virtual FString GetName() const override
{
return FString::Printf(TEXT("LimitKeyLengthWrapper (%s)"), *InnerBackend->GetName());
}
/** return true if this cache is writable **/
virtual bool IsWritable() const override
{
return InnerBackend->IsWritable();
}
/** Returns a class of speed for this interface **/
virtual ESpeedClass GetSpeedClass() const override
{
return InnerBackend->GetSpeedClass();
}
/**
* Synchronous test for the existence of a cache item
*
* @param CacheKey Alphanumeric+underscore key of this cache item
* @return true if the data probably will be found, this can't be guaranteed because of concurrency in the backends, corruption, etc
*/
virtual bool CachedDataProbablyExists(const TCHAR* CacheKey) override
{
COOK_STAT(auto Timer = UsageStats.TimeProbablyExists());
FString NewKey;
ShortenKey(CacheKey, NewKey);
bool Result = InnerBackend->CachedDataProbablyExists(*NewKey);
if (Result)
{
COOK_STAT(Timer.AddHit(0));
}
return Result;
}
/**
* Synchronous test for the existence of multiple cache items
*
* @param CacheKeys Alphanumeric+underscore key of the cache items
* @return A bit array with bits indicating whether the data for the corresponding key will probably be found
*/
virtual TBitArray<> CachedDataProbablyExistsBatch(TConstArrayView<FString> CacheKeys) override
{
COOK_STAT(auto Timer = UsageStats.TimeProbablyExists());
TArray<FString> NewKeys;
NewKeys.Reserve(CacheKeys.Num());
for (const FString& CacheKey : CacheKeys)
{
ShortenKey(*CacheKey, NewKeys.Emplace_GetRef());
}
TBitArray<> Result = InnerBackend->CachedDataProbablyExistsBatch(NewKeys);
if (Result.CountSetBits() == CacheKeys.Num())
{
COOK_STAT(Timer.AddHit(0));
}
return Result;
}
/**
* Attempts to make sure the cached data will be available as optimally as possible. This is left up to the implementation to do
* @param CacheKey Alphanumeric+underscore key of this cache item
* @return true if any steps were performed to optimize future retrieval
*/
virtual bool TryToPrefetch(TConstArrayView<FString> CacheKeys) override
{
COOK_STAT(auto Timer = UsageStats.TimePrefetch());
TArray<FString> NewKeys;
NewKeys.Reserve(CacheKeys.Num());
for (const FString& CacheKey : CacheKeys)
{
ShortenKey(*CacheKey, NewKeys.Emplace_GetRef());
}
bool Result = InnerBackend->TryToPrefetch(NewKeys);
if (Result)
{
COOK_STAT(Timer.AddHit(0));
}
return Result;
}
/*
Determines if we have any interest in caching this data
*/
virtual bool WouldCache(const TCHAR* CacheKey, TArrayView<const uint8> InData) override
{
return InnerBackend->WouldCache(CacheKey, InData);
}
virtual bool ApplyDebugOptions(FBackendDebugOptions& InOptions) override
{
return InnerBackend->ApplyDebugOptions(InOptions);
}
/**
* Synchronous retrieve of a cache item
*
* @param CacheKey Alphanumeric+underscore key of this cache item
* @param OutData Buffer to receive the results, if any were found
* @return true if any data was found, and in this case OutData is non-empty
*/
virtual bool GetCachedData(const TCHAR* CacheKey, TArray<uint8>& OutData) override
{
COOK_STAT(auto Timer = UsageStats.TimeGet());
int64 InnerGetCycles = 0;
FString NewKey;
bool bOk;
if (!ShortenKey(CacheKey, NewKey))
{
// no shortening needed
bOk = InnerBackend->GetCachedData(CacheKey, OutData);
}
else
{
bOk = InnerBackend->GetCachedData(*NewKey, OutData);
if (bOk)
{
int32 KeyLen = FCString::Strlen(CacheKey) + 1;
if (OutData.Num() < KeyLen)
{
UE_LOG(LogDerivedDataCache, Display, TEXT("FDerivedDataLimitKeyLengthWrapper: Short file or Hash Collision, ignoring and deleting %s."), CacheKey);
bOk = false;
}
else
{
int32 Compare = FCStringAnsi::Strcmp(TCHAR_TO_ANSI(CacheKey), (char*)&OutData[OutData.Num() - KeyLen]);
OutData.RemoveAt(OutData.Num() - KeyLen, KeyLen);
if (Compare == 0)
{
UE_LOG(LogDerivedDataCache, VeryVerbose, TEXT("FDerivedDataLimitKeyLengthWrapper: cache hit, key match is ok %s"), CacheKey);
}
else
{
UE_LOG(LogDerivedDataCache, Warning, TEXT("FDerivedDataLimitKeyLengthWrapper: HASH COLLISION, ignoring and deleting %s."), CacheKey);
bOk = false;
}
}
if (!bOk)
{
// _we_ detected corruption, so _we_ will force a flush of the corrupted data
InnerBackend->RemoveCachedData(*NewKey, /*bTransient=*/ false);
}
}
}
if (!bOk)
{
OutData.Empty();
}
else
{
COOK_STAT(Timer.AddHit(OutData.Num()));
}
return bOk;
}
/**
* Asynchronous, fire-and-forget placement of a cache item
*
* @param CacheKey Alphanumeric+underscore key of this cache item
* @param InData Buffer containing the data to cache, can be destroyed after the call returns, immediately
* @param bPutEvenIfExists If true, then do not attempt skip the put even if CachedDataProbablyExists returns true
*/
virtual EPutStatus PutCachedData(const TCHAR* CacheKey, TArrayView<const uint8> InData, bool bPutEvenIfExists) override
{
COOK_STAT(auto Timer = UsageStats.TimePut());
if (!InnerBackend->IsWritable())
{
return EPutStatus::NotCached; // no point in continuing down the chain
}
COOK_STAT(Timer.AddHit(InData.Num()));
FString NewKey;
if (!ShortenKey(CacheKey, NewKey))
{
return InnerBackend->PutCachedData(CacheKey, InData, bPutEvenIfExists);
}
TArray<uint8> Data(InData.GetData(), InData.Num());
check(Data.Num());
int32 KeyLen = FCString::Strlen(CacheKey) + 1;
checkf(int64(Data.Num()) + KeyLen <= INT32_MAX,
TEXT("FDerivedDataLimitKeyLengthWrapper: shortening the key makes the data exceed 2 GiB for %s"), CacheKey);
Data.AddUninitialized(KeyLen);
FCStringAnsi::Strcpy((char*)&Data[Data.Num() - KeyLen], KeyLen, TCHAR_TO_ANSI(CacheKey));
check(Data.Last()==0);
return InnerBackend->PutCachedData(*NewKey, Data, bPutEvenIfExists);
}
virtual void RemoveCachedData(const TCHAR* CacheKey, bool bTransient) override
{
if (!InnerBackend->IsWritable())
{
return; // no point in continuing down the chain
}
FString NewKey;
ShortenKey(CacheKey, NewKey);
return InnerBackend->RemoveCachedData(*NewKey, bTransient);
}
virtual TSharedRef<FDerivedDataCacheStatsNode> GatherUsageStats() const override
{
TSharedRef<FDerivedDataCacheStatsNode> Usage = MakeShared<FDerivedDataCacheStatsNode>(this, TEXT("LimitKeyLength"));
Usage->Stats.Add(TEXT(""), UsageStats);
if (InnerBackend)
{
Usage->Children.Add(InnerBackend->GatherUsageStats());
}
return Usage;
}
virtual FRequest Put(
TArrayView<FCacheRecord> Records,
FStringView Context,
ECachePolicy Policy,
EPriority Priority,
FOnCachePutComplete&& OnComplete) override
{
return InnerBackend->Put(Records, Context, Policy, Priority, MoveTemp(OnComplete));
}
virtual FRequest Get(
TConstArrayView<FCacheKey> Keys,
FStringView Context,
ECachePolicy Policy,
EPriority Priority,
FOnCacheGetComplete&& OnComplete) override
{
return InnerBackend->Get(Keys, Context, Policy, Priority, MoveTemp(OnComplete));
}
virtual FRequest GetPayload(
TConstArrayView<FCachePayloadKey> Keys,
FStringView Context,
ECachePolicy Policy,
EPriority Priority,
FOnCacheGetPayloadComplete&& OnComplete) override
{
return InnerBackend->GetPayload(Keys, Context, Policy, Priority, MoveTemp(OnComplete));
}
virtual void CancelAll() override
{
InnerBackend->CancelAll();
}
private:
FDerivedDataCacheUsageStats UsageStats;
/** Shorten the cache key and return true if shortening was required **/
bool ShortenKey(const TCHAR* CacheKey, FString& Result)
{
Result = FString(CacheKey);
if (Result.Len() <= MaxKeyLength)
{
return false;
}
FSHA1 HashState;
int32 Length = Result.Len();
HashState.Update((const uint8*)&Length, sizeof(int32));
auto ResultSrc = StringCast<UCS2CHAR>(*Result);
// This is pretty redundant. Incorporating the CRC of the name into the hash
// which also ends up computing SHA1 of the name is not really going to make
// any meaningful difference to the strength of the key so it's just a waste
// of CPU. However it's difficult to get rid of without invalidating the DDC
// contents so here we are.
const uint32 CRCofPayload(FCrc::MemCrc32(ResultSrc.Get(), Length * sizeof(UCS2CHAR)));
HashState.Update((const uint8*)&CRCofPayload, sizeof(uint32));
HashState.Update((const uint8*)ResultSrc.Get(), Length * sizeof(UCS2CHAR));
HashState.Final();
uint8 Hash[FSHA1::DigestSize];
HashState.GetHash(Hash);
const FString HashString = BytesToHex(Hash, FSHA1::DigestSize);
int32 HashStringSize = HashString.Len();
int32 OriginalPart = MaxKeyLength - HashStringSize - 2;
Result = Result.Left(OriginalPart) + TEXT("__") + HashString;
check(Result.Len() == MaxKeyLength && Result.Len() > 0);
UE_LOG(LogDerivedDataCache, VeryVerbose, TEXT("ShortenKey %s -> %s"), CacheKey, *Result);
return true;
}
/** Backend to use for storage, my responsibilities are about key length **/
FDerivedDataBackendInterface* InnerBackend;
/** Maximum size of the backend key length **/
int32 MaxKeyLength;
};
} // UE::DerivedData::Backends