Files
UnrealEngineUWP/Engine/Source/Developer/DerivedDataCache/Private/DerivedDataCacheStoreThrottle.cpp
devin doucette 2800210897 DDC: Enabled compression of legacy cache data
- FileSystem, Http, Pak, S3 use the ValueWithLegacyFallback mode by default, which causes them to fall back to searching for uncompressed data if compressed data is not found.
- FileSystem has been fixed to store up to 1 MiB of compressed data inline with the value package rather than separately in content-addressable storage.
- Pak has been optimized to have GetChunks only load the required region of the requested value, rather than the whole value.
- Pak has been changed to stop storing data inline in the record package, since it will end up in the same file anyway when stored separately.
- Pak will upgrade the compressor and compression level when a compressed pak file is requested. Default cache compression uses Oodle Mermaid VeryFast and will upgrade to Oodle Kraken Optimal2.
- Zen does not have compression enabled by default, pending deployment of a new version that stores compressed values to Horde Storage.
- Added a missing request barrier when saving uncompressed data as compressed.

Example reduction in file system cache size when cooking for Windows:
- CitySample dropped from 66.5 GiB to 21.8 GiB.
- Lyra dropped from 2.54 GiB to 672 MiB.
- ShooterGame dropped from 1.21 GiB to 380 MiB.

Example reduction in compressed pak file cache size when cooking for Windows:
- CitySample dropped from 22.3 GiB to 18.5 GiB.
- Lyra dropped from 691 MiB to 543 MiB.
- ShooterGame dropped from 387 MiB to 313 MiB.

#jira UE-134381
#preflight 620a703f583261b0a658e043, 620a6fb2803d9066e6805310, 620a733117632e948459b6af
#lockdown Aurel.Cordonnier
#rb Zousar.Shaker

#ROBOMERGE-AUTHOR: devin.doucette
#ROBOMERGE-SOURCE: CL 18983671 in //UE5/Release-5.0/... via CL 18983890 via CL 18984096
#ROBOMERGE-BOT: UE5 (Release-Engine-Test -> Main) (v917-18934589)

[CL 18984126 by devin doucette in ue5-main branch]
2022-02-14 14:43:39 -05:00

242 lines
7.7 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "Algo/Find.h"
#include "Algo/Transform.h"
#include "DerivedDataCachePrivate.h"
#include "DerivedDataCacheRecord.h"
#include "DerivedDataLegacyCacheStore.h"
#include "HAL/PlatformProcess.h"
#include "HAL/PlatformTime.h"
#include <atomic>
namespace UE::DerivedData
{
/**
 * A cache store that increases the latency and reduces the throughput of another cache store.
 * Intended uses:
 * 1. Reproduce the timings of a remote cache using a local cache, to reduce both network usage and measurement noise.
 * 2. Reproduce HDD latency and throughput even when the data is actually stored on an SSD.
 */
class FCacheStoreThrottle final : public ILegacyCacheStore
{
public:
/**
 * Wraps another cache store and throttles every operation routed through it.
 *
 * @param InInnerCache         Store that actually services requests. Must be non-null. Not owned.
 * @param InLatencyMS          Simulated latency in milliseconds, applied once per batch before the
 *                             request is forwarded. Zero disables the latency sleep.
 * @param InMaxBytesPerSecond  Simulated bandwidth cap shared by all requests on all threads.
 *                             Zero disables bandwidth throttling.
 */
FCacheStoreThrottle(ILegacyCacheStore* InInnerCache, uint32 InLatencyMS, uint32 InMaxBytesPerSecond)
: InnerCache(InInnerCache)
// Stored in seconds because FPlatformProcess::Sleep takes seconds.
, Latency(float(InLatencyMS) / 1000.0f)
, MaxBytesPerSecond(InMaxBytesPerSecond)
{
check(InnerCache);
}
/** Forwards Put to the inner store; charges each record's compressed size against the bandwidth budget. */
void Put(
const TConstArrayView<FCachePutRequest> Requests,
IRequestOwner& Owner,
FOnCachePutComplete&& OnComplete) final
{
// The put response carries only the key, not the record, so capture each
// record's compressed size up front to look up by key on completion.
struct FRecordSize
{
FCacheKey Key;
uint64 Size;
};
TArray<FRecordSize, TInlineAllocator<1>> RecordSizes;
RecordSizes.Reserve(Requests.Num());
Algo::Transform(Requests, RecordSizes, [](const FCachePutRequest& Request) -> FRecordSize
{
return {Request.Record.GetKey(), Private::GetCacheRecordCompressedSize(Request.Record)};
});
// The init-capture evaluates EnterThrottlingScope() (including its latency
// sleep) when the lambda is constructed, i.e. before InnerCache->Put runs.
InnerCache->Put(Requests, Owner,
[this, RecordSizes = MoveTemp(RecordSizes), State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FCachePutResponse&& Response)
{
const FRecordSize* Size = Algo::FindBy(RecordSizes, Response.Key, &FRecordSize::Key);
// An unmatched key is charged as zero bytes; only latency applies.
CloseThrottlingScope(State, FThrottlingState(this, Size ? Size->Size : 0));
OnComplete(MoveTemp(Response));
});
}
/** Forwards Get to the inner store; charges the compressed size of each returned record. */
void Get(
const TConstArrayView<FCacheGetRequest> Requests,
IRequestOwner& Owner,
FOnCacheGetComplete&& OnComplete) final
{
InnerCache->Get(Requests, Owner,
[this, State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FCacheGetResponse&& Response)
{
CloseThrottlingScope(State, FThrottlingState(this, Private::GetCacheRecordCompressedSize(Response.Record)));
OnComplete(MoveTemp(Response));
});
}
/** Forwards PutValue to the inner store; charges each value's compressed size, captured up front by key. */
void PutValue(
const TConstArrayView<FCachePutValueRequest> Requests,
IRequestOwner& Owner,
FOnCachePutValueComplete&& OnComplete) final
{
// Same pattern as Put: the response has no value, so record sizes by key now.
struct FValueSize
{
FCacheKey Key;
uint64 Size;
};
TArray<FValueSize, TInlineAllocator<1>> ValueSizes;
ValueSizes.Reserve(Requests.Num());
Algo::Transform(Requests, ValueSizes, [](const FCachePutValueRequest& Request) -> FValueSize
{
return {Request.Key, Request.Value.GetData().GetCompressedSize()};
});
InnerCache->PutValue(Requests, Owner,
[this, ValueSizes = MoveTemp(ValueSizes), State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FCachePutValueResponse&& Response)
{
const FValueSize* Size = Algo::FindBy(ValueSizes, Response.Key, &FValueSize::Key);
CloseThrottlingScope(State, FThrottlingState(this, Size ? Size->Size : 0));
OnComplete(MoveTemp(Response));
});
}
/** Forwards GetValue to the inner store; charges the compressed size of each returned value. */
void GetValue(
const TConstArrayView<FCacheGetValueRequest> Requests,
IRequestOwner& Owner,
FOnCacheGetValueComplete&& OnComplete) final
{
InnerCache->GetValue(Requests, Owner,
[this, State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FCacheGetValueResponse&& Response)
{
CloseThrottlingScope(State, FThrottlingState(this, Response.Value.GetData().GetCompressedSize()));
OnComplete(MoveTemp(Response));
});
}
/** Forwards GetChunks to the inner store; charges the raw (uncompressed) size of each returned chunk. */
void GetChunks(
const TConstArrayView<FCacheGetChunkRequest> Requests,
IRequestOwner& Owner,
FOnCacheGetChunkComplete&& OnComplete) final
{
InnerCache->GetChunks(Requests, Owner,
[this, State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FCacheGetChunkResponse&& Response)
{
CloseThrottlingScope(State, FThrottlingState(this, Response.RawData.GetSize()));
OnComplete(MoveTemp(Response));
});
}
/** Forwards LegacyPut to the inner store; charges each value's raw size, captured up front by key. */
void LegacyPut(
const TConstArrayView<FLegacyCachePutRequest> Requests,
IRequestOwner& Owner,
FOnLegacyCachePutComplete&& OnComplete) final
{
// Same pattern as Put: the response has no value, so record sizes by key now.
struct FValueSize
{
FCacheKey Key;
uint64 Size;
};
TArray<FValueSize, TInlineAllocator<1>> ValueSizes;
ValueSizes.Reserve(Requests.Num());
Algo::Transform(Requests, ValueSizes, [](const FLegacyCachePutRequest& Request) -> FValueSize
{
return {Request.Key.GetKey(), Request.Value.GetRawSize()};
});
InnerCache->LegacyPut(Requests, Owner,
[this, ValueSizes = MoveTemp(ValueSizes), State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FLegacyCachePutResponse&& Response)
{
const FValueSize* Size = Algo::FindBy(ValueSizes, Response.Key.GetKey(), &FValueSize::Key);
CloseThrottlingScope(State, FThrottlingState(this, Size ? Size->Size : 0));
OnComplete(MoveTemp(Response));
});
}
/** Forwards LegacyGet to the inner store; charges the raw size of each returned value. */
void LegacyGet(
const TConstArrayView<FLegacyCacheGetRequest> Requests,
IRequestOwner& Owner,
FOnLegacyCacheGetComplete&& OnComplete) final
{
InnerCache->LegacyGet(Requests, Owner,
[this, State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FLegacyCacheGetResponse&& Response)
{
CloseThrottlingScope(State, FThrottlingState(this, Response.Value.GetRawSize()));
OnComplete(MoveTemp(Response));
});
}
/** Forwards LegacyDelete to the inner store; charges zero bytes, so only the latency sleep applies. */
void LegacyDelete(
const TConstArrayView<FLegacyCacheDeleteRequest> Requests,
IRequestOwner& Owner,
FOnLegacyCacheDeleteComplete&& OnComplete) final
{
InnerCache->LegacyDelete(Requests, Owner,
[this, State = EnterThrottlingScope(), OnComplete = MoveTemp(OnComplete)](FLegacyCacheDeleteResponse&& Response)
{
CloseThrottlingScope(State, FThrottlingState(this, 0));
OnComplete(MoveTemp(Response));
});
}
/** Stats are reported by the inner store; the throttle adds nothing of its own. */
void LegacyStats(FDerivedDataCacheStatsNode& OutNode) final
{
InnerCache->LegacyStats(OutNode);
}
/** Debug options pass straight through to the inner store. */
bool LegacyDebugOptions(FBackendDebugOptions& Options) final
{
return InnerCache->LegacyDebugOptions(Options);
}
private:
/** Snapshot of the wall clock and the shared transferred-byte counter. */
struct FThrottlingState
{
double Time;
uint64 TotalBytesTransferred;
// Snapshot the current counter without charging any bytes.
explicit FThrottlingState(FCacheStoreThrottle* ThrottleWrapper)
: Time(FPlatformTime::Seconds())
, TotalBytesTransferred(ThrottleWrapper->TotalBytesTransferred.load(std::memory_order_relaxed))
{
}
// Charge BytesTransferred to the shared counter and snapshot the new total.
// Relaxed ordering suffices: the counter only paces sleeps and does not
// publish any other data between threads.
explicit FThrottlingState(FCacheStoreThrottle* ThrottleWrapper, uint64 BytesTransferred)
: Time(FPlatformTime::Seconds())
, TotalBytesTransferred(ThrottleWrapper->TotalBytesTransferred.fetch_add(BytesTransferred, std::memory_order_relaxed) + BytesTransferred)
{
}
};
// Sleeps for the simulated latency (if any), then snapshots time and bytes.
// Evaluated in the callback init-captures above, so the sleep happens before
// the request is forwarded to the inner cache store.
FThrottlingState EnterThrottlingScope()
{
if (Latency > 0)
{
TRACE_CPUPROFILER_EVENT_SCOPE(ThrottlingLatency);
FPlatformProcess::Sleep(Latency);
}
return FThrottlingState(this);
}
// Sleeps until the elapsed time matches how long the observed byte delta
// would have taken at MaxBytesPerSecond. No-op when the cap is zero.
void CloseThrottlingScope(FThrottlingState PreviousState, FThrottlingState CurrentState)
{
if (MaxBytesPerSecond)
{
// Take into account any other transfer that might have happened during that time from any other thread so we have a global limit
const double ExpectedTime = double(CurrentState.TotalBytesTransferred - PreviousState.TotalBytesTransferred) / MaxBytesPerSecond;
const double ActualTime = CurrentState.Time - PreviousState.Time;
if (ExpectedTime > ActualTime)
{
TRACE_CPUPROFILER_EVENT_SCOPE(ThrottlingBandwidth);
FPlatformProcess::Sleep(ExpectedTime - ActualTime);
}
}
}
/** Inner store that services every request; this wrapper only adds throttling. Not owned. */
ILegacyCacheStore* InnerCache;
/** Simulated latency in seconds; zero disables the latency sleep. */
float Latency;
/** Bandwidth cap in bytes per second; zero disables bandwidth throttling. */
uint32 MaxBytesPerSecond;
/** Bytes charged by all requests on all threads; basis for the global bandwidth budget. */
std::atomic<uint64> TotalBytesTransferred{0};
};
/**
 * Creates a throttling wrapper around an existing cache store.
 *
 * @param InnerCache         Store to wrap; must be non-null and outlive the returned store.
 * @param LatencyMS          Simulated latency in milliseconds; zero disables it.
 * @param MaxBytesPerSecond  Simulated bandwidth cap; zero disables it.
 * @return A new throttled store; the caller assumes ownership.
 */
ILegacyCacheStore* CreateCacheStoreThrottle(ILegacyCacheStore* InnerCache, uint32 LatencyMS, uint32 MaxBytesPerSecond)
{
FCacheStoreThrottle* const Throttle = new FCacheStoreThrottle(InnerCache, LatencyMS, MaxBytesPerSecond);
return Throttle;
}
} // UE::DerivedData