2024-04-10 20:29:18 -04:00
// Copyright Epic Games, Inc. All Rights Reserved.
# include "UbaCacheClient.h"
2024-05-23 01:51:09 -04:00
# include "UbaApplicationRules.h"
2024-05-25 16:58:10 -04:00
# include "UbaCacheEntry.h"
2024-04-12 18:04:58 -04:00
# include "UbaCompactTables.h"
2024-06-04 14:07:46 -04:00
# include "UbaCompressedObjFileHeader.h"
2024-06-06 13:00:09 -04:00
# include "UbaConfig.h"
2024-06-03 01:31:27 -04:00
# include "UbaDirectoryIterator.h"
2024-04-10 20:29:18 -04:00
# include "UbaFileAccessor.h"
# include "UbaNetworkMessage.h"
2024-04-24 15:26:58 -04:00
# include "UbaProcessStartInfo.h"
2024-04-12 01:05:04 -04:00
# include "UbaRootPaths.h"
2024-04-24 15:26:58 -04:00
# include "UbaSession.h"
2024-04-10 20:29:18 -04:00
# include "UbaStorage.h"
# include "UbaStorageUtils.h"
2024-04-24 15:26:58 -04:00
# define UBA_LOG_WRITE_CACHE_INFO 0 // 0 = Disabled, 1 = Normal, 2 = Detailed
2024-04-28 01:07:00 -04:00
# define UBA_LOG_FETCH_CACHE_INFO 0 // 0 = Disabled, 1 = Misses, 2 = Both misses and hits
2024-04-13 19:21:42 -04:00
2024-04-10 20:29:18 -04:00
namespace uba
{
2024-06-06 13:00:09 -04:00
void CacheClientCreateInfo : : Apply ( Config & config )
{
2024-06-15 01:11:17 -04:00
const ConfigTable * tablePtr = config . GetTable ( TC ( " CacheClient " ) ) ;
if ( ! tablePtr )
return ;
const ConfigTable & table = * tablePtr ;
2024-06-06 13:00:09 -04:00
table . GetValueAsBool ( useDirectoryPreparsing , TC ( " UseDirectoryPreparsing " ) ) ;
table . GetValueAsBool ( validateCacheWritesInput , TC ( " ValidateCacheWritesInput " ) ) ;
table . GetValueAsBool ( validateCacheWritesOutput , TC ( " ValidateCacheWritesOutput " ) ) ;
table . GetValueAsBool ( reportMissReason , TC ( " ReportMissReason " ) ) ;
2024-06-06 15:45:03 -04:00
table . GetValueAsBool ( useRoots , TC ( " UseRoots " ) ) ;
table . GetValueAsBool ( useCacheHit , TC ( " UseCacheHit " ) ) ;
2024-06-06 13:00:09 -04:00
}
2024-04-12 18:04:58 -04:00
struct CacheClient : : Bucket
{
Bucket ( u32 id_ )
: id ( id_ )
2024-04-25 00:44:38 -04:00
, serverPathTable ( CachePathTableMaxSize , CompactPathTable : : V1 , CaseInsensitiveFs )
2024-04-12 18:04:58 -04:00
, serverCasKeyTable ( CacheCasKeyTableMaxSize )
2024-04-25 00:44:38 -04:00
, sendPathTable ( CachePathTableMaxSize , CompactPathTable : : V1 , CaseInsensitiveFs )
2024-04-12 18:04:58 -04:00
, sendCasKeyTable ( CacheCasKeyTableMaxSize )
{
}
u32 id = 0 ;
CompactPathTable serverPathTable ;
CompactCasKeyTable serverCasKeyTable ;
CompactPathTable sendPathTable ;
CompactCasKeyTable sendCasKeyTable ;
ReaderWriterLock pathTableNetworkLock ;
u32 pathTableSizeSent = 0 ;
ReaderWriterLock casKeyTableNetworkLock ;
u32 casKeyTableSizeSent = 0 ;
2024-04-28 01:07:00 -04:00
Atomic < u32 > availableCasKeyTableSize ;
2024-04-12 18:04:58 -04:00
} ;
2024-04-24 15:26:58 -04:00
// Stores the services and settings from the create info and hooks up the connection callbacks.
// On connect a version handshake is sent to the cache server; m_connected is only set once the
// server has accepted it and is cleared again when the connection is lost.
CacheClient::CacheClient(const CacheClientCreateInfo& info)
:	m_logger(info.writer, TC(" UbaCacheClient "))
,	m_storage(info.storage)
,	m_client(info.client)
,	m_session(info.session)
{
	// Miss reasons are always reported when fetch logging is compiled in
	#if UBA_LOG_FETCH_CACHE_INFO
	m_reportMissReason = true;
	#else
	m_reportMissReason = info.reportMissReason;
	#endif

	m_useRoots = info.useRoots;
	m_useCacheHit = info.useCacheHit;
	m_useDirectoryPreParsing = info.useDirectoryPreparsing;
	m_validateCacheWritesInput = info.validateCacheWritesInput;
	m_validateCacheWritesOutput = info.validateCacheWritesOutput;

	m_client.RegisterOnConnected([this]()
		{
			// The server can answer "not now"; try up to 10 times with a second between attempts
			u32 attempt = 0;
			for (; attempt < 10; ++attempt)
			{
				StackBinaryWriter<1024> request;
				NetworkMessage connectMsg(m_client, CacheServiceId, CacheMessageType_Connect, request);
				request.WriteU32(CacheNetworkVersion);

				StackBinaryReader<1024> response;
				const u64 sendTime = GetTime();
				if (!connectMsg.Send(response))
				{
					// A failed send is not retried
					m_logger.Info(TC(" Failed to send connect message to cache server (%u). Version mismatch? (%s) "), connectMsg.GetError(), TimeToText(GetTime() - sendTime).str);
					return;
				}

				if (response.ReadBool())
				{
					// Only mention the connection if the user was told about retries before
					if (attempt != 0)
						m_logger.Info(TC(" Connected to cache server "));
					m_connected = true;
					return;
				}

				// The reason is only read and logged for the first rejection to avoid spamming the log
				if (attempt == 0)
				{
					StringBuffer<> reason;
					response.ReadString(reason);
					m_logger.Info(TC(" Cache server busy, retrying... (Reason: %s) "), reason.data);
				}

				Sleep(1000);
			}

			m_logger.Info(TC(" Failed to connect to cache server after %u retries. Giving up. "), attempt);
		});

	m_client.RegisterOnDisconnected([this]()
		{
			m_connected = false;
		});
}
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably kept here because Bucket is only defined in this file - confirm against the header.
CacheClient : : ~ CacheClient ( ) = default ;
2024-07-29 13:48:36 -04:00
bool CacheClient : : WriteToCache ( const RootPaths & rootPaths , u32 bucketId , const ProcessStartInfo & info , const u8 * inputs , u64 inputsSize , const u8 * outputs , u64 outputsSize , const u8 * logLines , u64 logLinesSize , u32 processId )
2024-04-10 20:29:18 -04:00
{
if ( ! m_connected )
return false ;
2024-04-24 15:26:58 -04:00
if ( ! inputsSize )
2024-04-10 20:29:18 -04:00
return false ;
2024-04-24 15:26:58 -04:00
CasKey cmdKey = GetCmdKey ( rootPaths , info ) ;
2024-04-10 20:29:18 -04:00
if ( cmdKey = = CasKeyZero )
2024-04-24 15:26:58 -04:00
{
# if UBA_LOG_WRITE_CACHE_INFO
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " WRITECACHE FAIL: %s " ) , info . GetDescription ( ) ) ;
2024-04-24 15:26:58 -04:00
# endif
2024-04-10 20:29:18 -04:00
return false ;
2024-04-24 15:26:58 -04:00
}
2024-04-10 20:29:18 -04:00
2024-05-23 19:16:15 -04:00
bool finished = false ;
u64 bytesSent = 0 ;
if ( processId )
m_session . GetTrace ( ) . CacheBeginWrite ( processId ) ;
auto tg = MakeGuard ( [ & ] ( ) { if ( processId ) m_session . GetTrace ( ) . CacheEndWrite ( processId , finished , bytesSent ) ; } ) ;
2024-04-24 15:26:58 -04:00
BinaryReader inputsReader ( inputs , 0 , inputsSize ) ;
BinaryReader outputsReader ( outputs , 0 , outputsSize ) ;
2024-04-10 20:29:18 -04:00
Map < u32 , u32 > inputsStringToCasKey ;
Map < u32 , u32 > outputsStringToCasKey ;
u32 requiredPathTableSize = 0 ;
u32 requiredCasTableSize = 0 ;
bool success = true ;
2024-04-12 18:04:58 -04:00
SCOPED_WRITE_LOCK ( m_bucketsLock , bucketsLock ) ;
Bucket & bucket = m_buckets . try_emplace ( bucketId , bucketId ) . first - > second ;
bucketsLock . Leave ( ) ;
2024-06-06 15:45:03 -04:00
TString qualifiedPath ;
2024-04-10 20:29:18 -04:00
// Traverse all inputs and outputs. to create cache entry that we can send to server
while ( true )
{
CasKey casKey ;
StringBuffer < 512 > path ;
bool isOutput = outputsReader . GetLeft ( ) ;
if ( isOutput )
outputsReader . ReadString ( path ) ;
else if ( inputsReader . GetLeft ( ) )
inputsReader . ReadString ( path ) ;
else
break ;
2024-05-28 15:07:55 -04:00
if ( path . count < 2 )
{
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " Got messed up path from caller to WriteToCache: %s (%s) " ) , path . data , info . GetDescription ( ) ) ;
2024-05-28 15:07:55 -04:00
success = false ;
}
2024-04-10 20:29:18 -04:00
// For .exe and .dll we sometimes get relative paths so we need to expand them to full
if ( path [ 1 ] ! = ' : ' & & ( path . EndsWith ( TC ( " .dll " ) ) | | path . EndsWith ( TC ( " .exe " ) ) ) )
{
tchar temp [ 512 ] ;
bool res = SearchPathW ( NULL , path . data , NULL , 512 , temp , NULL ) ;
path . Clear ( ) . Append ( temp ) ;
if ( ! res )
{
m_logger . Info ( TC ( " Can't find file: %s " ) , path . data ) ;
return false ;
}
}
2024-04-24 15:26:58 -04:00
else if ( ShouldNormalize ( path ) ) // Paths can be absolute in rsp files so we need to normalize those paths
2024-04-10 20:29:18 -04:00
{
2024-04-24 15:26:58 -04:00
casKey = rootPaths . NormalizeAndHashFile ( m_logger , path . data ) ;
if ( casKey = = CasKeyZero )
{
success = false ;
continue ;
}
2024-05-23 19:16:15 -04:00
casKey = IsNormalized ( casKey ) ? AsCompressed ( casKey , true ) : CasKeyZero ;
2024-04-10 20:29:18 -04:00
}
else if ( path [ path . count - 1 ] = = ' : ' )
{
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " GOT UNKNOWN RELATIVE PATH: %s (%s) " ) , path . data , info . GetDescription ( ) ) ;
2024-04-10 20:29:18 -04:00
success = false ;
continue ;
}
2024-06-06 15:45:03 -04:00
if ( m_useRoots )
2024-04-10 20:29:18 -04:00
{
2024-06-06 15:45:03 -04:00
// Find root for path in order to be able to normalize it.
auto root = rootPaths . FindRoot ( path ) ;
if ( ! root )
{
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " FILE WITHOUT ROOT: %s (%s) " ) , path . data , info . GetDescription ( ) ) ;
2024-06-06 15:45:03 -04:00
success = false ;
continue ;
}
if ( ! root - > includeInKey )
continue ;
u32 rootLen = u32 ( root - > path . size ( ) ) ;
qualifiedPath = path . data + rootLen - 1 ;
qualifiedPath [ 0 ] = tchar ( RootPaths : : RootStartByte + root - > index ) ;
}
else
{
qualifiedPath = path . data ;
2024-04-10 20:29:18 -04:00
}
2024-04-12 18:04:58 -04:00
u32 pathOffset = bucket . sendPathTable . Add ( qualifiedPath . c_str ( ) , u32 ( qualifiedPath . size ( ) ) , & requiredPathTableSize ) ;
2024-04-10 20:29:18 -04:00
if ( ! isOutput ) // Output files should be removed from input files.. For example when cl.exe compiles pch it reads previous pch file and we don't want it to be input
2024-04-24 15:26:58 -04:00
{
2024-04-10 20:29:18 -04:00
if ( outputsStringToCasKey . find ( pathOffset ) ! = outputsStringToCasKey . end ( ) )
continue ;
2024-04-24 15:26:58 -04:00
//m_logger.Info(TC("INPUT ENTRY: %s -> %u"), qualifiedPath.c_str(), pathOffset);
}
else
{
inputsStringToCasKey . erase ( pathOffset ) ;
//m_logger.Info(TC("OUT ENTRY: %s -> %u"), qualifiedPath.c_str(), pathOffset);
}
2024-04-10 20:29:18 -04:00
2024-04-24 15:26:58 -04:00
auto & stringToCasKey = isOutput ? outputsStringToCasKey : inputsStringToCasKey ;
auto insres = stringToCasKey . try_emplace ( pathOffset ) ;
2024-04-10 20:29:18 -04:00
if ( ! insres . second )
2024-04-24 15:26:58 -04:00
{
//m_logger.Warning(TC("Input file %s exists multiple times"), qualifiedPath.c_str());
2024-04-10 20:29:18 -04:00
continue ;
2024-04-24 15:26:58 -04:00
}
2024-04-10 20:29:18 -04:00
// Get file caskey using storage
if ( casKey = = CasKeyZero )
{
2024-06-05 17:57:46 -04:00
bool shouldValidate = ( m_validateCacheWritesInput & & ! isOutput ) | | ( m_validateCacheWritesOutput & & isOutput ) ;
2024-04-10 20:29:18 -04:00
bool deferCreation = true ;
2024-05-27 23:22:26 -04:00
bool fileIsCompressed = IsFileCompressed ( info , path ) ;
2024-06-05 17:57:46 -04:00
2024-05-31 02:18:27 -04:00
if ( isOutput )
{
if ( ! m_storage . StoreCasFile ( casKey , path . data , CasKeyZero , deferCreation , fileIsCompressed ) )
return false ;
}
else
{
if ( ! m_storage . StoreCasKey ( casKey , path . data , CasKeyZero , fileIsCompressed ) )
return false ;
}
2024-04-10 20:29:18 -04:00
if ( casKey = = CasKeyZero ) // If file is not found it was a temporary file that was deleted and is not really an output
2024-04-24 15:26:58 -04:00
{
2024-06-04 14:07:46 -04:00
if ( shouldValidate & & FileExists ( m_logger , path . data ) )
{
2024-06-14 20:06:57 -04:00
m_logger . Warning ( TC ( " CasDb claims file %s does not exist but it does! Will not populate cache for %s " ) , path . data , info . GetDescription ( ) ) ;
2024-06-04 14:07:46 -04:00
return false ;
}
2024-04-24 15:26:58 -04:00
//m_logger.Warning(TC("Can't find file %s"), path.data);
stringToCasKey . erase ( insres . first ) ;
2024-04-10 20:29:18 -04:00
continue ; // m_logger.Info(TC("This should never happen! (%s)"), path.data);
2024-04-24 15:26:58 -04:00
}
2024-06-04 14:07:46 -04:00
if ( shouldValidate )
{
FileAccessor fa ( m_logger , path . data ) ;
if ( ! fa . OpenMemoryRead ( ) )
{
2024-06-14 20:06:57 -04:00
m_logger . Warning ( TC ( " CasDb claims file %s does exist but can't open it. Will not populate cache for %s " ) , path . data , info . GetDescription ( ) ) ;
2024-06-04 14:07:46 -04:00
return false ;
}
CasKey oldKey = AsCompressed ( casKey , false ) ;
CasKey newKey ;
u64 fileSize = fa . GetSize ( ) ;
u8 * fileMem = fa . GetData ( ) ;
if ( fileSize > sizeof ( CompressedObjFileHeader ) & & ( ( CompressedObjFileHeader * ) fileMem ) - > IsValid ( ) )
newKey = AsCompressed ( ( ( CompressedObjFileHeader * ) fileMem ) - > casKey , false ) ;
else
newKey = CalculateCasKey ( fileMem , fileSize , false , nullptr , path . data ) ;
if ( newKey ! = oldKey )
{
FileInformation fileInfo ;
fa . GetFileInformationByHandle ( fileInfo ) ;
auto & fileEntry = m_storage . GetOrCreateFileEntry ( CaseInsensitiveFs ? ToStringKeyLower ( path ) : ToStringKey ( path ) ) ;
SCOPED_READ_LOCK ( fileEntry . lock , lock ) ;
2024-06-05 17:57:46 -04:00
auto ToString = [ ] ( bool b ) { return b ? TC ( " true " ) : TC ( " false " ) ; } ;
2024-06-07 00:03:54 -04:00
m_logger . Warning ( TC ( " CasDb claims file %s has caskey %s but recalculating it gives us %s (FileEntry: %llu/%llu/%s, Real: %llu/%llu). Will not populate cache for %s " ) ,
2024-06-14 20:06:57 -04:00
path . data , CasKeyString ( oldKey ) . str , CasKeyString ( newKey ) . str , fileEntry . size , fileEntry . lastWritten , ToString ( fileEntry . verified ) , fileSize , fileInfo . lastWriteTime , info . GetDescription ( ) ) ;
2024-06-04 14:07:46 -04:00
return false ;
}
}
2024-04-10 20:29:18 -04:00
}
UBA_ASSERT ( IsCompressed ( casKey ) ) ;
2024-04-12 18:04:58 -04:00
insres . first - > second = bucket . sendCasKeyTable . Add ( casKey , pathOffset , & requiredCasTableSize ) ;
2024-04-10 20:29:18 -04:00
}
if ( ! success )
return false ;
if ( outputsStringToCasKey . empty ( ) )
2024-06-14 20:06:57 -04:00
m_logger . Warning ( TC ( " NO OUTPUTS FROM process %s " ) , info . GetDescription ( ) ) ;
2024-04-10 20:29:18 -04:00
// Make sure server has enough of the path table to be able to resolve offsets from cache entry
2024-04-12 18:04:58 -04:00
if ( ! SendPathTable ( bucket , requiredPathTableSize ) )
2024-04-10 20:29:18 -04:00
return false ;
// Make sure server has enough of the cas table to be able to resolve offsets from cache entry
2024-04-12 18:04:58 -04:00
if ( ! SendCasTable ( bucket , requiredCasTableSize ) )
2024-04-10 20:29:18 -04:00
return false ;
// actual cache entry now when we know server has the needed tables
2024-07-29 13:48:36 -04:00
if ( ! SendCacheEntry ( bucket , rootPaths , cmdKey , inputsStringToCasKey , outputsStringToCasKey , logLines , logLinesSize , bytesSent ) )
2024-04-10 20:29:18 -04:00
return false ;
2024-04-13 19:21:42 -04:00
2024-04-24 15:26:58 -04:00
# if UBA_LOG_WRITE_CACHE_INFO
m_logger . BeginScope ( ) ;
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " WRITECACHE: %s -> %u %s " ) , info . GetDescription ( ) , bucketId , CasKeyString ( cmdKey ) . str ) ;
2024-04-24 15:26:58 -04:00
# if UBA_LOG_WRITE_CACHE_INFO == 2
for ( auto & kv : inputsStringToCasKey )
{
StringBuffer < > path ;
CasKey casKey ;
bucket . sendCasKeyTable . GetPathAndKey ( path , casKey , bucket . sendPathTable , kv . second ) ;
m_logger . Info ( TC ( " IN: %s -> %s " ) , path . data , CasKeyString ( casKey ) . str ) ;
}
for ( auto & kv : outputsStringToCasKey )
{
StringBuffer < > path ;
CasKey casKey ;
bucket . sendCasKeyTable . GetPathAndKey ( path , casKey , bucket . sendPathTable , kv . second ) ;
m_logger . Info ( TC ( " OUT: %s -> %s " ) , path . data , CasKeyString ( casKey ) . str ) ;
}
# endif // 2
m_logger . EndScope ( ) ;
2024-04-13 19:21:42 -04:00
# endif
2024-05-23 19:16:15 -04:00
finished = true ;
2024-04-10 20:29:18 -04:00
return true ;
}
2024-06-06 15:45:03 -04:00
// Builds the 64-bit id sent to the server for a bucket.
// The low 32 bits are the bucket id. The bits above are built from CaseInsensitiveFs,
// RootPathsVersion and m_useRoots, so clients that differ in any of these get different ids.
u64 CacheClient::MakeId(u32 bucketId)
{
	const u64 variantBits = u64(!CaseInsensitiveFs) + (RootPathsVersion << 1) + (u8(!m_useRoots) << 2);
	return (variantBits << 32) | u64(bucketId);
}
2024-07-29 13:48:36 -04:00
// Tries to satisfy a process from the cache instead of running it.
//
// Asks the server for the cache entries stored for the command key of info in the given bucket,
// tests each entry's inputs against the local files and, for the first entry that matches,
// downloads its outputs to disk and registers them with storage and session.
//
// outResult  - hit is set to true only when an entry was used; logLines is filled through
//              PopulateLogLines / ReportUsedEntry
// rootPaths  - roots used to resolve paths and to normalize/denormalize file contents
// bucketId   - bucket to look in; its client-side state is created on first use
// info       - start info of the process; used for the command key, IsFileCompressed and log text
//
// Returns true only on a cache hit. Returns false for a miss as well as for every failure.
bool CacheClient : : FetchFromCache ( CacheResult & outResult , const RootPaths & rootPaths , u32 bucketId , const ProcessStartInfo & info )
2024-04-10 20:29:18 -04:00
{
2024-07-29 13:48:36 -04:00
outResult . hit = false ;
2024-06-03 01:31:27 -04:00
2024-04-10 20:29:18 -04:00
if ( ! m_connected )
return false ;
2024-04-12 01:05:04 -04:00
// Stats are gathered in locals for this fetch; the guard below adds kernel and storage stats
// to their owners when the function exits
CacheStats cacheStats ;
StorageStats storageStats ;
2024-06-02 18:14:50 -04:00
KernelStats kernelStats ;
auto kg = MakeGuard ( [ & ] ( ) { KernelStats : : GetGlobal ( ) . Add ( kernelStats ) ; m_storage . AddStats ( storageStats ) ; } ) ;
2024-04-12 01:05:04 -04:00
StorageStatsScope __ ( storageStats ) ;
2024-06-02 18:14:50 -04:00
KernelStatsScope _ ( kernelStats ) ;
2024-04-12 01:05:04 -04:00
CasKey cmdKey = GetCmdKey ( rootPaths , info ) ;
2024-04-10 20:29:18 -04:00
if ( cmdKey = = CasKeyZero )
return false ;
2024-04-12 01:05:04 -04:00
// This buffer first receives the server response (through reader) and is reused by the
// guard below to serialize the stats once the function is done with the response
u8 memory [ SendMaxSize ] ;
u32 fetchId = m_session . CreateProcessId ( ) ;
2024-06-14 20:06:57 -04:00
m_session . GetTrace ( ) . CacheBeginFetch ( fetchId , info . GetDescription ( ) ) ;
2024-04-12 01:05:04 -04:00
bool success = false ;
// Reports the end of the fetch to the trace on every exit path
auto tg = MakeGuard ( [ & ] ( )
{
2024-05-29 13:31:50 -04:00
// fetchCasTable and normalizeFile are timed inside the testEntry scope, so their time is taken out of it
cacheStats . testEntry . time - = ( cacheStats . fetchCasTable . time + cacheStats . normalizeFile . time ) ;
2024-04-12 01:05:04 -04:00
BinaryWriter writer ( memory , 0 , sizeof_array ( memory ) ) ;
cacheStats . Write ( writer ) ;
2024-05-30 19:43:04 -04:00
storageStats . Write ( writer ) ;
2024-06-02 18:14:50 -04:00
kernelStats . Write ( writer ) ;
2024-04-12 01:05:04 -04:00
m_session . GetTrace ( ) . CacheEndFetch ( fetchId , success , memory , writer . GetPosition ( ) ) ;
} ) ;
BinaryReader reader ( memory , 0 , sizeof_array ( memory ) ) ;
2024-04-10 20:29:18 -04:00
2024-04-12 18:04:58 -04:00
// The buckets lock is only held while looking up/creating the bucket
SCOPED_WRITE_LOCK ( m_bucketsLock , bucketsLock ) ;
Bucket & bucket = m_buckets . try_emplace ( bucketId , bucketId ) . first - > second ;
bucketsLock . Leave ( ) ;
2024-04-10 20:29:18 -04:00
{
2024-04-12 01:05:04 -04:00
TimerScope ts ( cacheStats . fetchEntries ) ;
2024-04-10 20:29:18 -04:00
// Fetch entries.. server will provide as many as fits. TODO: Should it be possible to ask for more entries?
StackBinaryWriter < 32 > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_FetchEntries , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-10 20:29:18 -04:00
writer . WriteCasKey ( cmdKey ) ;
if ( ! msg . Send ( reader ) )
return false ;
}
2024-05-31 02:18:27 -04:00
u32 entryCount = reader . ReadU16 ( ) ;
2024-04-13 19:21:42 -04:00
2024-04-24 15:26:58 -04:00
# if UBA_LOG_FETCH_CACHE_INFO
2024-04-28 01:07:00 -04:00
auto mg = MakeGuard ( [ & ] ( )
{
if ( ! success | | UBA_LOG_FETCH_CACHE_INFO = = 2 )
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " FETCHCACHE %s: %s -> %u %s (%u) " ) , success ? TC ( " SUCC " ) : TC ( " FAIL " ) , info . GetDescription ( ) , bucketId , CasKeyString ( cmdKey ) . str , entryCount ) ;
2024-04-28 01:07:00 -04:00
} ) ;
2024-04-13 19:21:42 -04:00
# endif
2024-05-31 02:18:27 -04:00
if ( ! entryCount )
return false ;
2024-04-24 15:26:58 -04:00
// Mismatches are collected here and only logged if no entry ends up matching
struct MissInfo { TString path ; u32 entryIndex ; CasKey cache ; CasKey local ; } ;
Vector < MissInfo > misses ;
2024-05-28 18:03:47 -04:00
// Path key -> locally calculated normalized cas key, so a file is normalized and hashed at most once
UnorderedMap < StringKey , CasKey > normalizedCasKeys ;
2024-05-31 02:18:27 -04:00
// Cas key offset -> match result, used when IsCasKeyMatch is called with useLookup set
UnorderedMap < u32 , bool > isCasKeyMatchCache ;
2024-05-28 18:03:47 -04:00
2024-05-31 02:18:27 -04:00
// Tests whether the local file behind casKeyOffset has the cas key stored in the cache.
// The answer is written to outIsMatch. The return value is false only when the cas table
// could not be fetched or the path could not be resolved, in which case the fetch is aborted.
auto IsCasKeyMatch = [ & ] ( bool & outIsMatch , u32 casKeyOffset , u32 entryIndex , bool useLookup )
2024-04-10 20:29:18 -04:00
{
2024-05-31 02:18:27 -04:00
outIsMatch = false ;
StringBuffer < MaxPath > path ;
2024-04-24 15:26:58 -04:00
2024-05-31 02:18:27 -04:00
CasKey cacheCasKey ;
CasKey localCasKey ;
bool * cachedIsMatch = nullptr ;
if ( useLookup )
{
// An offset that has been tested before is answered from the lookup
auto insres = isCasKeyMatchCache . try_emplace ( casKeyOffset ) ;
if ( ! insres . second )
2024-04-12 01:05:04 -04:00
{
2024-05-31 02:18:27 -04:00
outIsMatch = insres . first - > second ;
2024-05-25 16:58:10 -04:00
return true ;
2024-05-31 02:18:27 -04:00
}
cachedIsMatch = & insres . first - > second ;
}
2024-04-10 20:29:18 -04:00
2024-05-31 02:18:27 -04:00
if ( ! FetchCasTable ( bucket , cacheStats , casKeyOffset ) )
2024-05-25 16:58:10 -04:00
return false ;
2024-04-12 01:05:04 -04:00
2024-05-31 02:18:27 -04:00
if ( ! GetLocalPathAndCasKey ( bucket , rootPaths , path , cacheCasKey , bucket . serverCasKeyTable , bucket . serverPathTable , casKeyOffset ) )
return false ;
2024-06-14 20:06:57 -04:00
UBA_ASSERTF ( IsCompressed ( cacheCasKey ) , TC ( " Cache entry for %s has uncompressed cache key for path %s (%s) " ) , info . GetDescription ( ) , path . data , CasKeyString ( cacheCasKey ) . str ) ;
2024-05-31 02:18:27 -04:00
if ( IsNormalized ( cacheCasKey ) ) // Need to normalize caskey for these files since they contain absolute paths
2024-04-10 20:29:18 -04:00
{
2024-05-31 02:18:27 -04:00
auto insres2 = normalizedCasKeys . try_emplace ( ToStringKeyNoCheck ( path . data , path . count ) ) ;
if ( insres2 . second )
{
TimerScope ts ( cacheStats . normalizeFile ) ;
localCasKey = rootPaths . NormalizeAndHashFile ( m_logger , path . data ) ;
if ( localCasKey ! = CasKeyZero )
localCasKey = AsCompressed ( localCasKey , true ) ;
insres2 . first - > second = localCasKey ;
}
else
localCasKey = insres2 . first - > second ;
}
else
{
2024-06-03 01:31:27 -04:00
// Regular file; storage is asked for the cas key using a (lower-cased on case insensitive file systems) key of the path
StringBuffer < MaxPath > forKey ;
forKey . Append ( path ) ;
if ( CaseInsensitiveFs )
forKey . MakeLower ( ) ;
StringKey fileNameKey = ToStringKey ( forKey ) ;
if ( m_useDirectoryPreParsing )
PreparseDirectory ( fileNameKey , path ) ;
2024-05-31 02:18:27 -04:00
bool fileIsCompressed = IsFileCompressed ( info , path ) ;
2024-06-03 01:31:27 -04:00
// The result of StoreCasKey is not checked; localCasKey is compared below either way
m_storage . StoreCasKey ( localCasKey , fileNameKey , path . data , CasKeyZero , fileIsCompressed ) ;
2024-05-31 02:18:27 -04:00
UBA_ASSERT ( localCasKey = = CasKeyZero | | IsCompressed ( localCasKey ) ) ;
2024-04-10 20:29:18 -04:00
}
2024-05-31 02:18:27 -04:00
outIsMatch = localCasKey = = cacheCasKey ;
if ( useLookup )
* cachedIsMatch = outIsMatch ;
2024-05-25 16:58:10 -04:00
2024-05-31 02:18:27 -04:00
if ( ! outIsMatch )
if ( m_reportMissReason & & path . count ) // if empty this has already been reported
misses . push_back ( { TString ( path . data ) , entryIndex , cacheCasKey , localCasKey } ) ;
return true ;
2024-05-25 17:52:40 -04:00
} ;
2024-05-31 02:18:27 -04:00
// [begin, end) span of byte positions inside the shared offsets data
struct Range
{
u32 begin ;
u32 end ;
} ;
Vector < Range > sharedMatchingRanges ;
2024-07-29 13:48:36 -04:00
// Both are assigned in the block below before they are used
const u8 * sharedLogLines ;
u64 sharedLogLinesSize ;
2024-05-31 02:18:27 -04:00
// Create ranges out of shared offsets that matches local state
{
TimerScope ts ( cacheStats . testEntry ) ;
// The response continues with the shared offsets data followed by the shared log lines, each prefixed with its size
u64 sharedSize = reader . Read7BitEncoded ( ) ;
BinaryReader sharedReader ( reader . GetPositionData ( ) , 0 , sharedSize ) ;
reader . Skip ( sharedSize ) ;
2024-07-29 13:48:36 -04:00
sharedLogLinesSize = reader . Read7BitEncoded ( ) ;
sharedLogLines = reader . GetPositionData ( ) ;
reader . Skip ( sharedLogLinesSize ) ;
2024-05-31 02:18:27 -04:00
// rangeBegin is the start position of the current run of matching offsets, or ~0u while inside a run of mismatches
u32 rangeBegin = 0 ;
auto addRange = [ & ] ( u32 rangeEnd )
{
if ( rangeBegin ! = rangeEnd )
{
auto & range = sharedMatchingRanges . emplace_back ( ) ;
range . begin = rangeBegin ;
range . end = rangeEnd ;
}
} ;
while ( sharedReader . GetLeft ( ) )
{
u32 position = u32 ( sharedReader . GetPosition ( ) ) ;
bool isMatch ;
if ( ! IsCasKeyMatch ( isMatch , u32 ( sharedReader . Read7BitEncoded ( ) ) , 0 , false ) )
return false ;
if ( isMatch )
{
if ( rangeBegin ! = ~ 0u )
continue ;
rangeBegin = position ;
}
else
{
if ( rangeBegin = = ~ 0u )
continue ;
addRange ( position ) ;
rangeBegin = ~ 0u ;
}
}
if ( rangeBegin ! = ~ 0u )
addRange ( u32 ( sharedReader . GetPosition ( ) ) ) ;
2024-06-06 15:45:03 -04:00
// The entry loop below dereferences begin() of this vector, so it always gets at least one (empty) range
if ( sharedMatchingRanges . empty ( ) )
{
auto & range = sharedMatchingRanges . emplace_back ( ) ;
range . begin = 0 ;
range . end = 0 ;
}
2024-05-31 02:18:27 -04:00
}
// Read entries
{
- - cacheStats . testEntry . count ; // Remove the shared one
// NOTE(review): entryReader is not used anywhere in this block; all reads go through reader
BinaryReader entryReader ( reader . GetPositionData ( ) , 0 , reader . GetLeft ( ) ) ;
u32 entryIndex = 0 ;
for ( ; entryIndex ! = entryCount ; + + entryIndex )
{
// Each entry is: id, extra offsets, ranges into the shared offsets, output offsets (each block size-prefixed) and a log lines type byte
u32 entryId = u32 ( reader . Read7BitEncoded ( ) ) ;
u64 extraSize = reader . Read7BitEncoded ( ) ;
BinaryReader extraReader ( reader . GetPositionData ( ) , 0 , extraSize ) ;
reader . Skip ( extraSize ) ;
u64 rangeSize = reader . Read7BitEncoded ( ) ;
BinaryReader rangeReader ( reader . GetPositionData ( ) , 0 , rangeSize ) ;
reader . Skip ( rangeSize ) ;
u64 outSize = reader . Read7BitEncoded ( ) ;
BinaryReader outputsReader ( reader . GetPositionData ( ) , 0 , outSize ) ;
reader . Skip ( outSize ) ;
2024-07-29 13:48:36 -04:00
auto logLinesType = LogLinesType ( reader . ReadByte ( ) ) ;
2024-05-31 02:18:27 -04:00
{
TimerScope ts ( cacheStats . testEntry ) ;
bool isMatch = true ;
// Check ranges first
// Every range of the entry must lie inside one of the matching shared ranges.
// NOTE(review): if the inner loop stops at end() and isMatch still ends up true (only possible
// for an empty entry range, begin == end), the next iteration dereferences end() - confirm
// the server never sends empty ranges.
auto sharedRangeIt = sharedMatchingRanges . begin ( ) ;
while ( isMatch & & rangeReader . GetLeft ( ) )
{
u64 begin = rangeReader . Read7BitEncoded ( ) ;
u64 end = rangeReader . Read7BitEncoded ( ) ;
Range matchingRange = * sharedRangeIt ;
while ( matchingRange . end < = begin )
{
+ + sharedRangeIt ;
if ( sharedRangeIt = = sharedMatchingRanges . end ( ) )
break ;
matchingRange = * sharedRangeIt ;
}
isMatch = matchingRange . begin < = begin & & matchingRange . end > = end ;
}
// Check extra keys after
while ( isMatch & & extraReader . GetLeft ( ) )
if ( ! IsCasKeyMatch ( isMatch , u32 ( extraReader . Read7BitEncoded ( ) ) , entryIndex , true ) )
return false ;
if ( ! isMatch )
continue ;
}
2024-06-06 15:45:03 -04:00
// The entry matches. With cache hits disabled the function stops here and reports no hit
if ( ! m_useCacheHit )
return false ;
2024-07-29 13:48:36 -04:00
if ( logLinesType = = LogLinesType_Shared )
if ( ! PopulateLogLines ( outResult . logLines , sharedLogLines , sharedLogLinesSize ) )
return false ;
if ( ! ReportUsedEntry ( outResult . logLines , logLinesType = = LogLinesType_Owned , bucket , cmdKey , entryId ) )
return false ;
2024-05-31 02:18:27 -04:00
// Fetch output files from cache (and some files need to be "denormalized" before written to disk
// Logger used for the downloads; it passes Max(type, LogEntryType_Info) on as the entry type
// (presumably so errors from a failed download are shown as info - depends on the enum order, confirm)
struct DowngradedLogger : public LoggerWithWriter
{
DowngradedLogger ( LogWriter & writer , const tchar * prefix ) : LoggerWithWriter ( writer , prefix ) { }
virtual void Log ( LogEntryType type , const tchar * str , u32 strLen ) override { LoggerWithWriter : : Log ( Max ( type , LogEntryType_Info ) , str , strLen ) ; }
} ;
while ( outputsReader . GetLeft ( ) )
{
u32 casKeyOffset = u32 ( outputsReader . Read7BitEncoded ( ) ) ;
2024-05-25 16:58:10 -04:00
if ( ! FetchCasTable ( bucket , cacheStats , casKeyOffset ) )
2024-04-10 20:29:18 -04:00
return false ;
2024-05-31 02:18:27 -04:00
TimerScope fts ( cacheStats . fetchOutput ) ;
2024-04-10 20:29:18 -04:00
2024-05-25 16:58:10 -04:00
StringBuffer < MaxPath > path ;
CasKey casKey ;
if ( ! GetLocalPathAndCasKey ( bucket , rootPaths , path , casKey , bucket . serverCasKeyTable , bucket . serverPathTable , casKeyOffset ) )
return false ;
UBA_ASSERT ( IsCompressed ( casKey ) ) ;
2024-05-23 19:16:15 -04:00
2024-06-02 18:14:50 -04:00
FileFetcher fetcher { m_storage . m_bufferSlots , storageStats } ;
2024-05-25 16:58:10 -04:00
fetcher . m_errorOnFail = false ;
if ( IsNormalized ( casKey ) )
2024-04-10 20:29:18 -04:00
{
2024-05-25 17:52:40 -04:00
DowngradedLogger logger ( m_logger . m_writer , TC ( " UbaCacheClientNormalizedDownload " ) ) ;
2024-05-25 16:58:10 -04:00
// Fetch into memory, file is in special format without absolute paths
MemoryBlock normalizedBlock ( 4 * 1024 * 1024 ) ;
bool destinationIsCompressed = false ;
2024-05-25 17:52:40 -04:00
if ( ! fetcher . RetrieveFile ( logger , m_client , casKey , path . data , destinationIsCompressed , & normalizedBlock ) )
2024-06-14 20:06:57 -04:00
return logger . Error ( TC ( " Failed to download cache output for %s " ) , info . GetDescription ( ) ) ;
2024-05-25 16:58:10 -04:00
MemoryBlock localBlock ( 4 * 1024 * 1024 ) ;
// The block starts with a u32 holding the position of the root offset list; the file content
// sits between that u32 and the list
u32 rootOffsets = * ( u32 * ) ( normalizedBlock . memory ) ;
char * fileStart = ( char * ) ( normalizedBlock . memory + sizeof ( u32 ) ) ;
UBA_ASSERT ( rootOffsets < = normalizedBlock . writtenSize ) ;
// "denormalize" fetched file into another memory block that will be written to disk
u64 lastWritten = 0 ;
BinaryReader reader2 ( normalizedBlock . memory , rootOffsets , normalizedBlock . writtenSize ) ;
while ( reader2 . GetLeft ( ) )
{
// Copy the content up to the next root marker, then write the local root path in place of the marker byte
u64 rootOffset = reader2 . Read7BitEncoded ( ) ;
if ( u64 toWrite = rootOffset - lastWritten )
memcpy ( localBlock . Allocate ( toWrite , 1 , TC ( " " ) ) , fileStart + lastWritten , toWrite ) ;
u8 rootIndex = fileStart [ rootOffset ] - RootPaths : : RootStartByte ;
2024-05-30 19:17:36 -04:00
const TString & root = rootPaths . GetRoot ( rootIndex ) ;
if ( root . empty ( ) )
2024-06-14 20:06:57 -04:00
return logger . Error ( TC ( " Cache entry uses root path index %u which is not set for this startupinfo (%s) " ) , rootIndex , info . GetDescription ( ) ) ;
2024-05-25 16:58:10 -04:00
# if PLATFORM_WINDOWS
StringBuffer < > pathTemp ;
2024-05-30 19:17:36 -04:00
pathTemp . Append ( root ) ;
2024-05-25 16:58:10 -04:00
char rootPath [ 512 ] ;
u32 rootPathLen = pathTemp . Parse ( rootPath , sizeof_array ( rootPath ) ) ;
# else
2024-05-30 19:17:36 -04:00
const char * rootPath = root . data ( ) ;
u32 rootPathLen = root . size ( ) ;
2024-05-25 16:58:10 -04:00
# endif
// One character less than rootPathLen is written (presumably Parse counts a terminator or the
// root's last character is already part of the content - confirm)
if ( u32 toWrite = rootPathLen - 1 )
memcpy ( localBlock . Allocate ( toWrite , 1 , TC ( " " ) ) , rootPath , toWrite ) ;
lastWritten = rootOffset + 1 ;
}
// Copy whatever content remains after the last marker
u64 fileSize = rootOffsets - sizeof ( u32 ) ;
if ( u64 toWrite = fileSize - lastWritten )
2024-04-10 20:29:18 -04:00
memcpy ( localBlock . Allocate ( toWrite , 1 , TC ( " " ) ) , fileStart + lastWritten , toWrite ) ;
2024-05-25 17:52:40 -04:00
FileAccessor destFile ( logger , path . data ) ;
2024-06-03 01:31:27 -04:00
// Hard-coded switch between writing through a file mapping and a plain write
bool useFileMapping = true ;
if ( useFileMapping )
{
if ( ! destFile . CreateMemoryWrite ( false , DefaultAttributes ( ) , localBlock . writtenSize ) )
2024-06-14 20:06:57 -04:00
return logger . Error ( TC ( " Failed to create file for cache output %s for %s " ) , path . data , info . GetDescription ( ) ) ;
2024-06-03 01:31:27 -04:00
MapMemoryCopy ( destFile . GetData ( ) , localBlock . memory , localBlock . writtenSize ) ;
}
else
{
if ( ! destFile . CreateWrite ( ) )
2024-06-14 20:06:57 -04:00
return logger . Error ( TC ( " Failed to create file for cache output %s for %s " ) , path . data , info . GetDescription ( ) ) ;
2024-06-03 01:31:27 -04:00
if ( ! destFile . Write ( localBlock . memory , localBlock . writtenSize ) )
return false ;
}
2024-05-25 16:58:10 -04:00
if ( ! destFile . Close ( & fetcher . lastWritten ) )
return false ;
2024-05-23 19:16:15 -04:00
2024-05-25 16:58:10 -04:00
// The denormalized file differs from what was downloaded, so size and cas key are taken from the local block
fetcher . sizeOnDisk = localBlock . writtenSize ;
2024-06-03 14:37:42 -04:00
casKey = CalculateCasKey ( localBlock . memory , localBlock . writtenSize , false , nullptr , path . data ) ;
2024-05-25 16:58:10 -04:00
}
else
{
2024-05-25 17:52:40 -04:00
// Regular output; RetrieveFile is given the destination path and no memory block
DowngradedLogger logger ( m_logger . m_writer , TC ( " UbaCacheClientDownload " ) ) ;
2024-05-27 23:22:26 -04:00
bool destinationIsCompressed = IsFileCompressed ( info , path ) ;
2024-05-25 17:52:40 -04:00
if ( ! fetcher . RetrieveFile ( logger , m_client , casKey , path . data , destinationIsCompressed ) )
2024-06-14 20:06:57 -04:00
return logger . Error ( TC ( " Failed to download cache output %s for %s " ) , path . data , info . GetDescription ( ) ) ;
2024-04-10 20:29:18 -04:00
}
2024-05-25 16:58:10 -04:00
cacheStats . fetchBytesRaw + = fetcher . sizeOnDisk ;
cacheStats . fetchBytesComp + = fetcher . bytesReceived ;
2024-05-23 19:16:15 -04:00
2024-05-25 16:58:10 -04:00
// Register the new file with storage and session
if ( ! m_storage . FakeCopy ( casKey , path . data , fetcher . sizeOnDisk , fetcher . lastWritten , false ) )
2024-04-10 20:29:18 -04:00
return false ;
2024-05-25 16:58:10 -04:00
if ( ! m_session . RegisterNewFile ( path . data ) )
2024-04-10 20:29:18 -04:00
return false ;
2024-05-31 02:18:27 -04:00
}
2024-07-29 13:48:36 -04:00
// All outputs of the matching entry are in place
outResult . hit = true ;
2024-05-31 02:18:27 -04:00
success = true ;
return true ;
}
2024-04-10 20:29:18 -04:00
}
2024-04-24 15:26:58 -04:00
// No entry matched; log the mismatches collected by IsCasKeyMatch (only collected when m_reportMissReason is set)
for ( auto & miss : misses )
2024-06-14 20:06:57 -04:00
m_logger . Info ( TC ( " Cache miss on %s because of mismatch of %s (entry: %u, local: %s cache: %s) " ) , info . GetDescription ( ) , miss . path . data ( ) , miss . entryIndex , CasKeyString ( miss . local ) . str , CasKeyString ( miss . cache ) . str ) ;
2024-04-24 15:26:58 -04:00
2024-04-10 20:29:18 -04:00
return false ;
}
2024-05-09 15:06:47 -04:00
bool CacheClient : : RequestServerShutdown ( const tchar * reason )
{
StackBinaryWriter < 1024 > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_RequestShutdown , writer ) ;
writer . WriteString ( reason ) ;
StackBinaryReader < 512 > reader ;
if ( ! msg . Send ( reader ) )
return false ;
return reader . ReadBool ( ) ;
}
2024-05-23 19:16:15 -04:00
bool CacheClient : : ExecuteCommand ( Logger & logger , const tchar * command , const tchar * destinationFile , const tchar * additionalInfo )
2024-04-10 20:29:18 -04:00
{
StackBinaryWriter < 1024 > writer ;
2024-05-23 19:16:15 -04:00
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_ExecuteCommand , writer ) ;
writer . WriteString ( command ) ;
writer . WriteString ( additionalInfo ? additionalInfo : TC ( " " ) ) ;
CasKey statusFileCasKey ;
{
StackBinaryReader < 512 > reader ;
if ( ! msg . Send ( reader ) )
return false ;
statusFileCasKey = reader . ReadCasKey ( ) ;
if ( statusFileCasKey = = CasKeyZero )
return false ;
}
2024-04-10 20:29:18 -04:00
2024-06-02 18:14:50 -04:00
StorageStats storageStats ;
FileFetcher fetcher { m_storage . m_bufferSlots , storageStats } ;
2024-04-25 13:10:20 -04:00
bool destinationIsCompressed = false ;
2024-05-23 19:16:15 -04:00
if ( destinationFile )
{
if ( ! fetcher . RetrieveFile ( m_logger , m_client , statusFileCasKey , destinationFile , destinationIsCompressed ) )
return false ;
}
else
{
MemoryBlock block ( 4 * 1024 * 1024 ) ;
if ( ! fetcher . RetrieveFile ( m_logger , m_client , statusFileCasKey , TC ( " CommandString " ) , destinationIsCompressed , & block ) )
return false ;
BinaryReader reader ( block . memory , 3 , block . writtenSize ) ; // Skipping bom
tchar line [ 1024 ] ;
tchar * it = line ;
while ( true )
{
tchar c = reader . ReadUtf8Char < tchar > ( ) ;
if ( c ! = ' \n ' & & c ! = 0 )
{
* it + + = c ;
continue ;
}
if ( c = = 0 & & it = = line )
break ;
* it = 0 ;
logger . Log ( LogEntryType_Info , line , u32 ( it - line ) ) ;
it = line ;
if ( c = = 0 )
break ;
}
}
2024-04-10 20:29:18 -04:00
return true ;
}
2024-04-12 18:04:58 -04:00
bool CacheClient : : SendPathTable ( Bucket & bucket , u32 requiredPathTableSize )
2024-04-10 20:29:18 -04:00
{
2024-04-12 18:04:58 -04:00
SCOPED_WRITE_LOCK ( bucket . pathTableNetworkLock , lock ) ;
if ( requiredPathTableSize < = bucket . pathTableSizeSent )
2024-04-10 20:29:18 -04:00
return true ;
2024-04-12 18:04:58 -04:00
u32 left = requiredPathTableSize - bucket . pathTableSizeSent ;
2024-04-10 20:29:18 -04:00
while ( left )
{
StackBinaryWriter < SendMaxSize > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_StorePathTable , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-12 18:04:58 -04:00
u32 toSend = Min ( requiredPathTableSize - bucket . pathTableSizeSent , u32 ( m_client . GetMessageMaxSize ( ) - 32 ) ) ;
2024-04-10 20:29:18 -04:00
left - = toSend ;
2024-04-12 18:04:58 -04:00
writer . WriteBytes ( bucket . sendPathTable . GetMemory ( ) + bucket . pathTableSizeSent , toSend ) ;
bucket . pathTableSizeSent + = toSend ;
2024-04-10 20:29:18 -04:00
StackBinaryReader < 16 > reader ;
if ( ! msg . Send ( reader ) )
return false ;
}
return true ;
}
2024-04-12 18:04:58 -04:00
bool CacheClient : : SendCasTable ( Bucket & bucket , u32 requiredCasTableSize )
2024-04-10 20:29:18 -04:00
{
2024-04-12 18:04:58 -04:00
SCOPED_WRITE_LOCK ( bucket . casKeyTableNetworkLock , lock ) ;
if ( requiredCasTableSize < = bucket . casKeyTableSizeSent )
2024-04-10 20:29:18 -04:00
return true ;
2024-04-12 18:04:58 -04:00
u32 left = requiredCasTableSize - bucket . casKeyTableSizeSent ;
2024-04-10 20:29:18 -04:00
while ( left )
{
StackBinaryWriter < SendMaxSize > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_StoreCasTable , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-12 18:04:58 -04:00
u32 toSend = Min ( requiredCasTableSize - bucket . casKeyTableSizeSent , u32 ( m_client . GetMessageMaxSize ( ) - 32 ) ) ;
2024-04-10 20:29:18 -04:00
left - = toSend ;
2024-04-12 18:04:58 -04:00
writer . WriteBytes ( bucket . sendCasKeyTable . GetMemory ( ) + bucket . casKeyTableSizeSent , toSend ) ;
bucket . casKeyTableSizeSent + = toSend ;
2024-04-10 20:29:18 -04:00
StackBinaryReader < 16 > reader ;
if ( ! msg . Send ( reader ) )
return false ;
}
return true ;
}
2024-07-29 13:48:36 -04:00
bool CacheClient : : SendCacheEntry ( Bucket & bucket , const RootPaths & rootPaths , const CasKey & cmdKey , const Map < u32 , u32 > & inputsStringToCasKey , const Map < u32 , u32 > & outputsStringToCasKey , const u8 * logLines , u64 logLinesSize , u64 & outBytesSent )
2024-04-10 20:29:18 -04:00
{
StackBinaryReader < 1024 > reader ;
{
StackBinaryWriter < SendMaxSize > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_StoreEntry , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-10 20:29:18 -04:00
writer . WriteCasKey ( cmdKey ) ;
2024-07-29 13:48:36 -04:00
writer . Write7BitEncoded ( inputsStringToCasKey . size ( ) ) ;
2024-04-10 20:29:18 -04:00
writer . Write7BitEncoded ( outputsStringToCasKey . size ( ) ) ;
for ( auto & kv : outputsStringToCasKey )
writer . Write7BitEncoded ( kv . second ) ;
for ( auto & kv : inputsStringToCasKey )
writer . Write7BitEncoded ( kv . second ) ;
2024-07-29 13:48:36 -04:00
if ( logLinesSize )
if ( writer . GetCapacityLeft ( ) > logLinesSize + Get7BitEncodedCount ( logLinesSize ) )
writer . WriteBytes ( logLines , logLinesSize ) ;
2024-04-10 20:29:18 -04:00
if ( ! msg . Send ( reader ) )
return false ;
}
// Server has all content for caskeys.. upload is done
if ( ! reader . GetLeft ( ) )
return true ;
2024-06-05 17:57:46 -04:00
bool success = false ;
auto doneGuard = MakeGuard ( [ & ] ( )
{
// Send done.. confirm to server
StackBinaryWriter < SendMaxSize > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_StoreEntryDone , writer ) ;
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
writer . WriteCasKey ( cmdKey ) ;
writer . WriteBool ( success ) ;
return msg . Send ( reader ) ;
} ) ;
2024-04-10 20:29:18 -04:00
// There is content we need to upload to server
while ( reader . GetLeft ( ) )
{
u32 casKeyOffset = u32 ( reader . Read7BitEncoded ( ) ) ;
StringBuffer < MaxPath > path ;
CasKey casKey ;
2024-04-12 18:04:58 -04:00
if ( ! GetLocalPathAndCasKey ( bucket , rootPaths , path , casKey , bucket . sendCasKeyTable , bucket . sendPathTable , casKeyOffset ) )
2024-04-10 20:29:18 -04:00
return false ;
casKey = AsCompressed ( casKey , true ) ;
StorageImpl : : CasEntry * casEntry ;
if ( m_storage . HasCasFile ( casKey , & casEntry ) )
{
2024-05-23 19:16:15 -04:00
UBA_ASSERT ( ! IsNormalized ( casKey ) ) ;
2024-04-10 20:29:18 -04:00
StringBuffer < > casKeyFileName ;
if ( ! m_storage . GetCasFileName ( casKeyFileName , casKey ) )
return false ;
const u8 * fileData ;
u64 fileSize ;
MappedView mappedView ;
auto mapViewGuard = MakeGuard ( [ & ] ( ) { m_storage . m_casDataBuffer . UnmapView ( mappedView , path . data ) ; } ) ;
FileAccessor file ( m_logger , casKeyFileName . data ) ;
if ( casEntry - > mappingHandle . IsValid ( ) ) // If file was created by helper it will be in the transient mapped memory
{
mappedView = m_storage . m_casDataBuffer . MapView ( casEntry - > mappingHandle , casEntry - > mappingOffset , casEntry - > mappingSize , path . data ) ;
fileData = mappedView . memory ;
fileSize = mappedView . size ;
}
else
{
if ( ! file . OpenMemoryRead ( ) )
return false ;
fileData = file . GetData ( ) ;
fileSize = file . GetSize ( ) ;
}
if ( ! SendFile ( m_logger , m_client , casKey , fileData , fileSize , casKeyFileName . data ) )
return false ;
2024-05-23 19:16:15 -04:00
outBytesSent + = fileSize ;
2024-04-10 20:29:18 -04:00
}
else // If we don't have the cas key it should be one of the normalized files.... otherwise there is a bug
{
2024-05-23 19:16:15 -04:00
if ( ! IsNormalized ( casKey ) )
return m_logger . Error ( TC ( " Can't find output file %s to send to cache server " ) , path . data ) ;
2024-05-23 02:46:41 -04:00
2024-05-23 19:16:15 -04:00
FileAccessor file ( m_logger , path . data ) ;
if ( ! file . OpenMemoryRead ( ) )
return false ;
MemoryBlock block ( AlignUp ( file . GetSize ( ) + 16 , 64 * 1024 ) ) ;
u32 & rootOffsetsStart = * ( u32 * ) block . Allocate ( sizeof ( u32 ) , 1 , TC ( " " ) ) ;
rootOffsetsStart = 0 ;
Vector < u32 > rootOffsets ;
u32 rootOffsetsSize = 0 ;
2024-04-10 20:29:18 -04:00
2024-05-23 19:16:15 -04:00
auto handleString = [ & ] ( const char * str , u64 strLen , u32 rootPos )
{
void * mem = block . Allocate ( strLen , 1 , TC ( " " ) ) ;
memcpy ( mem , str , strLen ) ;
if ( rootPos ! = ~ 0u )
2024-04-10 20:29:18 -04:00
{
2024-05-23 19:16:15 -04:00
rootOffsets . push_back ( rootPos ) ;
rootOffsetsSize + = Get7BitEncodedCount ( rootPos ) ;
}
} ;
2024-04-10 20:29:18 -04:00
2024-05-23 19:16:15 -04:00
if ( ! rootPaths . NormalizeString < char > ( m_logger , ( const char * ) file . GetData ( ) , file . GetSize ( ) , handleString , path . data ) )
return false ;
2024-04-10 20:29:18 -04:00
2024-05-23 19:16:15 -04:00
if ( rootOffsetsSize )
{
u8 * mem = ( u8 * ) block . Allocate ( rootOffsetsSize , 1 , TC ( " " ) ) ;
rootOffsetsStart = u32 ( mem - block . memory ) ;
BinaryWriter writer ( mem , 0 , rootOffsetsSize ) ;
for ( u32 rootOffset : rootOffsets )
writer . Write7BitEncoded ( rootOffset ) ;
2024-04-10 20:29:18 -04:00
}
else
2024-05-23 19:16:15 -04:00
rootOffsetsStart = u32 ( block . writtenSize ) ;
auto & s = m_storage ;
FileSender sender { m_logger , m_client , s . m_bufferSlots , s . Stats ( ) , m_sendOneAtTheTimeLock , s . m_casCompressor , s . m_casCompressionLevel } ;
u8 * dataToSend = block . memory ;
u64 sizeToSend = block . writtenSize ;
if ( ! sender . SendFileCompressed ( casKey , path . data , dataToSend , sizeToSend , TC ( " SendCacheEntry " ) ) )
return m_logger . Error ( TC ( " Failed to send cas content for file %s " ) , path . data ) ;
outBytesSent + = sender . m_bytesSent ;
2024-04-10 20:29:18 -04:00
}
}
2024-06-05 17:57:46 -04:00
success = true ;
return doneGuard . Execute ( ) ;
2024-04-10 20:29:18 -04:00
}
2024-04-28 01:07:00 -04:00
bool CacheClient : : FetchCasTable ( Bucket & bucket , CacheStats & stats , u32 requiredCasTableOffset )
2024-04-10 20:29:18 -04:00
{
2024-04-28 01:07:00 -04:00
auto hasEnoughData = [ & bucket , requiredCasTableOffset ] ( u32 tableSize )
{
u32 neededSize = requiredCasTableOffset + 4 ;
if ( neededSize > tableSize )
return false ;
BinaryReader r ( bucket . serverCasKeyTable . GetMemory ( ) , requiredCasTableOffset , tableSize ) ;
u8 bytesNeeded = Get7BitEncodedCount ( r . Read7BitEncoded ( ) ) ;
neededSize = requiredCasTableOffset + bytesNeeded + sizeof ( CasKey ) ;
return neededSize < = tableSize ;
} ;
2024-04-24 15:26:58 -04:00
2024-04-28 01:07:00 -04:00
if ( hasEnoughData ( bucket . availableCasKeyTableSize ) )
2024-04-24 15:26:58 -04:00
return true ;
2024-04-10 20:29:18 -04:00
2024-04-28 01:07:00 -04:00
TimerScope ts2 ( stats . fetchCasTable ) ;
2024-04-10 20:29:18 -04:00
StackBinaryReader < SendMaxSize > reader ;
2024-04-28 01:07:00 -04:00
SCOPED_WRITE_LOCK ( bucket . casKeyTableNetworkLock , lock ) ; // Use one lock over both queries
2024-04-10 20:29:18 -04:00
{
2024-04-28 01:07:00 -04:00
bool messageSent = false ;
while ( true )
2024-04-10 20:29:18 -04:00
{
2024-04-28 01:07:00 -04:00
u32 tableSize = bucket . serverCasKeyTable . GetSize ( ) ;
if ( hasEnoughData ( tableSize ) )
{
if ( ! messageSent )
return true ;
break ;
}
2024-04-10 20:29:18 -04:00
StackBinaryWriter < 16 > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_FetchCasTable , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-28 01:07:00 -04:00
writer . WriteU32 ( tableSize ) ;
2024-04-10 20:29:18 -04:00
reader . Reset ( ) ;
if ( ! msg . Send ( reader ) )
return false ;
2024-04-28 01:07:00 -04:00
reader . ReadU32 ( ) ;
messageSent = true ;
2024-04-12 18:04:58 -04:00
bucket . serverCasKeyTable . ReadMem ( reader , false ) ;
2024-04-10 20:29:18 -04:00
}
}
{
2024-04-28 01:07:00 -04:00
u32 targetSize = ~ 0u ; // For now, read all because we don't know how much we need (it would require parsing all path offsets in caskey table)
2024-04-12 18:04:58 -04:00
while ( bucket . serverPathTable . GetSize ( ) < targetSize )
2024-04-10 20:29:18 -04:00
{
StackBinaryWriter < 16 > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_FetchPathTable , writer ) ;
2024-04-25 00:44:38 -04:00
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
2024-04-12 18:04:58 -04:00
writer . WriteU32 ( bucket . serverPathTable . GetSize ( ) ) ;
2024-04-10 20:29:18 -04:00
reader . Reset ( ) ;
if ( ! msg . Send ( reader ) )
return false ;
u32 size = reader . ReadU32 ( ) ;
if ( targetSize = = ~ 0u )
targetSize = size ;
2024-04-12 18:04:58 -04:00
bucket . serverPathTable . ReadMem ( reader , false ) ;
2024-04-10 20:29:18 -04:00
}
}
2024-04-28 01:07:00 -04:00
bucket . availableCasKeyTableSize = bucket . serverCasKeyTable . GetSize ( ) ;
2024-04-10 20:29:18 -04:00
return true ;
}
2024-07-29 13:48:36 -04:00
bool CacheClient : : ReportUsedEntry ( Vector < ProcessLogLine > & outLogLines , bool ownedLogLines , Bucket & bucket , const CasKey & cmdKey , u32 entryId )
{
StackBinaryWriter < 128 > writer ;
NetworkMessage msg ( m_client , CacheServiceId , CacheMessageType_ReportUsedEntry , writer ) ;
writer . Write7BitEncoded ( MakeId ( bucket . id ) ) ;
writer . WriteCasKey ( cmdKey ) ;
writer . Write7BitEncoded ( entryId ) ;
if ( ! ownedLogLines )
return msg . Send ( ) ;
StackBinaryReader < SendMaxSize > reader ;
if ( ! msg . Send ( reader ) )
return false ;
return PopulateLogLines ( outLogLines , reader . GetPositionData ( ) , reader . GetLeft ( ) ) ;
}
bool CacheClient : : PopulateLogLines ( Vector < ProcessLogLine > & outLogLines , const u8 * mem , u64 memLen )
{
BinaryReader reader ( mem , 0 , memLen ) ;
while ( reader . GetLeft ( ) )
{
auto & logLine = outLogLines . emplace_back ( ) ;
logLine . text = reader . ReadString ( ) ;
logLine . type = LogEntryType ( reader . ReadByte ( ) ) ;
}
return true ;
}
2024-04-12 01:05:04 -04:00
// Calculates the key used to identify a process invocation in the cache. The key is a hash of
//   1. the cas key of the application binary (on windows cmd.exe is hashed by name instead)
//   2. the arguments, passed through rootPaths.NormalizeString
//   3. the cas key of the response file if arguments contain @"<file>"
// Returns CasKeyZero if the application cas key can't be stored or the arguments can't be normalized.
CasKey CacheClient::GetCmdKey(const RootPaths& rootPaths, const ProcessStartInfo& info)
{
	CasKeyHasher hasher;

	#if PLATFORM_WINDOWS
	// cmd.exe is special.. we can't hash it because it might be different on different os versions but should do the same thing regardless of version
	if (Contains(info.application, TC("cmd.exe")))
	{
		hasher.Update(TC("cmd.exe"), 7*sizeof(tchar));
	}
	else
	#endif
	{
		// Add hash of application binary to key
		CasKey applicationCasKey;
		bool fileIsCompressed = false;
		if (!m_storage.StoreCasKey(applicationCasKey, info.application, CasKeyZero, fileIsCompressed))
			return CasKeyZero;
		hasher.Update(&applicationCasKey, sizeof(CasKey));
	}

	// Add arguments list to key
	auto hashString = [&](const tchar* str, u64 strLen, u32 rootPos) { hasher.Update(str, strLen*sizeof(tchar)); };
	if (!rootPaths.NormalizeString(m_logger, info.arguments, TStrlen(info.arguments), hashString, TC("CmdKey"), info.GetDescription()))
		return CasKeyZero;

	// Add content of rsp file to key (This will cost a bit of perf since we need to normalize.. should this be part of key?)
	if (auto rspStart = TStrchr(info.arguments, '@'))
	{
		if (rspStart[1] == '"')
		{
			rspStart += 2;
			if (auto rspEnd = TStrchr(rspStart, '"'))
			{
				StringBuffer<MaxPath> workingDir(info.workingDir);
				workingDir.EnsureEndsWithSlash();

				StringBuffer<> rsp;
				rsp.Append(rspStart, rspEnd - rspStart);

				// Resolve against the working dir of the process being keyed so a relative rsp path
				// is not looked up relative to whatever directory this process happens to run in.
				StringBuffer<> fullPath;
				FixPath(rsp.data, workingDir.data, workingDir.count, fullPath);

				// Fall back to the path as written if FixPath did not produce anything
				const tchar* rspPath = fullPath.count ? fullPath.data : rsp.data;

				CasKey rspCasKey = rootPaths.NormalizeAndHashFile(m_logger, rspPath);
				hasher.Update(&rspCasKey, sizeof(CasKey));
			}
		}
	}

	return ToCasKey(hasher, false);
}
2024-04-24 15:26:58 -04:00
bool CacheClient : : ShouldNormalize ( const StringBufferBase & path )
{
2024-06-06 15:45:03 -04:00
if ( ! m_useRoots )
return false ;
2024-04-29 13:39:21 -04:00
if ( path . EndsWith ( TC ( " .dep.json " ) ) ) // Contains absolute paths (dep file for msvc)
return true ;
if ( path . EndsWith ( TC ( " .d " ) ) ) // Contains absolute paths (dep file for clang)
2024-04-24 15:26:58 -04:00
return true ;
if ( path . EndsWith ( TC ( " .tlh " ) ) ) // Contains absolute path in a comment
return true ;
if ( path . EndsWith ( TC ( " .rsp " ) ) ) // Contains absolute paths in some cases
return true ;
if ( path . EndsWith ( TC ( " .bat " ) ) ) // Contains absolute paths in some cases
return true ;
return false ;
}
2024-04-12 18:04:58 -04:00
bool CacheClient : : GetLocalPathAndCasKey ( Bucket & bucket , const RootPaths & rootPaths , StringBufferBase & outPath , CasKey & outKey , CompactCasKeyTable & casKeyTable , CompactPathTable & pathTable , u32 offset )
2024-04-10 20:29:18 -04:00
{
2024-04-12 18:04:58 -04:00
SCOPED_READ_LOCK ( bucket . casKeyTableNetworkLock , lock ) ; // TODO: Is this needed?
2024-04-10 20:29:18 -04:00
StringBuffer < MaxPath > normalizedPath ;
casKeyTable . GetPathAndKey ( normalizedPath , outKey , pathTable , offset ) ;
UBA_ASSERT ( normalizedPath . count ) ;
2024-04-12 01:05:04 -04:00
u32 rootIndex = normalizedPath [ 0 ] - RootPaths : : RootStartByte ;
2024-05-30 19:17:36 -04:00
const TString & root = rootPaths . GetRoot ( rootIndex ) ;
2024-04-10 20:29:18 -04:00
2024-06-06 15:45:03 -04:00
outPath . Append ( root ) . Append ( normalizedPath . data + u32 ( m_useRoots ) ) ; // If we use root paths, then first byte is root path table index
2024-04-10 20:29:18 -04:00
return true ;
}
2024-05-23 01:51:09 -04:00
2024-05-27 23:22:26 -04:00
bool CacheClient : : IsFileCompressed ( const ProcessStartInfo & info , const StringView & filename )
2024-05-23 01:51:09 -04:00
{
if ( ! m_session . ShouldStoreObjFilesCompressed ( ) )
return false ;
auto rules = info . rules ;
if ( ! rules )
rules = m_session . GetRules ( info ) ;
2024-05-27 23:22:26 -04:00
return rules - > StoreFileCompressed ( filename ) ;
2024-05-23 01:51:09 -04:00
}
2024-06-03 01:31:27 -04:00
void CacheClient : : PreparseDirectory ( const StringKey & fileNameKey , const StringBufferBase & filePath )
{
const tchar * lastSep = filePath . Last ( PathSeparator ) ;
if ( ! lastSep )
return ;
StringBuffer < MaxPath > path ;
path . Append ( filePath . data , lastSep - filePath . data ) ;
if ( CaseInsensitiveFs )
path . MakeLower ( ) ;
StringKeyHasher dirHasher ;
dirHasher . Update ( path . data , path . count ) ;
StringKey pathKey = ToStringKey ( dirHasher ) ;
SCOPED_WRITE_LOCK ( m_directoryPreparserLock , preparserLock ) ;
auto insres = m_directoryPreparser . try_emplace ( pathKey ) ;
PreparedDir & dir = insres . first - > second ;
preparserLock . Leave ( ) ;
SCOPED_WRITE_LOCK ( dir . lock , preparserLock2 ) ;
if ( dir . done )
return ;
dir . done = true ;
// It is likely this folder has already been handled by session if this file is verified
if ( m_storage . IsFileVerified ( fileNameKey ) )
return ;
// Traverse all files in directory and report the file information... but only if it has not been reported before.. we don't want to interfere with other reports
TraverseDir ( m_logger , path . data ,
[ & ] ( const DirectoryEntry & e )
{
2024-06-06 13:00:09 -04:00
if ( IsDirectory ( e . attributes ) )
return ;
2024-06-03 23:14:56 -04:00
path . Clear ( ) . Append ( ' \\ ' ) . Append ( e . name , e . nameLen ) ;
2024-06-03 01:31:27 -04:00
if ( CaseInsensitiveFs )
path . MakeLower ( ) ;
StringKey fileNameKey = ToStringKey ( dirHasher , path . data , path . count ) ;
m_storage . ReportFileInfoWeak ( fileNameKey , e . lastWritten , e . size ) ;
} ) ;
}
2024-04-10 20:29:18 -04:00
}