2021-08-14 17:55:46 -04:00
// Copyright Epic Games, Inc. All Rights Reserved.
using System ;
using System.Collections.Generic ;
using System.Linq ;
using System.Threading ;
using System.Threading.Tasks ;
2022-03-23 14:50:23 -04:00
using EpicGames.Core ;
using EpicGames.Horde.Compute ;
using EpicGames.Horde.Storage ;
using EpicGames.Redis ;
using EpicGames.Serialization ;
using Google.Protobuf ;
using Google.Protobuf.WellKnownTypes ;
2022-06-07 15:53:33 -04:00
using Horde.Build.Agents ;
using Horde.Build.Agents.Leases ;
2022-08-16 09:52:47 -04:00
using Horde.Build.Agents.Pools ;
2022-03-31 17:22:28 -04:00
using Horde.Build.Server ;
2022-03-16 11:18:39 -04:00
using Horde.Build.Tasks ;
using Horde.Build.Utilities ;
2022-03-23 14:50:23 -04:00
using HordeCommon ;
using HordeCommon.Rpc.Tasks ;
2021-08-14 17:55:46 -04:00
using Microsoft.Extensions.Caching.Memory ;
using Microsoft.Extensions.Hosting ;
2022-03-23 14:50:23 -04:00
using Microsoft.Extensions.Logging ;
2022-08-25 11:04:47 -04:00
using OpenTracing ;
using OpenTracing.Util ;
2022-03-23 14:50:23 -04:00
using StackExchange.Redis ;
2021-08-14 17:55:46 -04:00
2022-06-07 15:53:33 -04:00
namespace Horde.Build.Compute
2021-08-14 17:55:46 -04:00
{
2021-10-19 16:12:44 -04:00
using LeaseId = ObjectId < ILease > ;
2021-08-14 17:55:46 -04:00
/// <summary>
/// Describes a single queued compute task: the cluster and channel it belongs to, the ref
/// identifying the task, and the time at which it was queued.
/// </summary>
[RedisConverter(typeof(RedisCbConverter<>))]
class ComputeTaskInfo
{
    /// <summary>
    /// Cluster that the task was submitted to
    /// </summary>
    [CbField("c")]
    public ClusterId ClusterId { get; set; }

    /// <summary>
    /// Ref identifying the task
    /// </summary>
    [CbField("h")]
    public RefId TaskRefId { get; set; }

    /// <summary>
    /// Channel used to build the message queue id for status updates about this task
    /// </summary>
    [CbField("ch")]
    public ChannelId ChannelId { get; set; }

    /// <summary>
    /// Time at which the task was queued
    /// </summary>
    [CbField("q")]
    public DateTime QueuedAt { get; set; }

    /// <summary>
    /// Parameterless constructor. Not called anywhere in this file; presumably required by the serializer (TODO confirm).
    /// </summary>
    private ComputeTaskInfo()
    {
    }

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="clusterId">Cluster that the task was submitted to</param>
    /// <param name="taskRefId">Ref identifying the task</param>
    /// <param name="channelId">Channel for status updates</param>
    /// <param name="queuedAt">Time at which the task was queued</param>
    public ComputeTaskInfo(ClusterId clusterId, RefId taskRefId, ChannelId channelId, DateTime queuedAt)
    {
        (ClusterId, TaskRefId, ChannelId, QueuedAt) = (clusterId, taskRefId, channelId, queuedAt);
    }
}
/// <summary>
/// Dispatches remote actions. Does not implement any cross-pod communication to satisfy leases; only agents connected to this server instance will be stored.
/// </summary>
2022-09-03 09:28:43 -04:00
public sealed class ComputeService : TaskSourceBase < ComputeTaskMessage > , IHostedService , IDisposable , IComputeService
2021-08-14 17:55:46 -04:00
{
2022-01-29 14:50:26 -05:00
/// <summary>
/// Identifies a task queue by the cluster it belongs to and the hash of its requirements object.
/// </summary>
[RedisConverter(typeof(QueueKeySerializer))]
class QueueKey
{
    /// <summary>
    /// Cluster that owns the queue
    /// </summary>
    public ClusterId ClusterId { get; set; }

    /// <summary>
    /// Hash of the requirements object shared by all tasks in the queue
    /// </summary>
    public IoHash RequirementsHash { get; set; }

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="clusterId">Cluster that owns the queue</param>
    /// <param name="requirementsHash">Hash of the requirements object</param>
    public QueueKey(ClusterId clusterId, IoHash requirementsHash)
    {
        (ClusterId, RequirementsHash) = (clusterId, requirementsHash);
    }

    /// <summary>
    /// Formats the key as "{ClusterId}/{RequirementsHash}"
    /// </summary>
    public override string ToString()
    {
        return $"{ClusterId}/{RequirementsHash}";
    }
}
/// <summary>
/// Converts <see cref="QueueKey"/> values to and from the "{ClusterId}/{RequirementsHash}" string form stored in Redis.
/// </summary>
class QueueKeySerializer : IRedisConverter<QueueKey>
{
    /// <summary>
    /// Parses a queue key from its string form
    /// </summary>
    /// <param name="value">Value read from Redis</param>
    /// <returns>The parsed queue key</returns>
    /// <exception cref="FormatException">The value does not contain a '/' separator</exception>
    public QueueKey FromRedisValue(RedisValue value)
    {
        string str = value.ToString();

        // Split on the last separator; everything before it is treated as the cluster id
        int idx = str.LastIndexOf("/", StringComparison.Ordinal);
        if (idx < 0)
        {
            // Without this check, Substring(0, -1) would throw an ArgumentOutOfRangeException that does not mention the offending value
            throw new FormatException($"Invalid queue key '{str}'; expected '<cluster id>/<requirements hash>'");
        }

        return new QueueKey(new ClusterId(str.Substring(0, idx)), IoHash.Parse(str.Substring(idx + 1)));
    }

    /// <summary>
    /// Formats a queue key as a string. Uses <see cref="QueueKey.ToString"/> so the format is defined in one place.
    /// </summary>
    /// <param name="value">The key to serialize</param>
    /// <returns>String form of the key</returns>
    public RedisValue ToRedisValue(QueueKey value) => value.ToString();
}
/// <summary>
/// Implementation of <see cref="IComputeClusterInfo"/> built from a cluster's configuration entry.
/// </summary>
class ClusterInfo : IComputeClusterInfo
{
    /// <summary>
    /// Identifier of the cluster
    /// </summary>
    public ClusterId Id { get; set; }

    /// <summary>
    /// Namespace configured for the cluster
    /// </summary>
    public NamespaceId NamespaceId { get; set; }

    /// <summary>
    /// Bucket configured for requests
    /// </summary>
    public BucketId RequestBucketId { get; set; }

    /// <summary>
    /// Bucket configured for responses
    /// </summary>
    public BucketId ResponseBucketId { get; set; }

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="config">Configuration to copy the identifiers from</param>
    public ClusterInfo(ComputeClusterConfig config)
    {
        ClusterId id = new ClusterId(config.Id);
        NamespaceId namespaceId = new NamespaceId(config.NamespaceId);
        BucketId requestBucketId = new BucketId(config.RequestBucketId);
        BucketId responseBucketId = new BucketId(config.ResponseBucketId);

        Id = id;
        NamespaceId = namespaceId;
        RequestBucketId = requestBucketId;
        ResponseBucketId = responseBucketId;
    }
}
2022-01-26 16:37:31 -05:00
/// <inheritdoc/>
public override string Type
{
    get { return "Compute"; }
}
2021-08-14 17:55:46 -04:00
2022-01-26 16:37:31 -05:00
/// <inheritdoc/>
public override TaskSourceFlags Flags
{
    get { return TaskSourceFlags.None; }
}
2022-06-20 08:21:47 -04:00
/// <summary>
/// Identifier of the default namespace ("default")
/// </summary>
public static NamespaceId DefaultNamespaceId { get; } = new NamespaceId("default");
2021-08-14 17:55:46 -04:00
2022-09-19 21:55:12 -04:00
// Storage client used to read requirements blobs
readonly ILegacyStorageClient _storageClient;

// Scheduler holding the queued tasks, grouped by queue key
readonly ITaskScheduler<QueueKey, ComputeTaskInfo> _taskScheduler;

// Message queue used to publish and read task status updates
readonly RedisMessageQueue<ComputeTaskStatus> _messageQueue;

// Ticker that invokes ExpireTasksAsync
readonly ITicker _expireTasksTicker;

// Cache of requirements objects, keyed by requirements hash
readonly IMemoryCache _requirementsCache;

// Cached globals object, used to look up cluster configuration
readonly LazyCachedValue<Task<IGlobals>> _globals;

// Logger for this service
readonly ILogger _logger;
2021-08-14 17:55:46 -04:00
2022-04-07 15:34:25 -04:00
/// <summary>
/// Static constructor. Registers the Redis converter for <see cref="ComputeTaskStatus"/> messages.
/// </summary>
static ComputeService()
{
    RedisSerializer.RegisterConverter<ComputeTaskStatus, RedisCbConverter<ComputeTaskStatus>>();
}
2021-08-14 17:55:46 -04:00
/// <summary>
/// Constructor
/// </summary>
/// <param name="globalsService">Service used to fetch the globals object</param>
/// <param name="redisService">Redis service providing the connection pool and database</param>
/// <param name="storageClient">Storage client used to read requirements blobs</param>
/// <param name="clock">Clock used to create the task expiry ticker</param>
/// <param name="logger">Logger for this service</param>
public ComputeService(GlobalsService globalsService, RedisService redisService, ILegacyStorageClient storageClient, IClock clock, ILogger<ComputeService> logger)
{
    TimeSpan expireInterval = TimeSpan.FromMinutes(2.0);
    TimeSpan globalsCacheTime = TimeSpan.FromSeconds(120.0);

    _storageClient = storageClient;

    // Redis-backed task queues and status message queue
    _taskScheduler = new RedisTaskScheduler<QueueKey, ComputeTaskInfo>(redisService.ConnectionPool, "compute/tasks/", logger);
    _messageQueue = new RedisMessageQueue<ComputeTaskStatus>(redisService.GetDatabase(), "compute/messages/");

    // Periodically expire tasks in inactive queues
    _expireTasksTicker = clock.AddTicker<ComputeService>(expireInterval, ExpireTasksAsync, logger);

    _requirementsCache = new MemoryCache(new MemoryCacheOptions());
    _globals = new LazyCachedValue<Task<IGlobals>>(async () => await globalsService.GetAsync(), globalsCacheTime);
    _logger = logger;

    OnLeaseStartedProperties.Add(x => x.TaskRefId);
}
2022-04-07 15:34:25 -04:00
/// <summary>
/// Creates a status message for a task, stamped with the current UTC time
/// </summary>
/// <param name="taskRefId">Ref identifying the task</param>
/// <param name="state">State to report</param>
/// <returns>New status object</returns>
static ComputeTaskStatus CreateStatus(RefId taskRefId, ComputeTaskState state)
{
    return new ComputeTaskStatus
    {
        TaskRefId = taskRefId,
        Time = DateTime.UtcNow,
        State = state
    };
}
2021-12-14 15:43:25 -05:00
/// <inheritdoc/>
public Task StartAsync(CancellationToken token)
{
    // Starting the service just starts the task expiry ticker
    return _expireTasksTicker.StartAsync();
}
2021-12-14 15:43:25 -05:00
/// <inheritdoc/>
public Task StopAsync(CancellationToken token)
{
    // Stopping the service just stops the task expiry ticker
    return _expireTasksTicker.StopAsync();
}
2021-12-14 15:43:25 -05:00
2021-09-09 16:46:33 -04:00
/// <summary>
/// Expire tasks that are in inactive queues (ie. no machines can execute them). Each task is removed
/// from its queue and a Complete/Expired status message is posted for it.
/// </summary>
/// <param name="cancellationToken">Cancellation token; checked between tasks</param>
/// <returns>Async task</returns>
async ValueTask ExpireTasksAsync(CancellationToken cancellationToken)
{
    using IScope scope = GlobalTracer.Instance.BuildSpan("ComputeService.ExpireTasksAsync").StartActive();

    List<QueueKey> queueKeys = await _taskScheduler.GetInactiveQueuesAsync();
    scope.Span.SetTag("numQueueKeys", queueKeys.Count);

    foreach (QueueKey queueKey in queueKeys)
    {
        _logger.LogInformation("Inactive queue: {QueueKey}", queueKey);
        for (; ; )
        {
            // Only stop between tasks, so that every task that has been dequeued also gets its status posted.
            // Anything left in the queue is picked up by a later tick.
            if (cancellationToken.IsCancellationRequested)
            {
                return;
            }

            ComputeTaskInfo? computeTask = await _taskScheduler.DequeueAsync(queueKey);
            if (computeTask == null)
            {
                break;
            }

            ComputeTaskStatus status = CreateStatus(computeTask.TaskRefId, ComputeTaskState.Complete);
            status.Outcome = ComputeTaskOutcome.Expired;
            status.Detail = $"No agents monitoring queue {queueKey}";

            // The value logged is the full queue key (cluster and requirements hash), so label it as such
            _logger.LogInformation("Compute task expired (queue: {QueueKey}, task: {TaskHash}, channel: {ChannelId})", queueKey, computeTask.TaskRefId, computeTask.ChannelId);
            await PostStatusMessageAsync(computeTask, status);
        }
    }
}
2021-08-14 17:55:46 -04:00
/// <summary>
/// Disposes the message queue, the expiry ticker and the requirements cache, in that order
/// </summary>
public void Dispose()
{
    _messageQueue.Dispose();
    _expireTasksTicker.Dispose();
    _requirementsCache.Dispose();
}
2022-08-18 14:38:44 -04:00
/// <inheritdoc/>
/// <exception cref="KeyNotFoundException">No configured cluster has the given id</exception>
public async Task<IComputeClusterInfo> GetClusterInfoAsync(ClusterId clusterId)
{
    ComputeClusterConfig config = await GetClusterAsync(clusterId) ?? throw new KeyNotFoundException();
    return new ClusterInfo(config);
}
2022-08-18 14:38:44 -04:00
/// <summary>
/// Adds a batch of tasks to the queue identified by the cluster and requirements hash. All tasks are
/// enqueued concurrently and the method completes once every enqueue has finished.
/// </summary>
/// <param name="clusterId">Cluster to add the tasks to</param>
/// <param name="channelId">Channel used for status updates about the tasks</param>
/// <param name="taskRefIds">Refs identifying the tasks to add</param>
/// <param name="requirementsHash">Hash of the requirements object shared by all the tasks</param>
/// <returns>Async task</returns>
public async Task AddTasksAsync(ClusterId clusterId, ChannelId channelId, List<RefId> taskRefIds, CbObjectAttachment requirementsHash)
{
    // Every task in the batch goes into the same queue, so build the key once
    QueueKey queueKey = new QueueKey(clusterId, requirementsHash);

    List<Task> tasks = new List<Task>(taskRefIds.Count);
    foreach (RefId taskRefId in taskRefIds)
    {
        ComputeTaskInfo taskInfo = new ComputeTaskInfo(clusterId, taskRefId, channelId, DateTime.UtcNow);

        // Queue is logged as "cluster/hash", matching the format produced by QueueKey.ToString()
        _logger.LogDebug("Adding task {TaskHash} from channel {ChannelId} to queue {ClusterId}/{RequirementsHash}", taskRefId.Hash, channelId, clusterId, requirementsHash);
        tasks.Add(_taskScheduler.EnqueueAsync(queueKey, taskInfo, false));
    }
    await Task.WhenAll(tasks);
}
2022-03-23 14:50:23 -04:00
/// <summary>
/// Finds the configuration for a cluster in the cached globals object
/// </summary>
/// <param name="clusterId">Cluster to look up</param>
/// <returns>The first configuration entry whose id matches, or null if there is none</returns>
async ValueTask<ComputeClusterConfig?> GetClusterAsync(ClusterId clusterId)
{
    IGlobals globals = await _globals.GetCached();
    foreach (ComputeClusterConfig candidate in globals.Config.Compute)
    {
        if (new ClusterId(candidate.Id) == clusterId)
        {
            return candidate;
        }
    }
    return null;
}
2021-08-14 17:55:46 -04:00
/// <inheritdoc/>
public override async Task<Task<AgentLease?>> AssignLeaseAsync(IAgent agent, CancellationToken cancellationToken)
{
    // Start waiting for a task from any queue whose requirements this agent satisfies
    Task<(QueueKey, ComputeTaskInfo)?> pendingEntry = await _taskScheduler.DequeueAsync(key => CheckRequirements(agent, key), cancellationToken);

    // The returned (inner) task completes once a lease has been created, or with null if no task was dequeued
    Task<AgentLease?> leaseTask = WaitForLeaseAsync(agent, pendingEntry, cancellationToken);
    return leaseTask;
}
2022-03-29 13:49:41 -04:00
/// <summary>
/// Waits for a dequeued task and converts it into a lease. If a lease cannot be created for the task,
/// another dequeue is started and the process repeats.
/// </summary>
/// <param name="agent">Agent to create the lease for</param>
/// <param name="task">Pending dequeue operation</param>
/// <param name="cancellationToken">Cancellation token passed to subsequent dequeue operations</param>
/// <returns>The new lease, or null if a dequeue operation completed without a task</returns>
private async Task<AgentLease?> WaitForLeaseAsync(IAgent agent, Task<(QueueKey, ComputeTaskInfo)?> task, CancellationToken cancellationToken)
{
    (QueueKey, ComputeTaskInfo)? entry = await task;
    while (entry.HasValue)
    {
        AgentLease? lease = await CreateLeaseForEntryAsync(agent, entry.Value);
        if (lease != null)
        {
            return lease;
        }

        // No lease could be created for that task; wait for the next one
        task = await _taskScheduler.DequeueAsync(key => CheckRequirements(agent, key), cancellationToken);
        entry = await task;
    }
    return null;
}
/// <summary>
/// Creates a lease for a task that has been removed from its queue. If the task's cluster is not
/// configured, or the requirements object for the queue cannot be retrieved, a Complete status with a
/// failure outcome is posted for the task instead.
/// </summary>
/// <param name="agent">Agent that the lease is being created for</param>
/// <param name="entry">Queue key and task information returned by the scheduler</param>
/// <returns>The new lease, or null if the task was failed</returns>
private async Task<AgentLease?> CreateLeaseForEntryAsync(IAgent agent, (QueueKey, ComputeTaskInfo) entry)
{
    (QueueKey queueKey, ComputeTaskInfo taskInfo) = entry;

    ComputeClusterConfig? cluster = await GetClusterAsync(taskInfo.ClusterId);
    if (cluster == null)
    {
        _logger.LogWarning("Invalid cluster '{ClusterId}'; failing task {TaskRefId}", taskInfo.ClusterId, taskInfo.TaskRefId);
        await PostLeaseFailureAsync(agent, taskInfo, ComputeTaskOutcome.Failed, $"Invalid cluster '{taskInfo.ClusterId}'");
        return null;
    }

    Requirements? requirements = await GetCachedRequirementsAsync(queueKey);
    if (requirements == null)
    {
        _logger.LogWarning("Unable to fetch requirements {RequirementsHash}", queueKey);

        // Same outcome that GetRequirementsAsync reports for the tasks still in the queue
        await PostLeaseFailureAsync(agent, taskInfo, ComputeTaskOutcome.BlobNotFound, $"Unable to retrieve requirements '{queueKey}'");
        return null;
    }

    ComputeTaskMessage computeTask = new ComputeTaskMessage();
    computeTask.ClusterId = taskInfo.ClusterId.ToString();
    computeTask.ChannelId = taskInfo.ChannelId.ToString();
    computeTask.NamespaceId = cluster.NamespaceId.ToString();
    computeTask.InputBucketId = cluster.RequestBucketId.ToString();
    computeTask.OutputBucketId = cluster.ResponseBucketId.ToString();
    computeTask.RequirementsHash = queueKey.RequirementsHash;
    computeTask.TaskRefId = taskInfo.TaskRefId;
    computeTask.QueuedAt = Timestamp.FromDateTime(taskInfo.QueuedAt);

    // Time the task spent in the queue before being dispatched
    computeTask.DispatchedMs = (int)(DateTime.UtcNow - taskInfo.QueuedAt).TotalMilliseconds;

    string leaseName = $"Remote action ({taskInfo.TaskRefId})";
    byte[] payload = Any.Pack(computeTask).ToByteArray();

    AgentLease lease = new AgentLease(LeaseId.GenerateNewId(), leaseName, null, null, null, LeaseState.Pending, requirements.Resources, requirements.Exclusive, payload);
    _logger.LogDebug("Created lease {LeaseId} for channel {ChannelId} task {TaskHash} req {RequirementsHash}", lease.Id, computeTask.ChannelId, computeTask.TaskRefId, computeTask.RequirementsHash);
    return lease;
}

/// <summary>
/// Posts a Complete status for a task that could not be turned into a lease. The outcome is always set
/// explicitly, so the message is not left carrying the default outcome value.
/// </summary>
/// <param name="agent">Agent that the lease was being created for</param>
/// <param name="taskInfo">The task being failed</param>
/// <param name="outcome">Outcome to report</param>
/// <param name="detail">Human readable description of the failure</param>
private async Task PostLeaseFailureAsync(IAgent agent, ComputeTaskInfo taskInfo, ComputeTaskOutcome outcome, string detail)
{
    ComputeTaskStatus status = CreateStatus(taskInfo.TaskRefId, ComputeTaskState.Complete);
    status.AgentId = agent.Id.ToString();
    status.Outcome = outcome;
    status.Detail = detail;
    await PostStatusMessageAsync(taskInfo, status);
}
/// <summary>
/// Handles a cancelled lease by rebuilding the task from the lease message and adding it back to its queue
/// </summary>
/// <param name="agent">Agent that held the lease</param>
/// <param name="leaseId">The cancelled lease</param>
/// <param name="message">Task message carried by the lease</param>
/// <returns>Async task</returns>
public override Task CancelLeaseAsync(IAgent agent, LeaseId leaseId, ComputeTaskMessage message)
{
    ClusterId clusterId = new ClusterId(message.ClusterId);

    // Rebuild the task information from the fields stored in the lease message
    RefId taskRefId = new RefId(new IoHash(message.TaskRefId.ToByteArray()));
    ChannelId channelId = new ChannelId(message.ChannelId);
    ComputeTaskInfo taskInfo = new ComputeTaskInfo(clusterId, taskRefId, channelId, message.QueuedAt.ToDateTime());

    IoHash requirementsHash = new IoHash(message.RequirementsHash.ToByteArray());
    QueueKey queueKey = new QueueKey(clusterId, requirementsHash);

    return _taskScheduler.EnqueueAsync(queueKey, taskInfo, true);
}
2022-08-18 14:38:44 -04:00
/// <inheritdoc/>
public async Task<List<ComputeTaskStatus>> GetTaskUpdatesAsync(ClusterId clusterId, ChannelId channelId)
{
    string queueId = GetMessageQueueId(clusterId, channelId);
    List<ComputeTaskStatus> updates = await _messageQueue.ReadMessagesAsync(queueId);
    return updates;
}
2022-08-16 09:52:47 -04:00
2022-08-18 14:38:44 -04:00
/// <summary>
/// Counts the queued tasks in the given cluster whose requirements are met by a pool
/// </summary>
/// <param name="clusterId">Cluster to count tasks for; queues belonging to other clusters are ignored</param>
/// <param name="pool">Pool to check the queue requirements against</param>
/// <param name="cancellationToken">Cancellation token for the operation</param>
/// <returns>Number of queued tasks</returns>
public async Task<int> GetNumQueuedTasksForPoolAsync(ClusterId clusterId, IPool pool, CancellationToken cancellationToken = default)
{
    // NOTE(review): clusterId was previously unused, so queues from every cluster were counted. Confirm
    // against callers that scoping the count to the requested cluster is the intended contract.
    return await _taskScheduler.GetNumQueuedTasksAsync(
        queueKey => (queueKey.ClusterId == clusterId) ? CheckRequirements(pool, queueKey) : new ValueTask<bool>(false),
        cancellationToken);
}
2021-08-18 21:35:37 -04:00
2022-08-18 14:38:44 -04:00
/// <inheritdoc/>
public async Task<List<ComputeTaskStatus>> WaitForTaskUpdatesAsync(ClusterId clusterId, ChannelId channelId, CancellationToken cancellationToken)
{
    string queueId = GetMessageQueueId(clusterId, channelId);
    List<ComputeTaskStatus> updates = await _messageQueue.WaitForMessagesAsync(queueId, cancellationToken);
    return updates;
}
2022-06-20 08:21:47 -04:00
/// <summary>
/// Called when a lease starts. After the base class handling, posts an Executing status for the task
/// that identifies the agent and lease running it.
/// </summary>
/// <param name="agent">Agent executing the lease</param>
/// <param name="leaseId">The lease that started</param>
/// <param name="computeTask">Task message carried by the lease</param>
/// <param name="logger">Logger for the lease</param>
/// <returns>Async task</returns>
public override async Task OnLeaseStartedAsync(IAgent agent, LeaseId leaseId, ComputeTaskMessage computeTask, ILogger logger)
{
    await base.OnLeaseStartedAsync(agent, leaseId, computeTask, logger);

    ComputeTaskStatus executingStatus = CreateStatus(computeTask.TaskRefId, ComputeTaskState.Executing);
    executingStatus.AgentId = agent.Id.ToString();
    executingStatus.LeaseId = leaseId.ToString();

    await PostStatusMessageAsync(computeTask, executingStatus);
}
2022-06-20 08:21:47 -04:00
/// <summary>
/// Called when a lease finishes. After the base class handling, parses the result message from the
/// lease output and posts a Complete status for the task.
/// </summary>
/// <param name="agent">Agent that executed the lease</param>
/// <param name="leaseId">The lease that finished</param>
/// <param name="computeTask">Task message carried by the lease</param>
/// <param name="outcome">Outcome of the lease</param>
/// <param name="output">Serialized result message</param>
/// <param name="logger">Logger for the lease</param>
/// <returns>Async task</returns>
public override async Task OnLeaseFinishedAsync(IAgent agent, LeaseId leaseId, ComputeTaskMessage computeTask, LeaseOutcome outcome, ReadOnlyMemory<byte> output, ILogger logger)
{
    await base.OnLeaseFinishedAsync(agent, leaseId, computeTask, outcome, output, logger);

    DateTime queuedAt = computeTask.QueuedAt.ToDateTime();
    ComputeTaskResultMessage message = ComputeTaskResultMessage.Parser.ParseFrom(output.ToArray());

    ComputeTaskStatus status = CreateStatus(computeTask.TaskRefId, ComputeTaskState.Complete);
    status.AgentId = agent.Id.ToString();
    status.LeaseId = leaseId.ToString();

    // Queue time, time until dispatch, and total time from queueing until now
    int totalMs = (int)(DateTime.UtcNow - queuedAt).TotalMilliseconds;
    status.QueueStats = new ComputeTaskQueueStats(queuedAt, computeTask.DispatchedMs, totalMs);
    status.ExecutionStats = message.ExecutionStats?.ToNative();

    if (message.ResultRefId != null)
    {
        // A result was produced; the outcome is left at its default value
        status.ResultRefId = message.ResultRefId;
    }
    else
    {
        ComputeTaskOutcome reportedOutcome = (ComputeTaskOutcome)message.Outcome;
        if (reportedOutcome != ComputeTaskOutcome.Success)
        {
            // The result message reported a failure; pass it through along with its detail text
            status.Outcome = reportedOutcome;
            status.Detail = message.Detail;
        }
        else
        {
            // No result and no reported failure; derive the outcome from the lease outcome
            status.Outcome = outcome switch
            {
                LeaseOutcome.Failed => ComputeTaskOutcome.Failed,
                LeaseOutcome.Cancelled => ComputeTaskOutcome.Cancelled,
                _ => ComputeTaskOutcome.NoResult
            };
        }
    }

    logger.LogInformation("Compute lease finished (lease: {LeaseId}, task: {TaskHash}, agent: {AgentId}, channel: {ChannelId}, result: {ResultHash}, outcome: {Outcome})", leaseId, computeTask.TaskRefId.AsRefId(), agent.Id, computeTask.ChannelId, status.ResultRefId?.ToString() ?? "(none)", status.Outcome);
    await PostStatusMessageAsync(computeTask, status);
}
2021-09-10 10:45:26 -04:00
/// <summary>
/// Checks whether an agent meets the requirements of a queue
/// </summary>
/// <param name="agent">Agent to test</param>
/// <param name="queueKey">Queue whose requirements should be checked</param>
/// <returns>True if the requirements could be retrieved and the agent meets them</returns>
async ValueTask<bool> CheckRequirements(IAgent agent, QueueKey queueKey)
{
    Requirements? queueRequirements = await GetCachedRequirementsAsync(queueKey);
    return queueRequirements != null && agent.MeetsRequirements(queueRequirements);
}
2022-08-16 09:52:47 -04:00
/// <summary>
/// Checks whether a pool meets the requirements of a queue
/// </summary>
/// <param name="pool">Pool to test</param>
/// <param name="queueKey">Queue whose requirements should be checked</param>
/// <returns>True if the requirements could be retrieved and the pool meets them</returns>
async ValueTask<bool> CheckRequirements(IPool pool, QueueKey queueKey)
{
    Requirements? queueRequirements = await GetCachedRequirementsAsync(queueKey);
    return queueRequirements != null && pool.MeetsRequirements(queueRequirements);
}
2021-09-10 10:45:26 -04:00
/// <summary>
/// Gets the requirements object for a queue, using the in-memory cache where possible. Objects that are
/// fetched successfully are cached under the requirements hash with a ten minute sliding expiration;
/// failed lookups are not cached.
/// </summary>
/// <param name="queueKey">Queue identifier</param>
/// <returns>Requirements object for the queue, or null if it could not be retrieved</returns>
async ValueTask<Requirements?> GetCachedRequirementsAsync(QueueKey queueKey)
{
    if (_requirementsCache.TryGetValue(queueKey.RequirementsHash, out Requirements? cached))
    {
        return cached;
    }

    Requirements? fetched = await GetRequirementsAsync(queueKey);
    if (fetched != null)
    {
        // The entry is added to the cache when it is disposed
        using (ICacheEntry entry = _requirementsCache.CreateEntry(queueKey.RequirementsHash))
        {
            entry.SlidingExpiration = TimeSpan.FromMinutes(10.0);
            entry.Value = fetched;
        }
    }
    return fetched;
}
2022-01-31 09:39:15 -05:00
/// <summary>
/// Reads the requirements object for a queue from storage. If it cannot be obtained (the cluster is not
/// configured, the blob is missing, or the read fails), every task currently in the queue is removed and
/// completed with a BlobNotFound outcome.
/// </summary>
/// <param name="queueKey">Queue identifier</param>
/// <returns>Requirements object for the queue, or null if it could not be retrieved</returns>
async ValueTask<Requirements?> GetRequirementsAsync(QueueKey queueKey)
{
    Requirements? result = null;

    ComputeClusterConfig? clusterConfig = await GetClusterAsync(queueKey.ClusterId);
    if (clusterConfig != null)
    {
        NamespaceId namespaceId = new NamespaceId(clusterConfig.NamespaceId);
        try
        {
            result = await _storageClient.ReadBlobAsync<Requirements>(namespaceId, queueKey.RequirementsHash);
        }
        catch (LegacyBlobNotFoundException)
        {
            // Nothing to log here; the missing object is reported by the warning below
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Unable to read blob {NamespaceId}/{RequirementsHash} from storage service", clusterConfig.NamespaceId, queueKey.RequirementsHash);
        }
    }

    if (result != null)
    {
        return result;
    }

    _logger.LogWarning("Unable to fetch requirements object for queue {QueueKey}; failing queued tasks.", queueKey);

    // Drain the queue, posting a failure status for each task
    ComputeTaskInfo? failedTask;
    while ((failedTask = await _taskScheduler.DequeueAsync(queueKey)) != null)
    {
        ComputeTaskStatus status = CreateStatus(failedTask.TaskRefId, ComputeTaskState.Complete);
        status.Outcome = ComputeTaskOutcome.BlobNotFound;
        status.Detail = $"Missing requirements object {queueKey.RequirementsHash}";

        _logger.LogInformation("Compute task failed due to missing requirements (queue: {QueueKey}, task: {TaskHash}, channel: {ChannelId})", queueKey, failedTask.TaskRefId, failedTask.ChannelId);
        await PostStatusMessageAsync(failedTask, status);
    }

    return null;
}
2022-01-31 10:38:27 -05:00
/// <summary>
/// Posts a status message to the message queue for a task's cluster and channel
/// </summary>
/// <param name="computeTask">The task that the status applies to</param>
/// <param name="status">New status for the task</param>
/// <returns>Async task</returns>
async Task PostStatusMessageAsync(ComputeTaskInfo computeTask, ComputeTaskStatus status)
{
    string queueId = GetMessageQueueId(computeTask.ClusterId, computeTask.ChannelId);
    await _messageQueue.PostAsync(queueId, status);
}
/// <summary>
/// Posts a status message to the message queue for the cluster and channel named in a lease's task message
/// </summary>
/// <param name="computeTaskMessage">Task message carried by the lease</param>
/// <param name="status">New status for the task</param>
/// <returns>Async task</returns>
async Task PostStatusMessageAsync(ComputeTaskMessage computeTaskMessage, ComputeTaskStatus status)
{
    // The message stores both identifiers as strings; convert them back to their typed forms
    ClusterId clusterId = new ClusterId(computeTaskMessage.ClusterId);
    ChannelId channelId = new ChannelId(computeTaskMessage.ChannelId);

    await _messageQueue.PostAsync(GetMessageQueueId(clusterId, channelId), status);
}
/// <summary>
/// Builds the name of the message queue for a cluster and channel, in the form "{clusterId}/{channelId}"
/// </summary>
/// <param name="clusterId">The compute cluster</param>
/// <param name="channelId">Identifier for the message channel</param>
/// <returns>Name of the message queue</returns>
static string GetMessageQueueId(ClusterId clusterId, ChannelId channelId)
{
    string queueId = $"{clusterId}/{channelId}";
    return queueId;
}
2021-08-14 17:55:46 -04:00
}
}