Skip to content

Commit

Permalink
Added inference & temporary queue depth in message --ECL
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Aug 22, 2023
1 parent 5ef722d commit cd720b8
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 18 deletions.
51 changes: 47 additions & 4 deletions src/SIL.Machine.AspNetCore/Services/ClearMLNmtEngineBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ CancellationToken cancellationToken
clearMLTaskId = clearMLTask.Id;
}

IReadOnlyList<string>? previousClearMLTasks = await _clearMLService.GetTasksAheadInQueueAsync(
clearMLTaskId,
cancellationToken
);
IReadOnlyList<string?>? localBuildsAheadAtStart = (await _engines.GetAllAsync())
.Where(e => e.BuildState is BuildState.Pending or BuildState.Active)
.Select(e => e.Id)
.ToList();

int numberOfTasksAheadInQueue = previousClearMLTasks?.Union(localBuildsAheadAtStart).Count() ?? 0;

ProgressStatus previousStatus = new();

int lastIteration = 0;
while (true)
{
Expand Down Expand Up @@ -109,18 +122,48 @@ or ClearMLTaskStatus.Completed
switch (clearMLTask.Status)
{
case ClearMLTaskStatus.Queued:
IReadOnlyList<string>? currentClearMLTasks = await _clearMLService.GetTasksAheadInQueueAsync(
clearMLTaskId,
cancellationToken
);
if (currentClearMLTasks is not null && previousClearMLTasks is not null)
{
IEnumerable<string>? newTasks = currentClearMLTasks?.Except(previousClearMLTasks);
numberOfTasksAheadInQueue +=
newTasks?.Select(taskName => !localBuildsAheadAtStart?.Contains(taskName)).Count() ?? 0;
IEnumerable<string>? finishedTasks = previousClearMLTasks?.Except(currentClearMLTasks!);
numberOfTasksAheadInQueue -= finishedTasks?.Count() ?? 0;
}
previousClearMLTasks = currentClearMLTasks;
goto case ClearMLTaskStatus.InProgress;
case ClearMLTaskStatus.InProgress:
ProgressStatus? status = await _clearMLService.GetStatusAsync(clearMLTaskId, cancellationToken);
if (status is not null)
float inferencePercentComplete = 0.0f;
if (clearMLTask.Status is ClearMLTaskStatus.InProgress)
{
numberOfTasksAheadInQueue = 0;
inferencePercentComplete = await _clearMLService.GetInferencePercentCompleteAsync(
clearMLTaskId
);
}
ProgressStatus currentStatus =
new(
clearMLTask.LastIteration,
(clearMLTask.LastIteration / (float)_options.CurrentValue.MaxSteps) * 90.0
+ (inferencePercentComplete / 100.0f) * 10.0,
$"Number of tasks ahead in queue: {numberOfTasksAheadInQueue}"
);
if (!previousStatus.Equals(currentStatus))
{
await _platformService.UpdateBuildStatusAsync(buildId, (ProgressStatus)status!);
await _platformService.UpdateBuildStatusAsync(buildId, currentStatus);
previousStatus = currentStatus;
}
lastIteration = clearMLTask.LastIteration;
break;
case ClearMLTaskStatus.Completed:
currentStatus = new(clearMLTask.LastIteration, 100.0, "Number of tasks ahead in queue: 0");
if (lastIteration != clearMLTask.LastIteration)
{
await _platformService.UpdateBuildStatusAsync(buildId, clearMLTask.LastIteration);
await _platformService.UpdateBuildStatusAsync(buildId, currentStatus);
lastIteration = clearMLTask.LastIteration;
}
break;
Expand Down
59 changes: 46 additions & 13 deletions src/SIL.Machine.AspNetCore/Services/ClearMLService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,47 @@ public async Task<IReadOnlyDictionary<string, double>> GetTaskMetricsAsync(
return results;
}

public async Task<ProgressStatus?> GetStatusAsync(string taskId, CancellationToken cancellationToken = default)
private async Task<string?> GetMetricAsync(
string taskId,
string metricName,
string variantName,
CancellationToken cancellationToken = default
)
{
var body = new JsonObject { ["id"] = taskId };
JsonObject? result = await CallAsync("tasks", "get_by_id_ex", body, cancellationToken);
var tasks = (JsonArray?)result?["data"]?["tasks"];
if (tasks is null || tasks.Count == 0)
return null;
JsonObject task = (JsonObject)tasks[0]!;
string metricNameHash,
variantNameHash;
using (var md5 = MD5.Create())
{
metricNameHash = Convert.ToHexString(md5.ComputeHash(Encoding.ASCII.GetBytes(metricName))).ToLower();
variantNameHash = Convert.ToHexString(md5.ComputeHash(Encoding.ASCII.GetBytes(variantName))).ToLower();
}
return (string?)task?["last_metrics"]?[metricNameHash]?[variantNameHash];
}

public async Task<float> GetInferencePercentCompleteAsync(string id, CancellationToken cancellationToken = default)
{
return float.Parse(
await GetMetricAsync(id, "inference_percent_complete", "inference_percent_complete") ?? "0.0"
);
}

public async Task<IReadOnlyList<string>?> GetTasksAheadInQueueAsync(
string taskId,
CancellationToken cancellationToken = default
)
{
ClearMLTask? task = await GetTaskAsync(taskId, cancellationToken);
if (task is null)
return null;
JsonObject? result = await CallAsync(
"queues",
"get_all",
"get_all_ex",
// Uses python regex syntax to only match exact queue name. See https://clear.ml/docs/latest/docs/references/api/queues#post-queuesget_all
new JsonObject { ["name"] = $"^{_options.CurrentValue.Queue}$" },
cancellationToken
Expand All @@ -218,20 +251,20 @@ public async Task<IReadOnlyDictionary<string, double>> GetTaskMetricsAsync(
if (entriesNode is null)
return null;
JsonArray entries = (JsonArray)entriesNode;
int numTasksAheadInQueue = 0;
foreach (var entry in entries)
List<string> tasksAheadInQueue = new();
foreach (JsonNode? entry in entries)
{
if ((string?)entry?["task"] == taskId)
JsonNode? task_node = entry?["task"];
if (task_node is null)
return null;
string? id = (string?)task_node["id"];
string? name = (string?)task_node["name"];
if (id == taskId)
break;
numTasksAheadInQueue++;
if (name is not null)
tasksAheadInQueue.Add(name);
}
ProgressStatus status =
new(
task.LastIteration,
((float)task.LastIteration / (float)_options.CurrentValue.MaxSteps) * 90, //90% at 100% of training
$"Number of tasks ahead in queue: {numTasksAheadInQueue}"
);
return status;
return tasksAheadInQueue;
}

private async Task<ClearMLTask?> GetTaskAsync(JsonObject body, CancellationToken cancellationToken = default)
Expand Down
7 changes: 6 additions & 1 deletion src/SIL.Machine.AspNetCore/Services/IClearMLService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,10 @@ Task<IReadOnlyDictionary<string, double>> GetTaskMetricsAsync(
string id,
CancellationToken cancellationToken = default
);
Task<ProgressStatus?> GetStatusAsync(string buildId, CancellationToken cancellationToken = default);
Task<IReadOnlyList<string>?> GetTasksAheadInQueueAsync(
string buildId,
CancellationToken cancellationToken = default
);

Task<float> GetInferencePercentCompleteAsync(string id, CancellationToken cancellationToken = default);
}

0 comments on commit cd720b8

Please sign in to comment.