Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor project path resolvers in CohortExtractor #1957

Merged
merged 18 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data/microserviceConfigs/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ CohortExtractorOptions:

AuditorType: "SmiServices.Microservices.CohortExtractor.Audit.NullAuditExtractions"
RequestFulfillerType: "SmiServices.Microservices.CohortExtractor.RequestFulfillers.FromCataloguesExtractionRequestFulfiller"
ProjectPathResolverType: "SmiServices.Microservices.CohortExtractor.ProjectPathResolvers.DefaultProjectPathResolver"
ProjectPathResolverType: "StudySeriesSOPProjectPathResolver"
ExtractAnonRoutingKey: anon
ExtractIdentRoutingKey: ident
# Writes (Producer) to this exchange
Expand Down
2 changes: 1 addition & 1 deletion docs/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ CohortExtractorOptions:

AuditorType: "Microservices.CohortExtractor.Audit.NullAuditExtractions"
RequestFulfillerType: "Microservices.CohortExtractor.Execution.RequestFulfillers.FromCataloguesExtractionRequestFulfiller"
ProjectPathResolverType: "Microservices.CohortExtractor.Execution.ProjectPathResolvers.DefaultProjectPathResolver"
ProjectPathResolverType: "StudySeriesSOPProjectPathResolver"
ExtractAnonRoutingKey: anon
ExtractIdentRoutingKey: ident
# Writes (Producer) to this exchange
Expand Down
6 changes: 6 additions & 0 deletions news/1957-change.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Refactor project path resolvers in CohortExtractor:

- `DefaultProjectPathResolver` is now `StudySeriesOriginalFilenameProjectPathResolver`
- Undo handling UIDs with leading dot (#1506) as this was causing difficulties with path lookups elsewhere
- Add `StudySeriesSOPProjectPathResolver` which generates filenames using SOPInstanceUID instead of the original file name. This is now the default path resolver
- Disallow null Study/Series/SOPInstanceUID values, which should never occur in practice
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using SmiServices.Microservices.CohortExtractor.RequestFulfillers;
using SmiServices.Microservices.CohortExtractor.RequestFulfillers.Dynamic;
using System;
using System.IO.Abstractions;
using System.Linq;
using System.Text.RegularExpressions;

Expand All @@ -42,20 +43,24 @@ public class CohortExtractorHost : MicroserviceHost
private IProjectPathResolver? _pathResolver;
private IProducerModel? _fileMessageProducer;

private readonly IFileSystem _fileSystem;

/// <summary>
/// Creates a new instance of the host with the given options, optionally overriding the auditor, request fulfiller and file system
/// </summary>
/// <param name="options">Settings for the microservice (location of rabbit, queue names etc)</param>
/// <param name="auditor">Optional override for the value specified in <see cref="GlobalOptions.CohortExtractorOptions"/></param>
/// <param name="fulfiller">Optional override for the value specified in <see cref="GlobalOptions.CohortExtractorOptions"/></param>
public CohortExtractorHost(GlobalOptions options, IAuditExtractions? auditor, IExtractionRequestFulfiller? fulfiller)
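/// <param name="fileSystem">Optional file system abstraction passed to the project path resolver. A new <see cref="FileSystem"/> is used when null</param>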
public CohortExtractorHost(GlobalOptions options, IAuditExtractions? auditor, IExtractionRequestFulfiller? fulfiller, IFileSystem? fileSystem = null)
: base(options)
{
_consumerOptions = options.CohortExtractorOptions!;
_consumerOptions.Validate();

_auditor = auditor;
_fulfiller = fulfiller;
_fileSystem = fileSystem ?? new FileSystem();
}

/// <summary>
Expand Down Expand Up @@ -156,9 +161,8 @@ private void InitializeExtractionSources(IRDMPPlatformRepositoryServiceLocator r
}

_pathResolver = string.IsNullOrWhiteSpace(_consumerOptions.ProjectPathResolverType)
? new DefaultProjectPathResolver()
: ObjectFactory.CreateInstance<IProjectPathResolver>(
_consumerOptions.ProjectPathResolverType, typeof(IProjectPathResolver).Assembly, repositoryLocator);
? new StudySeriesSOPProjectPathResolver(_fileSystem)
: ProjectPathResolverFactory.Create(_consumerOptions.ProjectPathResolverType, _fileSystem);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
using SmiServices.Common.Messages.Extraction;
using SmiServices.Microservices.CohortExtractor.RequestFulfillers;
using System.IO.Abstractions;

namespace SmiServices.Microservices.CohortExtractor.ProjectPathResolvers
{
/// <summary>
/// Acts like <see cref="DefaultProjectPathResolver"/> but with no "-an" component to indicate files have been anonymised. In most cases this results in an identical filename to the source file but can include the addition of a .dcm extension where it is missing
/// Acts like <see cref="StudySeriesOriginalFilenameProjectPathResolver"/> but with no "-an" component to indicate files have been anonymised. In most cases this results in an identical filename to the source file but can include the addition of a .dcm extension where it is missing
/// </summary>
public class NoSuffixProjectPathResolver : DefaultProjectPathResolver
public class NoSuffixProjectPathResolver : StudySeriesOriginalFilenameProjectPathResolver
{
public NoSuffixProjectPathResolver()
public NoSuffixProjectPathResolver(IFileSystem fileSystem) : base(fileSystem) { }

/// <summary>
/// Returns the output path produced by the base resolver, with a trailing
/// <see cref="ProjectPathResolverConstants.ANON_EXT"/> swapped for
/// <see cref="ProjectPathResolverConstants.IDENT_EXT"/> so that the file name carries no "-an" marker
/// </summary>
/// <param name="result">The matched file to generate an output path for</param>
/// <param name="message">The extraction request the file was matched for</param>
/// <returns>The output path, relative to the extraction directory</returns>
public override string GetOutputPath(QueryToExecuteResult result, ExtractionRequestMessage message)
{
    string outputPath = base.GetOutputPath(result, message);

    // Only the trailing suffix is touched. A blanket Replace("-an", "") would also strip "-an" from
    // the study/series directories or from the middle of the original file name (e.g. "scan-anterior.dcm").
    if (!outputPath.EndsWith(ProjectPathResolverConstants.ANON_EXT, System.StringComparison.Ordinal))
        return outputPath;

    string withoutSuffix = outputPath[..^ProjectPathResolverConstants.ANON_EXT.Length];
    return withoutSuffix + ProjectPathResolverConstants.IDENT_EXT;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
namespace SmiServices.Microservices.CohortExtractor.ProjectPathResolvers;
/// <summary>
/// File name suffixes shared by the project path resolvers when building output paths
/// </summary>
public static class ProjectPathResolverConstants
{
/// <summary>
/// Suffix, including the file extension, used for the output file name when creating anonymised files
/// </summary>
public const string ANON_EXT = "-an.dcm";

/// <summary>
/// File extension used for the output file name when creating "identifiable" files
/// </summary>
public const string IDENT_EXT = ".dcm";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using System;
using System.IO.Abstractions;

namespace SmiServices.Microservices.CohortExtractor.ProjectPathResolvers;
/// <summary>
/// Creates <see cref="IProjectPathResolver"/> instances from a resolver type name
/// </summary>
internal static class ProjectPathResolverFactory
{
    /// <summary>
    /// Creates the <see cref="IProjectPathResolver"/> whose simple class name matches <paramref name="projectPathResolverType"/>
    /// </summary>
    /// <param name="projectPathResolverType">Simple (unqualified) class name of the resolver to create</param>
    /// <param name="fileSystem">File system abstraction passed to the resolver's constructor</param>
    /// <returns>A new instance of the requested resolver</returns>
    /// <exception cref="NotImplementedException">Thrown when the name does not match any known resolver</exception>
    public static IProjectPathResolver Create(string projectPathResolverType, IFileSystem fileSystem)
    {
        // Matching is on the exact simple class name, so namespace-qualified names fall through to the default arm
        return projectPathResolverType switch
        {
            nameof(StudySeriesSOPProjectPathResolver) => new StudySeriesSOPProjectPathResolver(fileSystem),
            nameof(NoSuffixProjectPathResolver) => new NoSuffixProjectPathResolver(fileSystem),
            nameof(StudySeriesOriginalFilenameProjectPathResolver) => new StudySeriesOriginalFilenameProjectPathResolver(fileSystem),
            _ => throw new NotImplementedException($"No case for IProjectPathResolver type '{projectPathResolverType}'"),
        };
    }
}
Original file line number Diff line number Diff line change
@@ -1,29 +1,33 @@
using SmiServices.Common.Messages.Extraction;
using SmiServices.Microservices.CohortExtractor.RequestFulfillers;
using System;
using System.IO;
using System.IO.Abstractions;

namespace SmiServices.Microservices.CohortExtractor.ProjectPathResolvers
{
public class DefaultProjectPathResolver : IProjectPathResolver
public class StudySeriesOriginalFilenameProjectPathResolver : IProjectPathResolver
{
public string AnonExt { get; protected set; } = "-an.dcm";
public string IdentExt { get; protected set; } = ".dcm";

private static readonly string[] _replaceableExtensions = [".dcm", ".dicom"];

private readonly IFileSystem _fileSystem;

public StudySeriesOriginalFilenameProjectPathResolver(IFileSystem fileSystem)
{
_fileSystem = fileSystem;
}

/// <summary>
/// Returns the output path for the anonymised file, relative to the ExtractionDirectory
/// </summary>
/// <param name="result"></param>
/// <param name="message"></param>
/// <returns></returns>
public string GetOutputPath(QueryToExecuteResult result, ExtractionRequestMessage message)
public virtual string GetOutputPath(QueryToExecuteResult result, ExtractionRequestMessage message)
{
string extToUse = message.IsIdentifiableExtraction ? IdentExt : AnonExt;
string extToUse = message.IsIdentifiableExtraction ? ProjectPathResolverConstants.IDENT_EXT : ProjectPathResolverConstants.ANON_EXT;

// The extension of the input DICOM file can be anything (or nothing), but here we try to standardise the output file name to have the required extension
string fileName = Path.GetFileName(result.FilePathValue);
string fileName = _fileSystem.Path.GetFileName(result.FilePathValue);
if (string.IsNullOrWhiteSpace(fileName))
throw new ArgumentNullException(nameof(result));

Expand All @@ -39,14 +43,11 @@ public string GetOutputPath(QueryToExecuteResult result, ExtractionRequestMessag
if (!replaced)
fileName += extToUse;

// Remove any leading periods from the UIDs
string? studyUID = result.StudyTagValue?.TrimStart('.');
string? seriesUID = result.SeriesTagValue?.TrimStart('.');

return Path.Combine(
studyUID ?? "unknown",
seriesUID ?? "unknown",
fileName);
return _fileSystem.Path.Combine(
result.StudyTagValue,
result.SeriesTagValue,
fileName
);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using SmiServices.Common.Messages.Extraction;
using SmiServices.Microservices.CohortExtractor.RequestFulfillers;
using System.IO.Abstractions;

namespace SmiServices.Microservices.CohortExtractor.ProjectPathResolvers;

/// <summary>
/// Builds output paths of the form
/// StudyInstanceUID/SeriesInstanceUID/SOPInstanceUID followed by the extension for the extraction type:
/// <see cref="ProjectPathResolverConstants.IDENT_EXT"/> for identifiable extractions, otherwise
/// <see cref="ProjectPathResolverConstants.ANON_EXT"/>
/// </summary>
public class StudySeriesSOPProjectPathResolver : IProjectPathResolver
{
    private readonly IFileSystem _fileSystem;

    public StudySeriesSOPProjectPathResolver(IFileSystem fileSystem) => _fileSystem = fileSystem;

    /// <inheritdoc/>
    public string GetOutputPath(QueryToExecuteResult result, ExtractionRequestMessage request)
    {
        // Pick the suffix first: identifiable extractions keep the plain extension
        string suffix;
        if (request.IsIdentifiableExtraction)
            suffix = ProjectPathResolverConstants.IDENT_EXT;
        else
            suffix = ProjectPathResolverConstants.ANON_EXT;

        // The file name is the SOPInstanceUID rather than the source file's name
        string outputFileName = result.InstanceTagValue + suffix;

        return _fileSystem.Path.Combine(result.StudyTagValue, result.SeriesTagValue, outputFileName);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ public IEnumerable<QueryToExecuteResult> Execute(string valueToLookup, List<IRej
}
}

var path = Columns.FilePathColumn!.GetRuntimeName();
var study = Columns.StudyTagColumn?.GetRuntimeName();
var series = Columns.SeriesTagColumn?.GetRuntimeName();
var instance = Columns.InstanceTagColumn?.GetRuntimeName();
string path = Columns.FilePathColumn?.GetRuntimeName() ?? throw new NullReferenceException("No FilePathColumn set");
string study = Columns.StudyTagColumn?.GetRuntimeName() ?? throw new NullReferenceException("No StudyTagColumn set");
string series = Columns.SeriesTagColumn?.GetRuntimeName() ?? throw new NullReferenceException("No SeriesTagColumn set");
string instance = Columns.InstanceTagColumn?.GetRuntimeName() ?? throw new NullReferenceException("No InstanceTagColumn set");

using DbConnection con = Server!.GetConnection();
con.Open();
Expand Down Expand Up @@ -158,13 +158,14 @@ public IEnumerable<QueryToExecuteResult> Execute(string valueToLookup, List<IRej
}
}

yield return
new QueryToExecuteResult((string)imagePath,
study == null ? null : (string)reader[study],
series == null ? null : (string)reader[series],
instance == null ? null : (string)reader[instance],
reject,
rejectReason);
yield return new QueryToExecuteResult(
(string)imagePath,
(string)reader[study],
(string)reader[series],
(string)reader[instance],
reject,
rejectReason
);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ namespace SmiServices.Microservices.CohortExtractor.RequestFulfillers
public class QueryToExecuteResult : MemberwiseEquatable<QueryToExecuteResult>
{
public readonly string FilePathValue;
public readonly string? StudyTagValue;
public readonly string? SeriesTagValue;
public readonly string? InstanceTagValue;
public readonly string StudyTagValue;
public readonly string SeriesTagValue;
public readonly string InstanceTagValue;

public readonly bool Reject;
public readonly string? RejectReason;

public QueryToExecuteResult(
string filePathValue,
string? studyTagValue,
string? seriesTagValue,
string? instanceTagValue,
string studyTagValue,
string seriesTagValue,
string instanceTagValue,
bool rejection,
string? rejectionReason
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,85 +33,6 @@ protected override void SetUp()
TestLogger.Setup();
}

/// <summary>
/// Checks that a request keyed on SeriesInstanceUID, against a table that has no study or SOP instance columns,
/// returns every row of the requested series when no filter is configured
/// </summary>
[TestCase(DatabaseType.MicrosoftSQLServer)]
[TestCase(DatabaseType.MySql)]
public void FromCataloguesExtractionRequestFulfiller_NormalMatching_SeriesInstanceUIDOnly(DatabaseType databaseType)
{
var db = GetCleanedServer(databaseType);

// Table holds only the series UID, an Extractable flag and the image path
var dt = new DataTable();
dt.Columns.Add("SeriesInstanceUID");
dt.Columns.Add("Extractable", typeof(bool));
dt.Columns.Add(QueryToExecuteColumnSet.DefaultImagePathColumnName);

// Two series with two rows each, one row per Extractable value
dt.Rows.Add("123", true, "/images/1.dcm");
dt.Rows.Add("123", false, "/images/2.dcm");
dt.Rows.Add("1234", false, "/images/3.dcm");
dt.Rows.Add("1234", true, "/images/4.dcm");

dt.SetDoNotReType(true);

var tbl = db.CreateTable("FromCataloguesExtractionRequestFulfillerTests", dt);
var catalogue = Import(tbl);

var fulfiller = new FromCataloguesExtractionRequestFulfiller([catalogue]);

// Request series "123" only
var matching = fulfiller.GetAllMatchingFiles(new ExtractionRequestMessage
{
KeyTag = "SeriesInstanceUID",
ExtractionIdentifiers = new List<string>(["123"]),
}, new NullAuditExtractions()).ToArray();

// One result for the single identifier, accepting both rows of series "123" regardless of the Extractable flag
Assert.That(matching, Has.Length.EqualTo(1));
Assert.Multiple(() =>
{
Assert.That(matching[0].Accepted, Has.Count.EqualTo(2));
Assert.That(matching[0].Accepted.Count(f => f.FilePathValue.Equals("/images/1.dcm")), Is.EqualTo(1));
Assert.That(matching[0].Accepted.Count(f => f.FilePathValue.Equals("/images/2.dcm")), Is.EqualTo(1));
});
}

/// <summary>
/// Checks that a mandatory extraction filter on the catalogue is applied to a request keyed on SeriesInstanceUID,
/// so that only rows satisfying the filter are accepted
/// </summary>
[TestCase(DatabaseType.MicrosoftSQLServer)]
[TestCase(DatabaseType.MySql)]
public void FromCataloguesExtractionRequestFulfiller_MandatoryFilter_SeriesInstanceUIDOnly(DatabaseType databaseType)
{
var db = GetCleanedServer(databaseType);

// Table holds only the series UID, an Extractable flag and the image path
var dt = new DataTable();
dt.Columns.Add("SeriesInstanceUID");
dt.Columns.Add("Extractable", typeof(bool));
dt.Columns.Add(QueryToExecuteColumnSet.DefaultImagePathColumnName);

// Two series with two rows each, one row per Extractable value
dt.Rows.Add("123", true, "/images/1.dcm");
dt.Rows.Add("123", false, "/images/2.dcm");
dt.Rows.Add("1234", false, "/images/3.dcm");
dt.Rows.Add("1234", true, "/images/4.dcm");

dt.SetDoNotReType(true);

var tbl = db.CreateTable("FromCataloguesExtractionRequestFulfillerTests", dt);
var catalogue = Import(tbl);

// Attach a mandatory filter restricting results to rows where Extractable = 1
var ei = catalogue.GetAllExtractionInformation(ExtractionCategory.Any).First();
var filter = new ExtractionFilter(CatalogueRepository, "Extractable only", ei)
{
IsMandatory = true,
WhereSQL = "Extractable = 1"
};
filter.SaveToDatabase();
var fulfiller = new FromCataloguesExtractionRequestFulfiller([catalogue]);

// Request series "123" only
var matching = fulfiller.GetAllMatchingFiles(new ExtractionRequestMessage
{
KeyTag = "SeriesInstanceUID",
ExtractionIdentifiers = new List<string>(["123"]),
}, new NullAuditExtractions()).ToArray();

// Only the Extractable row of series "123" is accepted
Assert.That(matching, Has.Length.EqualTo(1));
Assert.That(matching[0].Accepted, Has.Count.EqualTo(1));
Assert.That(matching[0].Accepted.Count(f => f.FilePathValue.Equals("/images/1.dcm")), Is.EqualTo(1));
}


[TestCase(DatabaseType.MicrosoftSQLServer)]
[TestCase(DatabaseType.MySql)]
Expand Down
Loading