Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Support extraction validation failure #1259

Merged
merged 8 commits into from
Aug 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions news/1259-feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add support for extraction processing failures
- Remove `IsIdentifiable` field from `ExtractedFileVerificationMessage` and replace with new `VerifiedFileStatus` enum
- Update extraction job classes in `CohortPackager` to store this new field, and handle backwards-incompatibility when reading older extraction logs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace Smi.Common.Messages.Extraction
public class ExtractedFileVerificationMessage : ExtractMessage, IFileReferenceMessage
{
[JsonProperty(Required = Required.Always)]
public bool IsIdentifiable { get; set; }
public VerifiedFileStatus Status { get; set; }

[JsonProperty(Required = Required.Always)]
public string Report { get; set; }
Expand Down
30 changes: 30 additions & 0 deletions src/common/Smi.Common/Messages/Extraction/VerifiedFileStatus.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
namespace Smi.Common.Messages.Extraction
{
public enum VerifiedFileStatus
{
/// <summary>
/// Unused placeholder value
/// </summary>
None = 0,

/// <summary>
/// The file has not (yet) been verified
/// </summary>
NotVerified,

/// <summary>
/// The file was scanned and determined to not be identifiable
/// </summary>
NotIdentifiable,

/// <summary>
/// The file was scanned and determined to be identifiable
/// </summary>
IsIdentifiable,

/// <summary>
/// There was an error processing the file. Identifiability could not be determined
/// </summary>
ErrorWontRetry,
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public void PersistMessageToStore(
throw new ApplicationException("Received a verification message without the AnonymisedFileName set");
if (string.IsNullOrWhiteSpace(message.Report))
throw new ApplicationException("Null or empty report data");
if (message.IsIdentifiable && message.Report == "[]")
if (message.Status == VerifiedFileStatus.IsIdentifiable && message.Report == "[]")
throw new ApplicationException("No report data for message marked as identifiable");

PersistMessageToStoreImpl(message, header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ protected override void PersistMessageToStoreImpl(ExtractedFileStatusMessage mes
MongoExtractionMessageHeaderDoc.FromMessageHeader(message.ExtractionJobIdentifier, header, _dateTimeProvider),
message.DicomFilePath,
message.OutputFilePath,
wasAnonymised: false,
isIdentifiable: true,
message.Status,
statusMessage: message.StatusMessage);
VerifiedFileStatus.NotVerified,
statusMessage: message.StatusMessage
);

_database
.GetCollection<MongoFileStatusDoc>(StatusCollectionName(message.ExtractionJobIdentifier))
Expand All @@ -98,10 +98,10 @@ protected override void PersistMessageToStoreImpl(ExtractedFileVerificationMessa
MongoExtractionMessageHeaderDoc.FromMessageHeader(message.ExtractionJobIdentifier, header, _dateTimeProvider),
message.DicomFilePath,
message.OutputFilePath,
wasAnonymised: true,
isIdentifiable: message.IsIdentifiable,
ExtractedFileStatus.Anonymised,
statusMessage: message.Report);
message.Status,
statusMessage: message.Report
);

_database
.GetCollection<MongoFileStatusDoc>(StatusCollectionName(message.ExtractionJobIdentifier))
Expand Down Expand Up @@ -329,20 +329,25 @@ protected override IEnumerable<ExtractionIdentifierRejectionInfo> GetCompletedJo

protected override IEnumerable<FileAnonFailureInfo> GetCompletedJobAnonymisationFailuresImpl(Guid jobId)
{
// NOTE(rkm 2020-03-16) Files which failed anonymisation should have statuses where WasAnonymised=false and IsIdentifiable=true
var filter = FilterDefinition<MongoFileStatusDoc>.Empty;
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.Header.ExtractionJobIdentifier, jobId);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.WasAnonymised, false);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.IsIdentifiable, true);
filter &= Builders<MongoFileStatusDoc>.Filter.Or(
Builders<MongoFileStatusDoc>.Filter.Eq(x => x.ExtractedFileStatus, ExtractedFileStatus.Anonymised),
Builders<MongoFileStatusDoc>.Filter.Eq(x => x.ExtractedFileStatus, ExtractedFileStatus.Copied)
);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.VerifiedFileStatus, VerifiedFileStatus.NotVerified);
return CompletedStatusDocsForFilter(filter).Select(x => new FileAnonFailureInfo(x.Item1, x.Item2));
}

protected override IEnumerable<FileVerificationFailureInfo> GetCompletedJobVerificationFailuresImpl(Guid jobId)
{
var filter = FilterDefinition<MongoFileStatusDoc>.Empty;
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.Header.ExtractionJobIdentifier, jobId);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.WasAnonymised, true);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.IsIdentifiable, true);
filter &= Builders<MongoFileStatusDoc>.Filter.Or(
Builders<MongoFileStatusDoc>.Filter.Ne(x => x.ExtractedFileStatus, ExtractedFileStatus.Anonymised),
Builders<MongoFileStatusDoc>.Filter.Ne(x => x.ExtractedFileStatus, ExtractedFileStatus.Copied)
);
filter &= Builders<MongoFileStatusDoc>.Filter.Eq(x => x.VerifiedFileStatus, VerifiedFileStatus.IsIdentifiable);
return CompletedStatusDocsForFilter(filter).Select(x => new FileVerificationFailureInfo(x.Item1, x.Item2));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,14 @@ public class MongoFileStatusDoc : MemberwiseEquatable<MongoFileStatusDoc>, ISupp
[CanBeNull]
public string OutputFileName { get; set; }

[BsonElement("wasAnonymised")]
public bool WasAnonymised { get; set; }

[BsonElement("isIdentifiable")]
public bool IsIdentifiable { get; set; }

[BsonElement("extractedFileStatus")]
[BsonRepresentation(BsonType.String)]
public ExtractedFileStatus ExtractedFileStatus { get; set; }

[BsonElement("verifiedFileStatus")]
[BsonRepresentation(BsonType.String)]
public VerifiedFileStatus VerifiedFileStatus { get; set; }

/// <summary>
/// Should only be null for identifiable extractions where the file was successfully copied. Otherwise will be the failure reason from CTP or the report content from the IsIdentifiable verification
/// </summary>
Expand All @@ -54,20 +52,20 @@ public MongoFileStatusDoc(
[NotNull] MongoExtractionMessageHeaderDoc header,
[NotNull] string dicomFilePath,
[CanBeNull] string outputFileName,
bool wasAnonymised,
bool isIdentifiable,
ExtractedFileStatus extractedFileStatus,
[CanBeNull] string statusMessage)
VerifiedFileStatus verifiedFileStatus,
[CanBeNull] string statusMessage
)
{
Header = header ?? throw new ArgumentNullException(nameof(header));
DicomFilePath = dicomFilePath ?? throw new ArgumentNullException(nameof(dicomFilePath));
OutputFileName = outputFileName;
WasAnonymised = wasAnonymised;
IsIdentifiable = isIdentifiable;
ExtractedFileStatus = (extractedFileStatus != ExtractedFileStatus.None) ? extractedFileStatus : throw new ArgumentException(nameof(extractedFileStatus));
ExtractedFileStatus = (extractedFileStatus != ExtractedFileStatus.None) ? extractedFileStatus : throw new ArgumentException("Cannot be None", nameof(extractedFileStatus));
VerifiedFileStatus = (verifiedFileStatus != VerifiedFileStatus.None) ? verifiedFileStatus : throw new ArgumentException("Cannot be None", nameof(verifiedFileStatus));

StatusMessage = statusMessage;
if (!IsIdentifiable && string.IsNullOrWhiteSpace(statusMessage))
throw new ArgumentNullException(nameof(statusMessage));
if (string.IsNullOrWhiteSpace(StatusMessage) && ExtractedFileStatus != ExtractedFileStatus.Copied)
throw new ArgumentException("Cannot be null or whitespace except for successful file copies", nameof(statusMessage));
}

// ^ISupportInitialize
Expand All @@ -76,12 +74,22 @@ public void BeginInit() { }
// ^ISupportInitialize
public void EndInit()
{
if (!ExtraElements.ContainsKey("anonymisedFileName"))
return;
// NOTE(rkm 2022-07-28) Removed after v1.11.1
if (ExtraElements.ContainsKey("anonymisedFileName"))
{
OutputFileName = (string)ExtraElements["anonymisedFileName"];
DicomFilePath = "<unknown>";
ExtractedFileStatus = OutputFileName == null ? ExtractedFileStatus.ErrorWontRetry : ExtractedFileStatus.Anonymised;
}

OutputFileName = (string)ExtraElements["anonymisedFileName"];
DicomFilePath = "<unknown>";
ExtractedFileStatus = OutputFileName == null ? ExtractedFileStatus.ErrorWontRetry : ExtractedFileStatus.Anonymised;
// NOTE(rkm 2022-07-28) Removed after v5.1.3
if (ExtraElements.ContainsKey("isIdentifiable"))
{
if (OutputFileName == null)
VerifiedFileStatus = VerifiedFileStatus.NotVerified;
else
VerifiedFileStatus = (bool)ExtraElements["isIdentifiable"] ? VerifiedFileStatus.IsIdentifiable : VerifiedFileStatus.NotIdentifiable;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,28 +34,23 @@ public IsIdentifiableQueueConsumer(
throw new DirectoryNotFoundException($"Could not find the extraction root '{_extractionRoot}' in the filesystem");
}

protected override void ProcessMessageImpl(IMessageHeader header, ExtractedFileStatusMessage message, ulong tag)
protected override void ProcessMessageImpl(IMessageHeader header, ExtractedFileStatusMessage statusMessage, ulong tag)
{
// We should only ever receive messages regarding anonymised images
if (message.Status != ExtractedFileStatus.Anonymised)
throw new ApplicationException($"Received an {message.GetType().Name} message with status '{message.Status}' and message '{message.StatusMessage}'");
if (statusMessage.Status != ExtractedFileStatus.Anonymised)
throw new ApplicationException($"Received an {statusMessage.GetType().Name} message with Status '{statusMessage.Status}' and StatusMessage '{statusMessage.StatusMessage}'");

IFileInfo toProcess = _fileSystem.FileInfo.FromFileName(
_fileSystem.Path.Combine(
_extractionRoot,
message.ExtractionDirectory,
message.OutputFilePath
statusMessage.ExtractionDirectory,
statusMessage.OutputFilePath
)
);

if (!toProcess.Exists)
{
ErrorAndNack(
header,
tag,
$"Exception while processing {message.GetType().Name}",
new ApplicationException($"Could not find '{toProcess.FullName}'")
);
SendVerificationMessage(statusMessage, header, tag, VerifiedFileStatus.ErrorWontRetry, $"Exception while processing {statusMessage.GetType().Name}: Could not find file to process '{toProcess.FullName}'");
return;
}

Expand All @@ -67,17 +62,32 @@ protected override void ProcessMessageImpl(IMessageHeader header, ExtractedFileS
}
catch (ArithmeticException ae)
{
ErrorAndNack(header, tag, $"Exception while classifying {message.GetType().Name}", ae);
SendVerificationMessage(statusMessage, header, tag, VerifiedFileStatus.ErrorWontRetry, $"Exception while classifying {statusMessage.GetType().Name}:\n{ae}");
return;
}

foreach (Failure f in failures)
Logger.Info($"Validation failed for {f.Resource} Problem Value:{f.ProblemValue}");

var response = new ExtractedFileVerificationMessage(message)

var status = failures.Any() ? VerifiedFileStatus.IsIdentifiable : VerifiedFileStatus.NotIdentifiable;
var report = JsonConvert.SerializeObject(failures);

SendVerificationMessage(statusMessage, header, tag, status, report);
}

private void SendVerificationMessage(
ExtractedFileStatusMessage statusMessage,
IMessageHeader header,
ulong tag,
VerifiedFileStatus status,
string report
)
{
var response = new ExtractedFileVerificationMessage(statusMessage)
{
IsIdentifiable = failures.Any(),
Report = JsonConvert.SerializeObject(failures),
Status = status,
Report = report,
};
_producer.SendMessage(response, header);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,29 @@ public class CohortPackagerHostTest
{
private readonly TestDateTimeProvider _dateTimeProvider = new();

#region Fixture Methods

[OneTimeSetUp]
public void OneTimeSetUp()
{
TestLogger.Setup();
}

[OneTimeTearDown]
public void OneTimeTearDown() { }

#endregion

#region Test Methods

[SetUp]
public void SetUp() { }

[TearDown]
public void TearDown() { }

#endregion

#region Fixtures

// TODO(rkm 2020-12-17) Test if the old form of this is fixed in NUnit 3.13 (see https://github.com/nunit/nunit/issues/2574)
Expand Down Expand Up @@ -65,7 +88,7 @@ public void Dispose()

#endregion

#region Test Methods
#region Tests

private bool HaveFiles(PathFixtures pf) => Directory.Exists(pf.ProjReportsDirAbsolute) && Directory.EnumerateFiles(pf.ProjExtractDirAbsolute).Any();

Expand Down Expand Up @@ -127,11 +150,7 @@ private void VerifyReports(GlobalOptions globals, PathFixtures pf, ReportFormat
break;
}
}

#endregion

#region Tests


[TestCase(ReportFormat.Combined)]
[TestCase(ReportFormat.Split)]
public void Integration_HappyPath(ReportFormat reportFormat)
Expand Down Expand Up @@ -175,7 +194,7 @@ public void Integration_HappyPath(ReportFormat reportFormat)
ProjectNumber = "testProj1",
ExtractionJobIdentifier = jobId,
ExtractionDirectory = pf.ProjExtractDirRelative,
IsIdentifiable = false,
Status = VerifiedFileStatus.NotIdentifiable,
Report = "[]",
DicomFilePath = "series-1-orig-1.dcm",
};
Expand Down Expand Up @@ -267,7 +286,7 @@ public void Integration_BumpyRoad(ReportFormat reportFormat)
ProjectNumber = "testProj1",
ExtractionJobIdentifier = jobId,
ExtractionDirectory = pf.ProjExtractDirRelative,
IsIdentifiable = false,
Status = VerifiedFileStatus.NotIdentifiable,
Report = "[]",
DicomFilePath = "series-1-orig-1.dcm",
};
Expand All @@ -288,7 +307,7 @@ public void Integration_BumpyRoad(ReportFormat reportFormat)
ProjectNumber = "testProj1",
ExtractionJobIdentifier = jobId,
ExtractionDirectory = pf.ProjExtractDirRelative,
IsIdentifiable = true,
Status = VerifiedFileStatus.IsIdentifiable,
Report = failureReport,
DicomFilePath = "series-2-orig-2.dcm",
};
Expand Down Expand Up @@ -361,13 +380,13 @@ public void Integration_IdentifiableExtraction_HappyPath()
var testExtractFileStatusMessage2 = new ExtractedFileStatusMessage
{
JobSubmittedAt = _dateTimeProvider.UtcNow(),
OutputFilePath = "src_missing.dcm",
OutputFilePath = null,
ProjectNumber = "testProj1",
ExtractionJobIdentifier = jobId,
ExtractionDirectory = pf.ProjExtractDirRelative,
Status = ExtractedFileStatus.FileMissing,
StatusMessage = null,
DicomFilePath = "study-1-orig-2.dcm",
StatusMessage = "Couldn't find src_missing.dcm",
DicomFilePath = "src_missing.dcm",
IsIdentifiableExtraction = true,
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public void TestPersistMessageToStore_IsIdentifiableMessage()
// Report needs to contain content if marked as IsIdentifiable
message = new ExtractedFileVerificationMessage();
message.OutputFilePath = "anon.dcm";
message.IsIdentifiable = true;
message.Status = VerifiedFileStatus.IsIdentifiable;
message.Report = "[]";
Assert.Throws<ApplicationException>(() => testExtractJobStore.PersistMessageToStore(message, header));
// NOTE(rkm 2020-07-23) The actual report content is verified to be valid the message consumer, so don't need to re-check here
Expand All @@ -98,7 +98,7 @@ public void TestPersistMessageToStore_IsIdentifiableMessage()
// Report can be empty if not marked as IsIdentifiable
message = new ExtractedFileVerificationMessage();
message.OutputFilePath = "anon.dcm";
message.IsIdentifiable = false;
message.Status = VerifiedFileStatus.NotIdentifiable;
message.Report = "[]";
testExtractJobStore.PersistMessageToStore(message, header);
}
Expand Down
Loading