diff --git a/.github/workflows/dotnet-core.yml b/.github/workflows/dotnet-core.yml
index 67d2adec..9f51c721 100644
--- a/.github/workflows/dotnet-core.yml
+++ b/.github/workflows/dotnet-core.yml
@@ -87,13 +87,22 @@ jobs:
mv target/nerd-0.0.1-SNAPSHOT.jar ../../release/smi-nerd-$(fgrep AssemblyVersion ../../SharedAssemblyInfo.cs | cut -d'"' -f2).jar
- name: Package ii binary
run: |
+ tag="$(fgrep AssemblyVersion SharedAssemblyInfo.cs | cut -d'"' -f2)"
cd ii
dotnet publish --runtime win-x64 -c Release --self-contained true -o ../ii-win-x64
- dotnet publish --runtime linux-x64 -c Release --self-contained true -o ../ii-$(fgrep AssemblyVersion ../SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64
+ dotnet publish --runtime linux-x64 -c Release --self-contained true -o ../ii-"$tag"-cli-linux-x64
cd ../ii-win-x64
- zip -q9r ../release/ii-$(fgrep AssemblyVersion ../SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-win-x64.zip .
+ zip -q9r ../release/ii-"$tag"-cli-win-x64.zip .
cd ..
- tar -zcvf ./release/ii-$(fgrep AssemblyVersion SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64.tar.gz ii-$(fgrep AssemblyVersion SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64
+ tar -zcvf ./release/ii-"$tag"-cli-linux-x64.tar.gz ii-"$tag"-cli-linux-x64
+ git_tag="$(git rev-parse --short HEAD)"
+ cp ./release/ii-"$tag"-cli-linux-x64.tar.gz ./ii-"$git_tag"-cli-linux-x64.tar.gz
+ - name: Store created ii binary
+ uses: actions/upload-artifact@v3
+ with:
+ name: ii
+ path: "*.tar.gz"
+ retention-days: 1
- name: Test ii binary
run: |
set -e
@@ -122,6 +131,7 @@ jobs:
- name: Store created nupkg files
uses: actions/upload-artifact@v3
with:
+ name: IsIdentifiablePlugin
path: release/IsIdentifiablePlugin.*.nupkg
retention-days: 1
- name: Upload binaries to release
diff --git a/IsIdentifiable/Failures/FailurePart.cs b/IsIdentifiable/Failures/FailurePart.cs
index 9e3d14fc..21436991 100644
--- a/IsIdentifiable/Failures/FailurePart.cs
+++ b/IsIdentifiable/Failures/FailurePart.cs
@@ -1,4 +1,4 @@
-using Equ;
+using Equ;
namespace IsIdentifiable.Failures;
@@ -71,4 +71,7 @@ public bool Includes(int start, int length)
return false;
}
+
+ /// <inheritdoc/>
+ public override string ToString() => $"{nameof(FailurePart)}({Word},{Offset},{Classification})";
}
diff --git a/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs b/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs
new file mode 100644
index 00000000..132a92ff
--- /dev/null
+++ b/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs
@@ -0,0 +1,53 @@
+using CommandLine;
+using IsIdentifiable.Reporting.Reports;
+using System;
+
+namespace IsIdentifiable.Options;
+
+/// <summary>
+/// CLI options for the validator
+/// </summary>
+[Verb("validate", HelpText = "Validate a FailureStoreReport")]
+public class IsIdentifiableReportValidatorOptions
+{
+ /// <summary>
+ /// The CSV list of failures to process. Must be in the format of a <see cref="FailureStoreReport"/>
+ /// </summary>
+ [Option('f', "file",
+ Required = true,
+ HelpText = "Pre load an existing failures file"
+ )]
+ public string FailuresCsv { get; set; }
+
+ /// <summary>
+ /// Sets UseSystemConsole to true for Terminal.gui (i.e. uses the NetDriver which is based on System.Console)
+ /// </summary>
+ [Option("usc", HelpText = "Sets UseSystemConsole to true for Terminal.gui (i.e. uses the NetDriver which is based on System.Console)")]
+ public bool UseSystemConsole { get; internal set; }
+
+ /// <summary>
+ /// Sets the user interface to use a specific color palette yaml file
+ /// </summary>
+ [Option("theme", HelpText = "Sets the user interface to use a specific color palette yaml file")]
+ public string Theme { get; set; }
+
+ /// <summary>
+ /// Stop after the first error encountered
+ /// </summary>
+ [Option("stop-at-first-error", Required = false, Default = false, HelpText = "Stop after the first error encountered")]
+ public bool StopAtFirstError { get; set; }
+
+
+ /// <summary>
+ /// Populates values in this instance where no value yet exists and there is a value in <paramref name="globalOpts"/>
+ /// to inherit.
+ /// </summary>
+ /// <param name="globalOpts">Reviewer options to copy unset values from; must not be null</param>
+ public virtual void InheritValuesFrom(IsIdentifiableReviewerOptions globalOpts)
+ {
+ ArgumentNullException.ThrowIfNull(globalOpts);
+
+ if (Theme == null && !string.IsNullOrWhiteSpace(globalOpts.Theme))
+ Theme = globalOpts.Theme;
+ }
+}
diff --git a/IsIdentifiable/Redacting/OutBase.cs b/IsIdentifiable/Redacting/OutBase.cs
index 6757fb21..94419797 100644
--- a/IsIdentifiable/Redacting/OutBase.cs
+++ b/IsIdentifiable/Redacting/OutBase.cs
@@ -23,6 +23,11 @@ public abstract class OutBase
///
public List Rules { get; }
+ ///
+ /// Temp -- do not use.
+ ///
+ public readonly List PartRules_Temp = new();
+
///
/// Persistence of
///
@@ -68,8 +73,11 @@ protected OutBase(IFileInfo rulesFile, IFileSystem fileSystem, string defaultFil
else
{
//populated rules file already existed
- var deserializer = new Deserializer();
- Rules = deserializer.Deserialize>(existingRules) ?? new List();
+ var builder = new DeserializerBuilder();
+ builder.WithTagMapping("!IgnorePartRegexRule", typeof(PartPatternFilterRule));
+ var allRules = builder.Build().Deserialize>(existingRules) ?? new List();
+ Rules = allRules.OfType().ToList();
+ PartRules_Temp = allRules.OfType().ToList();
}
}
}
diff --git a/IsIdentifiable/Redacting/ReportReader.cs b/IsIdentifiable/Redacting/ReportReader.cs
index baf97dca..99780bbe 100644
--- a/IsIdentifiable/Redacting/ReportReader.cs
+++ b/IsIdentifiable/Redacting/ReportReader.cs
@@ -1,6 +1,8 @@
using IsIdentifiable.Failures;
using IsIdentifiable.Reporting.Reports;
+using IsIdentifiable.Rules;
using System;
+using System.Collections.Generic;
using System.IO.Abstractions;
using System.Linq;
using System.Threading;
@@ -52,10 +54,10 @@ public ReportReader(IFileInfo csvFile)
///
///
///
- public ReportReader(IFileInfo csvFile, Action loadedRows, IFileSystem fileSystem, CancellationToken token)
+ public ReportReader(IFileInfo csvFile, Action loadedRows, IFileSystem fileSystem, CancellationToken token, List? partRules = null)
{
var report = new FailureStoreReport("", 0, fileSystem);
- Failures = FailureStoreReport.Deserialize(csvFile, loadedRows, token).ToArray();
+ Failures = FailureStoreReport.Deserialize(csvFile, loadedRows, token, partRules).ToArray();
}
///
@@ -77,9 +79,11 @@ public bool Next()
/// by the total number of
///
///
- public void GoTo(int index)
+ public bool GoTo(int index)
{
+ var original = _current;
_current = Math.Min(Math.Max(0, index), Failures.Length);
+ return _current != original && (_current != Failures.Length);
}
///
@@ -87,8 +91,5 @@ public void GoTo(int index)
/// the is.
///
///
- public string DescribeProgress()
- {
- return $"{_current}/{Failures.Length}";
- }
+    public string DescribeProgress() => $"{Math.Min(_current + 1, Failures.Length)}/{Failures.Length}"; // 1-based position, clamped because GoTo may leave _current at Failures.Length (and the report may be empty)
}
diff --git a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs
index c77146f8..9a37a9e4 100644
--- a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs
+++ b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs
@@ -2,12 +2,17 @@
using IsIdentifiable.Failures;
using IsIdentifiable.Options;
using IsIdentifiable.Reporting.Destinations;
+using IsIdentifiable.Rules;
using System;
+using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Data;
using System.IO.Abstractions;
using System.Linq;
+using System.Net;
+using System.Text.RegularExpressions;
using System.Threading;
+using System.Threading.Tasks;
namespace IsIdentifiable.Reporting.Reports;
@@ -128,46 +133,220 @@ public static IEnumerable Deserialize(IFileInfo oldFile)
/// Action to call periodically as records are read from the file (for
/// when the file is very big and you want to show progress etc)
/// Cancellation token for aborting the file deserialication (and closing the file again)
+    /// <param name="partRules">Optional rules; any failure part covered by one of them is dropped from the results</param>
+    /// <param name="runParallel">True to process records on multiple threads while reporting progress to <paramref name="loadedRows"/> on a timer</param>
+    /// <param name="stopAtFirstError">Sequential mode only: rethrow the first record error instead of logging it and continuing</param>
///
///
- public static IEnumerable Deserialize(IFileInfo oldFile, Action loadedRows, CancellationToken token)
+    public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> loadedRows, CancellationToken token, IEnumerable<PartPatternFilterRule>? partRules = null, bool runParallel = true, bool stopAtFirstError = false)
{
- var lineNumber = 0;
+        partRules ??= new List<PartPatternFilterRule>();
using var stream = oldFile.OpenRead();
using var sr = new System.IO.StreamReader(stream);
- using var r = new CsvReader(sr, System.Globalization.CultureInfo.CurrentCulture);
- if (r.Read())
- r.ReadHeader();
+        using var reader = new CsvReader(sr, System.Globalization.CultureInfo.CurrentCulture);
+        if (reader.Read())
+            reader.ReadHeader();
else
- yield break;
- lineNumber++;
- // "Resource", "ResourcePrimaryKey", "ProblemField", "ProblemValue", "PartWords", "PartClassifications", "PartOffsets"
+            return Enumerable.Empty<Failure>();
- while (r.Read())
+        int totalProcessed = 0;
+        using var localTokenSource = new CancellationTokenSource();
+        var failures = new ConcurrentBag<Failure>();
+        // NOTE: ConcurrentBag is an unordered collection, so the failures are not returned in file order
+        if (runParallel)
{
- token.ThrowIfCancellationRequested();
- lineNumber++;
- var words = r["PartWords"].Split(Separator);
- var classes = r["PartClassifications"].Split(Separator);
- var offsets = r["PartOffsets"].Split(Separator);
-
- var parts = words.Select((t, i) => new FailurePart(
- t,
- Enum.TryParse(classes[i], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[i]}' on line {lineNumber}"),
- int.TryParse(offsets[i], out var offset) ? offset : throw new Exception($"Invalid offset '{offsets[i]}' on line {lineNumber}"))).ToList();
- yield return new Failure(parts)
+            using var timerTask = Task.Run(
+                async () =>
+                {
+                    // Polls both tokens instead of passing one to Task.Delay/Task.Run: a cancelled delay would make timerTask.Wait() below throw
+                    while (!token.IsCancellationRequested && !localTokenSource.IsCancellationRequested)
+                    {
+                        loadedRows(totalProcessed);
+                        await Task.Delay(TimeSpan.FromSeconds(0.1));
+                    }
+                }
+            );
+
+            try
+            {
+                Parallel.ForEach(
+                    reader.GetRecords<FailureStoreReportRecord>(),
+                    new ParallelOptions
+                    {
+                        CancellationToken = token,
+                    },
+                    (FailureStoreReportRecord row) => Process(row, partRules, failures, ref totalProcessed)
+                );
+            }
+            finally
{
- Resource = r["Resource"],
- ResourcePrimaryKey = r["ResourcePrimaryKey"],
- ProblemField = r["ProblemField"],
- ProblemValue = r["ProblemValue"],
- };
-
- if (lineNumber % 1000 == 0)
- loadedRows(lineNumber);
+                localTokenSource.Cancel();
+                timerTask.Wait();
+            }
}
+        else
+        {
+            var problems = 0;
+            foreach (var row in reader.GetRecords<FailureStoreReportRecord>())
+            {
+                // Checked outside the try so that cancellation is never counted as a record problem
+                token.ThrowIfCancellationRequested();
+                try
+                {
+                    Process(row, partRules, failures, ref totalProcessed);
+                }
+                catch (Exception e)
+                {
+                    if (stopAtFirstError)
+                    {
+                        Console.Error.WriteLine($"{row}:");
+                        Console.Error.WriteLine(e);
+                        throw;
+                    }
+
+                    Console.Error.WriteLine($"{row}:\n{e.Message}\n");
+                    problems++;
+                }
+            }
+
+            Console.WriteLine($"Problem with {problems}/{problems + totalProcessed} records");
+        }
+
+        loadedRows(totalProcessed);
+
+        return failures;
+    }
+
+    /// <summary>
+    /// Converts a single CSV record into a <see cref="Failure"/> and adds it to <paramref name="failures"/>,
+    /// repairing part offsets (except for PixelData) and dropping parts covered by <paramref name="partRules"/>.
+    /// </summary>
+    /// <param name="totalProcessed">Shared counter, atomically incremented once the row has been handled</param>
+    /// <exception cref="Exception">The record is malformed or a part cannot be located in the ProblemValue</exception>
+    private static void Process(FailureStoreReportRecord row, IEnumerable<PartPatternFilterRule>? partRules, ConcurrentBag<Failure> failures, ref int totalProcessed)
+    {
+        if (row.ProblemValue == null)
+            throw new Exception("ProblemValue was null");
+
+        var words = row.PartWords.Split(Separator);
+        var classes = row.PartClassifications.Split(Separator);
+        var offsets = row.PartOffsets.Split(Separator);
+
+        // The three columns are parallel lists; fail with a clear message rather than an IndexOutOfRangeException
+        if (classes.Length < words.Length || offsets.Length < words.Length)
+            throw new Exception($"Mismatched part counts: {words.Length} word(s), {classes.Length} classification(s), {offsets.Length} offset(s)");
+
+        var parts = words.Select(
+            (word, index) => new FailurePart(
+                word,
+                Enum.TryParse<FailureClassification>(classes[index], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[index]}'"),
+                int.TryParse(offsets[index], out var offset) ? offset : throw new Exception($"Invalid offset '{offsets[index]}'")
+            )
+        ).ToList();
+
+        if (row.ProblemField != "PixelData")
+        {
+            // Fixes any offsets that have been mangled by file endings etc.
+            foreach (var part in parts)
+            {
+                // Nothing to repair when the word already sits exactly at its recorded offset
+                if (part.Offset >= 0 && part.Offset + part.Word.Length <= row.ProblemValue.Length &&
+                    string.CompareOrdinal(row.ProblemValue, part.Offset, part.Word, 0, part.Word.Length) == 0)
+                    continue;
+
+                if (!row.ProblemValue.Contains(part.Word))
+                {
+                    // Test if the ProblemValue has been HTML escaped
+                    var encodedPartWord = WebUtility.HtmlEncode(part.Word);
+
+                    // Test if the ProblemValue has hidden unicode symbols
+                    var withoutInvisible = Regex.Replace(row.ProblemValue, @"\p{C}+", string.Empty);
+
+                    if (row.ProblemValue.Contains(encodedPartWord))
+                        part.Word = encodedPartWord;
+                    else if (withoutInvisible.Contains(part.Word))
+                        row.ProblemValue = withoutInvisible;
+                    else
+                        throw new ArgumentOutOfRangeException(nameof(row), $"Could not find any variation of '{part.Word}' in the ProblemValue");
+                }
+
+                // Finally, try shifting the offset around to find the word
+                try
+                {
+                    FixupOffsets(row, part);
+                }
+                catch (ArgumentOutOfRangeException e)
+                {
+                    throw new Exception($"Could not fixup Offset of {part} in:\n{row}", e);
+                }
+            }
+        }
+
+        /* TEMP - Filter out any FailureParts covered by an PartPatternFilterRule */
+        var toRemove = new List<FailurePart>();
+        foreach (var partRule in partRules ?? Enumerable.Empty<PartPatternFilterRule>())
+        {
+            if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, row.ProblemField, StringComparison.InvariantCultureIgnoreCase))
+                continue;
+
+            foreach (var part in parts.Where(x => partRule.Covers(x, row.ProblemValue)))
+            {
+                toRemove.Add(part);
+                partRule.IncrementUsed();
+            }
+        }
+        parts = parts.Except(toRemove).ToList();
+        /* TEMP */
+
+        if (parts.Any())
+            failures.Add(new Failure(parts)
+            {
+                Resource = row.Resource,
+                ResourcePrimaryKey = row.ResourcePrimaryKey,
+                ProblemField = row.ProblemField,
+                ProblemValue = row.ProblemValue,
+            });
+
+        // Reached only when the row was handled without throwing
+        Interlocked.Increment(ref totalProcessed);
+    }
+
+    /// <summary>
+    /// Moves <paramref name="part"/>'s Offset onto an exact (ordinal) occurrence of its Word in the row's
+    /// ProblemValue: the first occurrence at or after the recorded offset, otherwise the last one before it.
+    /// </summary>
+    /// <exception cref="ArgumentOutOfRangeException">The Word does not occur anywhere in the ProblemValue</exception>
+    private static void FixupOffsets(FailureStoreReportRecord row, FailurePart part)
+    {
+        var value = row.ProblemValue;
+
+        // Try looking ahead first, then back. A recorded offset outside the string is clamped so that a
+        // negative or overlong offset still gets a search instead of failing immediately.
+        var start = Math.Clamp(part.Offset, 0, value.Length);
+        var found = value.IndexOf(part.Word, start, StringComparison.Ordinal);
+
+        if (found < 0)
+            found = value.LastIndexOf(part.Word, StringComparison.Ordinal);
+        if (found < 0)
+            throw new ArgumentOutOfRangeException(nameof(part), $"'{part.Word}' does not occur in the ProblemValue");
+
+        part.Offset = found;
+    }
+
+ internal class FailureStoreReportRecord
+ {
+ public string Resource { get; init; }
+ public string ResourcePrimaryKey { get; init; }
+ public string ProblemField { get; init; }
+
+ // NOTE(rkm 2023-12-07) Allow modification to deal with certain edge cases
+ public string ProblemValue { get; set; }
+
+ public string PartWords { get; init; }
+ public string PartClassifications { get; init; }
+ public string PartOffsets { get; init; }
- loadedRows(lineNumber);
+ public override string ToString() => $"Failure({Resource}|{ResourcePrimaryKey}|{ProblemField}|{ProblemValue}|{PartWords}|{PartClassifications}|{PartOffsets})";
}
}
diff --git a/IsIdentifiable/Rules/PartPatternFilterRule.cs b/IsIdentifiable/Rules/PartPatternFilterRule.cs
new file mode 100644
index 00000000..6881e2e9
--- /dev/null
+++ b/IsIdentifiable/Rules/PartPatternFilterRule.cs
@@ -0,0 +1,162 @@
+using IsIdentifiable.Failures;
+using System;
+using System.Linq;
+using System.Text.RegularExpressions;
+
+namespace IsIdentifiable.Rules;
+
+/// <summary>
+/// A <see cref="RegexRule"/> that is evaluated against an individual <see cref="FailurePart"/>. A part is covered
+/// when its word matches <see cref="IfPartPattern"/> or, if <see cref="WordBefore"/> / <see cref="WordAfter"/> are
+/// set, when the pattern appears next to those words at the part's position in the full value.
+/// </summary>
+public class PartPatternFilterRule : RegexRule
+{
+    /// <summary>
+    /// Compiled form of <see cref="IfPartPattern"/>, honouring <see cref="CaseSensitive"/>. Use this to validate
+    /// whether the rule should be applied. Null until <see cref="IfPartPattern"/> has been set.
+    /// </summary>
+    protected Regex IfPartPatternRegex;
+    private string _ifPartPatternString;
+    private string _wordBefore;
+    private string _wordAfter;
+
+    // Built lazily by Covers and discarded whenever one of their inputs changes
+    private Regex? _wordBeforeRegex;
+    private Regex? _wordAfterRegex;
+
+    private int _usedCount;
+
+    /// <summary>
+    /// The Regex pattern which should be used to match values a specific failing part. Must start with ^ and end with $
+    /// </summary>
+    public string IfPartPattern
+    {
+        get => _ifPartPatternString;
+        set
+        {
+            _ifPartPatternString = value;
+            RebuildPartRegex();
+        }
+    }
+
+    /// <summary>
+    /// Whether the IfPattern and IfPartPattern are case sensitive (default is false)
+    /// </summary>
+    public override bool CaseSensitive
+    {
+        get => base.CaseSensitive;
+        set
+        {
+            base.CaseSensitive = value;
+
+            // Properties can be assigned in any order (object initializers, YAML), so CaseSensitive may arrive
+            // before IfPartPattern. Only rebuild once there is a pattern; its own setter builds the regex.
+            if (_ifPartPatternString != null)
+                RebuildPartRegex();
+        }
+    }
+
+    /// <summary>
+    /// Optional regex fragment that must appear, followed by whitespace or hyphens, directly before the part
+    /// </summary>
+    public string WordBefore
+    {
+        get => _wordBefore;
+        set
+        {
+            _wordBefore = value;
+            _wordBeforeRegex = null;
+        }
+    }
+
+    /// <summary>
+    /// Optional regex fragment that must appear, preceded by whitespace or hyphens, directly after the part
+    /// </summary>
+    public string WordAfter
+    {
+        get => _wordAfter;
+        set
+        {
+            _wordAfter = value;
+            _wordAfterRegex = null;
+        }
+    }
+
+    /// <summary>
+    /// Number of times <see cref="IncrementUsed"/> has been called
+    /// </summary>
+    public int UsedCount => System.Threading.Volatile.Read(ref _usedCount);
+
+    /// <summary>
+    /// Atomically adds one to <see cref="UsedCount"/>
+    /// </summary>
+    public void IncrementUsed() => System.Threading.Interlocked.Increment(ref _usedCount);
+
+    private RegexOptions PatternOptions => (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled;
+
+    // TODO(rkm 2023-07-25) Shouldn't be needed when IfPattern is readonly
+    private void RebuildPartRegex()
+    {
+        if (_ifPartPatternString == null)
+            throw new Exception("Illegal rule setup. You must specify IfPartPattern");
+
+        if (!_ifPartPatternString.StartsWith('^') || !_ifPartPatternString.EndsWith('$'))
+            throw new ArgumentException("IfPartPattern must be enclosed by ^ and $");
+
+        IfPartPatternRegex = new Regex(_ifPartPatternString, PatternOptions);
+
+        // The context regexes embed the pattern and the case option, so they are stale now
+        _wordBeforeRegex = null;
+        _wordAfterRegex = null;
+    }
+
+    /// <summary>
+    /// Returns true if this rule covers <paramref name="failurePart"/> within <paramref name="problemValue"/>
+    /// </summary>
+    /// <param name="failurePart">The part to test; its Offset locates it inside <paramref name="problemValue"/></param>
+    /// <param name="problemValue">The full value the part was found in</param>
+    /// <exception cref="Exception">IfPartPattern has not been set</exception>
+    public bool Covers(FailurePart failurePart, string problemValue)
+    {
+        if (IfPartPatternRegex == null)
+            throw new Exception("Illegal rule setup. You must specify IfPartPattern");
+
+        if (As != FailureClassification.None && As != failurePart.Classification)
+            return false;
+
+        var hasWordBefore = !string.IsNullOrWhiteSpace(WordBefore);
+        var hasWordAfter = !string.IsNullOrWhiteSpace(WordAfter);
+
+        // With no surrounding words configured the decision rests on the part's own word
+        if (!hasWordBefore && !hasWordAfter)
+            return IfPartPatternRegex.IsMatch(failurePart.Word);
+
+        // The context checks slice problemValue at the part's position. A part that does not lie inside the
+        // value (e.g. a negative offset) cannot be sliced, so treat it as not covered instead of throwing.
+        var end = failurePart.Offset + failurePart.Word.Length;
+        if (failurePart.Offset < 0 || end > problemValue.Length)
+            return false;
+
+        if (hasWordBefore)
+        {
+            // Keeps the pattern's trailing $ so the part must end the "up to offset" slice
+            var before = _wordBeforeRegex ??= new Regex(@$"\b{WordBefore}(\s|-)+{IfPartPattern.TrimStart('^')}", PatternOptions);
+            if (!before.IsMatch(problemValue[..end]))
+                return false;
+        }
+
+        if (hasWordAfter)
+        {
+            // Keeps the pattern's leading ^ so the part must start the "from offset" slice
+            var after = _wordAfterRegex ??= new Regex(@$"{IfPartPattern.TrimEnd('$')}(\s|-)+{WordAfter}\b", PatternOptions);
+            if (!after.IsMatch(problemValue[failurePart.Offset..]))
+                return false;
+        }
+
+        return true;
+    }
+
+    /// <inheritdoc/>
+    public override string ToString() => $"Pat:'{_ifPartPatternString}' WB:'{WordBefore}' WA:'{WordAfter}' Col:'{IfColumn}' As:'{As}' x{_usedCount:N0}";
+}
diff --git a/Tests/Directory.Build.props b/Tests/Directory.Build.props
index f6bf3b83..b39947e6 100644
--- a/Tests/Directory.Build.props
+++ b/Tests/Directory.Build.props
@@ -7,10 +7,12 @@
runtime; build; native; contentfiles; analyzers; buildtransitive
+
all
runtime; build; native; contentfiles; analyzers; buildtransitive
+
-
\ No newline at end of file
+
diff --git a/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj b/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj
index f3b20ece..00655aff 100644
--- a/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj
+++ b/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj
@@ -14,13 +14,6 @@
PreserveNewest
-
-
-
-
-
-
-
diff --git a/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs b/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs
new file mode 100644
index 00000000..6c3e21a8
--- /dev/null
+++ b/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs
@@ -0,0 +1,158 @@
+using IsIdentifiable.Failures;
+using IsIdentifiable.Rules;
+using MongoDB.Driver.Linq;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace IsIdentifiable.Tests.Rules;
+
+internal class PartPatternFilterRuleTests
+{
+ private static IEnumerable TestCaseSource_ForamenMonroParts()
+ {
+ var parts = new List();
+ foreach (var prefix in new[] { "foramen", "foramina" })
+ {
+ foreach (var join in new[] { "of", "" })
+ {
+ foreach (var name in new[] { "monro", "monroe" })
+ {
+ parts.Add(string.Join(" ", (new[] { prefix, join, name }).Where(x => !string.IsNullOrEmpty(x))));
+ }
+ }
+ }
+ return parts;
+ }
+
+ [TestCaseSource(nameof(TestCaseSource_ForamenMonroParts))]
+ public void Covers_ForamenMonro(string valuePart)
+ {
+ // Arrange
+ var rule = new PartPatternFilterRule()
+ {
+ IfPartPattern = "^Monroe?$",
+ WordBefore = "(foramen|foramina)( of)?",
+ IfColumn = "TextValue",
+ As = FailureClassification.Person,
+ Action = RuleAction.Ignore,
+ };
+ var name = valuePart.Split()[^1];
+ var problemValue = $"Mr {name} has an issue with his {valuePart}";
+ var validFailurePart = new FailurePart(name, FailureClassification.Person, 3);
+ var problemOffset = problemValue.LastIndexOf(" ") + 1;
+ var filteredFailurePart = new FailurePart(name, FailureClassification.Person, problemOffset);
+
+ // Act
+ var coversValidFailurePart = rule.Covers(validFailurePart, problemValue);
+ var coversFilteredFailurePart = rule.Covers(filteredFailurePart, problemValue);
+
+ // Assert
+ Assert.False(coversValidFailurePart);
+ Assert.True(coversFilteredFailurePart);
+ }
+
+ private static IEnumerable TestCaseSource_HodgkinLymphomaParts()
+ {
+ var parts = new List();
+ foreach (var name in new[] { "hodgkin", "hodgkins", "hodgkin's" })
+ {
+ foreach (var postfix in new[] { "lymphoma", "disease" })
+ {
+ parts.Add(string.Join(" ", (new[] { name, postfix }).Where(x => !string.IsNullOrEmpty(x))));
+ }
+ }
+ return parts;
+ }
+
+ [TestCaseSource(nameof(TestCaseSource_HodgkinLymphomaParts))]
+ public void Covers_HodgkinLymphoma(string valuePart)
+ {
+ // Arrange
+ var rule = new PartPatternFilterRule()
+ {
+ Action = RuleAction.Ignore,
+ As = FailureClassification.Person,
+ IfColumn = "TextValue",
+ IfPartPattern = "^Hodgkin(s|'s)?$",
+ WordAfter = "(lymphoma|disease|
lymphoma)",
+ };
+ var name = valuePart.Split()[0];
+ var problemValue = $"Mr {name} possibly has {valuePart}";
+ var validFailurePart = new FailurePart(name, FailureClassification.Person, 3);
+ var problemOffset = problemValue.IndexOf($"has {name}") + 4;
+ var filteredFailurePart = new FailurePart(name, FailureClassification.Person, problemOffset);
+
+ // Act
+ var coversValidFailurePart = rule.Covers(validFailurePart, problemValue);
+ var coversFilteredFailurePart = rule.Covers(filteredFailurePart, problemValue);
+
+ // Assert
+ Assert.False(coversValidFailurePart);
+ Assert.True(coversFilteredFailurePart);
+ }
+
+ [Test]
+ public void Covers_HyphenInWordBefore()
+ {
+ // Arrange
+ var rule = new PartPatternFilterRule()
+ {
+ IfPartPattern = "^Hodgkin$",
+ WordBefore = "Non",
+ IfColumn = "TextValue",
+ As = FailureClassification.Person,
+ Action = RuleAction.Ignore,
+ };
+ var problemValue = $"Non-Hodgkin's lymphoma";
+ var failurePart = new FailurePart("Hodgkin", FailureClassification.Person, 4);
+
+ // Act
+ var ruleCoversFailurePart = rule.Covers(failurePart, problemValue);
+
+ // Assert
+ Assert.True(ruleCoversFailurePart);
+ }
+
+ [Test]
+ public void Covers_HyphenInWordAfter()
+ {
+ // Arrange
+ var rule = new PartPatternFilterRule()
+ {
+ IfPartPattern = "^Gr(a|e)y$",
+ WordAfter = "white",
+ IfColumn = "TextValue",
+ As = FailureClassification.Person,
+ Action = RuleAction.Ignore,
+ };
+ var problemValue = $"Gray-white foo";
+ var failurePart = new FailurePart("Gray", FailureClassification.Person, 0);
+
+ // Act
+ var ruleCoversFailurePart = rule.Covers(failurePart, problemValue);
+
+ // Assert
+ Assert.True(ruleCoversFailurePart);
+ }
+
+ [Test]
+ public void Covers_AnyFailureClassification()
+ {
+ // Arrange
+ var rule = new PartPatternFilterRule()
+ {
+ IfPartPattern = "^Test$",
+ IfColumn = "TextValue",
+ Action = RuleAction.Ignore,
+ };
+ var problemValue = $"Test";
+ var failurePart = new FailurePart("Test", FailureClassification.Person, 0);
+
+ // Act
+ var ruleCoversFailurePart = rule.Covers(failurePart, problemValue);
+
+ // Assert
+ Assert.True(ruleCoversFailurePart);
+ }
+}
diff --git a/Tests/IsIdentifiableTests/StoreReportTests.cs b/Tests/IsIdentifiableTests/StoreReportTests.cs
index b0ecff25..1ddfb2ef 100644
--- a/Tests/IsIdentifiableTests/StoreReportTests.cs
+++ b/Tests/IsIdentifiableTests/StoreReportTests.cs
@@ -34,7 +34,7 @@ public void TestReconstructionFromCsv()
var failure = new Failure(
new FailurePart[]
{
- new("Kansas", FailureClassification.Location, 12),
+ new("Kansas", FailureClassification.Location, 13),
new("Toto", FailureClassification.Location, 28)
})
{
diff --git a/ii/Constants.cs b/ii/Constants.cs
index bd6b3834..0cef015b 100644
--- a/ii/Constants.cs
+++ b/ii/Constants.cs
@@ -5,12 +5,12 @@ public static class Constants
///
/// Width of modal popup dialogues
///
- public const int DlgWidth = 78;
+ public const int DlgWidth = 120;
///
/// Height of modal popup dialogues
///
- public const int DlgHeight = 18;
+ public const int DlgHeight = 60;
///
/// Border boundary of modal popup dialogues
diff --git a/ii/MainWindow.cs b/ii/MainWindow.cs
index 2d42b68f..1c25b2e8 100644
--- a/ii/MainWindow.cs
+++ b/ii/MainWindow.cs
@@ -6,6 +6,7 @@
using IsIdentifiable.Rules;
using System;
using System.Collections.Generic;
+using System.IO;
using System.IO.Abstractions;
using System.Linq;
using System.Text;
@@ -33,7 +34,7 @@ internal class MainWindow : IRulePatternFactory, IDisposable
///
public RowUpdater Updater { get; }
- private readonly FailureView _valuePane;
+ private readonly FailureView _failureView;
private readonly Label _info;
private readonly SpinnerView _spinner;
private readonly TextField _gotoTextField;
@@ -86,7 +87,8 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti
Menu = new MenuBar(new MenuBarItem[] {
new("_File (F9)", new MenuItem [] {
new("_Open Report",null, OpenReport),
- new("_Quit", null, static () => Application.RequestStop())
+ new("_Export 'Outstanding Failures'", null, ExportOutstandingFailures),
+ new("_Quit", null, static () => Application.RequestStop()),
}),
new("_Options", new MenuItem [] {
miCustomPatterns = new MenuItem("_Custom Patterns",null,ToggleCustomPatterns){CheckType = MenuItemCheckStyle.Checked,Checked = false}
@@ -107,20 +109,20 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti
ColorScheme = _greyOnBlack
};
- _valuePane = new FailureView()
+ _failureView = new FailureView()
{
X = 0,
Y = 1,
Width = Dim.Fill(),
- Height = 10,
+ Height = Dim.Fill(),
};
var frame = new FrameView("Options")
{
X = 0,
- Y = 12,
+ Y = Console.WindowHeight * 2 / 3,
Width = Dim.Fill(),
- Height = Dim.Fill()
+ Height = Dim.Fill(),
};
var ignoreButton = new Button("Ignore")
@@ -192,11 +194,16 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti
viewMain.Add(_spinner);
_spinner.Visible = false;
- viewMain.Add(_valuePane);
+ viewMain.Add(_failureView);
viewMain.Add(frame);
if (!string.IsNullOrWhiteSpace(opts.FailuresCsv))
- OpenReport(opts.FailuresCsv, (e) => throw e);
+ {
+ Exception? exc = null;
+ OpenReport(opts.FailuresCsv, (e) => exc = e);
+ if(exc != null)
+ Helpers.ShowException("Failed to Load", exc);
+ }
var tabView = new TabView()
{
@@ -283,20 +290,21 @@ private void GoTo(int page)
return;
try
{
- CurrentReport.GoTo(page);
- _info.Text = CurrentReport.DescribeProgress();
- SetupToShow(CurrentReport.Current);
+ if (CurrentReport.GoTo(page))
+ {
+ _info.Text = CurrentReport.DescribeProgress();
+ SetupToShow(CurrentReport.Current);
+ }
}
catch (Exception e)
{
Helpers.ShowException("Failed to GoTo", e);
}
-
}
private void SetupToShow(Failure? f)
{
- _valuePane.CurrentFailure = f;
+ _failureView.CurrentFailure = f;
if (f != null)
{
@@ -318,7 +326,7 @@ private void BeginNext()
private void Next()
{
- if (_valuePane.CurrentFailure == null || CurrentReport == null)
+ if (_failureView.CurrentFailure == null || CurrentReport == null)
return;
_spinner.Visible = true;
@@ -372,7 +380,7 @@ private void Next()
private void Ignore()
{
- if (_valuePane.CurrentFailure == null || CurrentReport == null)
+ if (_failureView.CurrentFailure == null || CurrentReport == null)
return;
if (taskToLoadNext != null && !taskToLoadNext.IsCompleted)
@@ -383,7 +391,7 @@ private void Ignore()
try
{
- Ignorer.Add(_valuePane.CurrentFailure);
+ Ignorer.Add(_failureView.CurrentFailure);
History.Push(new MainWindowHistory(CurrentReport.CurrentIndex, Ignorer));
}
catch (OperationCanceledException)
@@ -395,7 +403,7 @@ private void Ignore()
}
private void Update()
{
- if (_valuePane.CurrentFailure == null || CurrentReport == null)
+ if (_failureView.CurrentFailure == null || CurrentReport == null)
return;
if (taskToLoadNext != null && !taskToLoadNext.IsCompleted)
@@ -407,7 +415,7 @@ private void Update()
try
{
// TODO(rkm 2021-04-09) Server always passed as null here, but Update seems to require it?
- Updater.Update(null, _valuePane.CurrentFailure, null /*create one yourself*/);
+ Updater.Update(null, _failureView.CurrentFailure, null /*create one yourself*/);
History.Push(new MainWindowHistory(CurrentReport.CurrentIndex, Updater));
}
@@ -425,6 +433,39 @@ private void Update()
BeginNext();
}
+    /// <summary>
+    /// Writes every entry of the rules view's OutstandingFiles, one per line, to a new timestamped
+    /// "OutstandingFiles-*.csv" in the current working directory and reports the outcome in a dialog.
+    /// </summary>
+    private void ExportOutstandingFailures()
+    {
+        if (rulesView.OutstandingFiles == null)
+        {
+            Helpers.ShowMessage("Error", "You must evaluate the rules on a report first.");
+            return;
+        }
+
+        var now = DateTime.UtcNow.ToString("s").Replace(':', '-');
+        var fileName = $"OutstandingFiles-{now}.csv";
+
+        try
+        {
+            // Scoped so the file is flushed and closed before the confirmation dialog is shown
+            using (var sw = new StreamWriter(fileName))
+            {
+                foreach (var file in rulesView.OutstandingFiles)
+                    sw.WriteLine(file);
+            }
+        }
+        catch (Exception e)
+        {
+            Helpers.ShowException("Failed to Export", e);
+            return;
+        }
+
+        Helpers.ShowMessage("Complete", $"Wrote {rulesView.OutstandingFiles.Count} unique item(s) to {fileName}");
+    }
+
private void OpenReport()
{
using var ofd = new OpenDialog("Load CSV Report", "Enter file path to load")
@@ -474,12 +500,15 @@ private void OpenReport(string? path, Action exceptionHandler)
return !done;
});
+ _currentReportLabel.Text = $"Report:{_fileSystem.Path.GetFileName(path)}";
+ _currentReportLabel.SetNeedsDisplay();
+
Task.Run(() =>
{
try
{
CurrentReport = new ReportReader(_fileSystem.FileInfo.New(path), (s) =>
- rows.Text = $"Loaded: {s:N0} rows", _fileSystem, cts.Token);
+ rows.Text = $"Loaded: {s:N0} rows", _fileSystem, cts.Token, Ignorer.PartRules_Temp);
SetupToShow(CurrentReport.Failures.FirstOrDefault());
BeginNext();
@@ -489,6 +518,7 @@ private void OpenReport(string? path, Action exceptionHandler)
{
exceptionHandler(e);
rows.Text = "Error";
+ _currentReportLabel.Text = "Report: ";
}
}
@@ -503,9 +533,6 @@ private void OpenReport(string? path, Action exceptionHandler)
cts.Dispose();
});
- _currentReportLabel.Text = $"Report:{_fileSystem.Path.GetFileName(path)}";
- _currentReportLabel.SetNeedsDisplay();
-
Application.Run(dlg);
}
@@ -664,7 +691,7 @@ public string GetPattern(object sender, Failure failure)
public void Dispose()
{
- _valuePane.Dispose();
+ _failureView.Dispose();
_info.Dispose();
_spinner.Dispose();
_gotoTextField.Dispose();
diff --git a/ii/Program.cs b/ii/Program.cs
index 49f025cc..ee2c9215 100644
--- a/ii/Program.cs
+++ b/ii/Program.cs
@@ -6,11 +6,14 @@
using FAnsi.Implementations.PostgreSql;
using FellowOakDicom;
using IsIdentifiable.Options;
+using IsIdentifiable.Reporting.Reports;
using IsIdentifiable.Runners;
using Microsoft.Extensions.FileSystemGlobbing;
using System;
using System.IO.Abstractions;
using System.Linq;
+using System.Text.RegularExpressions;
+using System.Threading;
using YamlDotNet.Serialization;
namespace ii;
@@ -100,13 +103,15 @@ public static int Main(string[] args)
IsIdentifiableDicomFileOptions,
IsIdentifiableMongoOptions,
IsIdentifiableFileGlobOptions,
- IsIdentifiableReviewerOptions>(args)
+ IsIdentifiableReviewerOptions,
+ IsIdentifiableReportValidatorOptions>(args)
.MapResult(
(IsIdentifiableRelationalDatabaseOptions o) => Run(o, fileSystem),
(IsIdentifiableDicomFileOptions o) => Run(o, fileSystem),
(IsIdentifiableMongoOptions o) => Run(o, fileSystem),
(IsIdentifiableFileGlobOptions o) => Run(o, fileSystem),
(IsIdentifiableReviewerOptions o) => Run(o, fileSystem),
+ (IsIdentifiableReportValidatorOptions o) => Run(o, fileSystem),
// return exit code 0 for user requests for help
errors => args.Any(a => a.Equals("--help", StringComparison.InvariantCultureIgnoreCase)) ? 0 : 1);
@@ -127,10 +132,48 @@ private static int Run(IsIdentifiableReviewerOptions opts, IFileSystem fileSyste
{
Inherit(opts);
+ if (!fileSystem.File.Exists(opts.FailuresCsv))
+ {
+ Console.Error.WriteLine($"Error: Could not find {opts.FailuresCsv}");
+ return 1;
+ }
+
+ const string expectedHeader = "Resource,ResourcePrimaryKey,ProblemField,ProblemValue,PartWords,PartClassifications,PartOffsets";
+ var line = fileSystem.File.ReadLines(opts.FailuresCsv).FirstOrDefault();
+ if (line == null || Regex.Replace(line, @"\s+", "") != line)
+ {
+ Console.Error.WriteLine($"Error: Expected CSV Failure header {expectedHeader}");
+ return 1;
+ }
+
var reviewer = new ReviewerRunner(GlobalOptions?.IsIdentifiableOptions, opts, fileSystem);
return reviewer.Run();
}
+ private static int Run(IsIdentifiableReportValidatorOptions opts, IFileSystem fileSystem)
+ {
+ if (GlobalOptions?.IsIdentifiableReviewerOptions != null)
+ opts.InheritValuesFrom(GlobalOptions.IsIdentifiableReviewerOptions);
+
+ if (!fileSystem.File.Exists(opts.FailuresCsv))
+ {
+ Console.Error.WriteLine($"Error: Could not find {opts.FailuresCsv}");
+ return 1;
+ }
+
+ const string expectedHeader = "Resource,ResourcePrimaryKey,ProblemField,ProblemValue,PartWords,PartClassifications,PartOffsets";
+ var line = fileSystem.File.ReadLines(opts.FailuresCsv).FirstOrDefault();
+ if (line == null || Regex.Replace(line, @"\s+", "") != expectedHeader)
+ {
+ Console.Error.WriteLine($"Error: Expected CSV Failure header {expectedHeader}");
+ return 1;
+ }
+
+ var report = new FailureStoreReport("", 0, fileSystem);
+ var failures = FailureStoreReport.Deserialize(fileSystem.FileInfo.New(opts.FailuresCsv), (_) => { }, CancellationToken.None, partRules: null, runParallel: false, opts.StopAtFirstError).ToArray();
+
+ return 0;
+ }
private static int Run(IsIdentifiableDicomFileOptions opts, IFileSystem fileSystem)
{
diff --git a/ii/Views/FailureView.cs b/ii/Views/FailureView.cs
index e71cb336..225ea5fc 100644
--- a/ii/Views/FailureView.cs
+++ b/ii/Views/FailureView.cs
@@ -2,6 +2,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
+using System.Text;
using Terminal.Gui;
using Attribute = Terminal.Gui.Attribute;
@@ -26,16 +27,16 @@ public override void Redraw(Rect bounds)
var w = bounds.Width;
var h = bounds.Height;
- var toDisplay = CurrentFailure?.ProblemValue ?? " ";
+ var problemValue = CurrentFailure?.ProblemValue ?? " ";
//if the original string validated
var originalNewlines = new HashSet<int>();
- for (var i = 0; i < toDisplay.Length; i++)
- if (toDisplay[i] == '\n')
+ for (var i = 0; i < problemValue.Length; i++)
+ if (problemValue[i] == '\n')
originalNewlines.Add(i);
- var lines = Helpers.Wrap(toDisplay, bounds.Width).Split('\n', StringSplitOptions.RemoveEmptyEntries);
+ var lines = Helpers.Wrap(problemValue, bounds.Width).Split('\n', StringSplitOptions.RemoveEmptyEntries);
var characterOffset = 0;
Attribute? oldColor = null;
@@ -78,19 +79,19 @@ public override void Redraw(Rect bounds)
}
}
- if (CurrentFailure != null)
- {
- Driver.SetAttribute(_attNormal);
- Move(0, h);
-
- var classification =
- $"C:{string.Join(",", CurrentFailure.Parts.Select(p => p.Classification).Distinct().ToArray())}";
+ if (CurrentFailure == null)
+ return;
- var field = CurrentFailure.ProblemField;
- classification = classification.PadRight(w - field.Length);
-
- Driver.AddStr(classification + field);
- }
+ Driver.SetAttribute(_attNormal);
+ Move(0, h);
+ var sb = new StringBuilder();
+ sb.Append($"ProblemField: {CurrentFailure.ProblemField}. ");
+ sb.Append($"Classifications: ");
+ foreach (var failurePart in CurrentFailure.Parts)
+ sb.Append($"'{failurePart.Word}' at {failurePart.Offset} => {failurePart.Classification}, ");
+ sb.Length -= 2;
+ sb.Append('.');
+ Driver.AddStr(sb.ToString().PadRight(w));
}
}
diff --git a/ii/Views/OutstandingFailureNode.cs b/ii/Views/OutstandingFailureNode.cs
index 8c02116f..36a067ef 100644
--- a/ii/Views/OutstandingFailureNode.cs
+++ b/ii/Views/OutstandingFailureNode.cs
@@ -1,4 +1,4 @@
-using IsIdentifiable.Failures;
+using IsIdentifiable.Failures;
using Terminal.Gui.Trees;
namespace ii.Views;
@@ -21,8 +21,5 @@ public OutstandingFailureNode(Failure failure, int numberOfTimesReported)
NumberOfTimesReported = numberOfTimesReported;
}
- public override string ToString()
- {
- return $"{Failure.ProblemValue} x{NumberOfTimesReported:N0}";
- }
+ public override string ToString() => $"({NumberOfTimesReported:N0}x) {Failure.ProblemValue}";
}
diff --git a/ii/Views/RulesView.cs b/ii/Views/RulesView.cs
index 51ceaacc..dc681d1e 100644
--- a/ii/Views/RulesView.cs
+++ b/ii/Views/RulesView.cs
@@ -17,6 +17,8 @@ class RulesView : View
public IgnoreRuleGenerator? Ignorer { get; private set; }
public RowUpdater? Updater { get; private set; }
+ public List<string>? OutstandingFiles { get; private set; }
+
private readonly TreeView _treeView;
///
@@ -108,7 +110,7 @@ public void LoadReport(ReportReader currentReport, IgnoreRuleGenerator ignorer,
Updater = updater;
_bulkIgnorePatternFactory = bulkIgnorePatternFactory;
- _lblInitialSummary.Text = $"There are {ignorer.Rules.Count} ignore rules and {updater.Rules.Count} update rules. Current report contains {CurrentReport.Failures.Length:N0} Failures";
+ _lblInitialSummary.Text = $"There are {ignorer.Rules.Count - ignorer.PartRules_Temp.Count} ignore rules, {ignorer.PartRules_Temp.Count} PartPatternFilterRules, and {updater.Rules.Count} update rules. Current report contains {CurrentReport.Failures.Length:N0} Failures";
}
@@ -259,7 +261,14 @@ private void Activate(OutstandingFailureNode ofn)
using var cancel = new Button("Cancel");
cancel.Clicked += () => { Application.RequestStop(); };
- using var dlg = new Dialog("Failure", Constants.DlgWidth, Constants.DlgHeight, ignore, update, cancel);
+ using var dlg = new Dialog(
+ "Failure",
+ Math.Min(Constants.DlgWidth, Console.WindowWidth - (2 * Constants.DlgBoundary)),
+ Math.Min(Constants.DlgHeight, Console.WindowHeight - (2 * Constants.DlgBoundary)),
+ ignore,
+ update,
+ cancel
+ );
var lbl = new FailureView()
{
@@ -354,8 +363,16 @@ private void EvaluateRuleCoverage()
var colliding = new TreeNodeWithCount("Colliding Rules");
var ignore = new TreeNodeWithCount("Ignore Rules Used");
+
+ var partRulesused = new TreeNodeWithCount("IfPartPattern Rules Used") { OverrideCount = 0 };
+ foreach (var rule in Ignorer.PartRules_Temp.Where(x => x.UsedCount > 0).OrderByDescending(x => x.UsedCount))
+ {
+ partRulesused.OverrideCount += rule.UsedCount;
+ partRulesused.Children.Add(new TreeNode(rule.ToString()));
+ }
+
var update = new TreeNodeWithCount("Update Rules Used");
- var outstanding = new TreeNodeWithCount("Outstanding Failures");
+ var outstanding = new TreeNodeWithCount("Outstanding Failures", countSubChildren: true);
var allRules = Ignorer.Rules.Union(Updater.Rules).ToList();
@@ -401,7 +418,7 @@ private void EvaluateRuleCoverage()
cts.Dispose();
_treeView.RebuildTree();
- _treeView.AddObjects(new[] { colliding, ignore, update, outstanding });
+ _treeView.AddObjects(new[] { colliding, ignore, partRulesused, update, outstanding });
}, SynchronizationContext.Current);
Application.Run(dlg);
@@ -521,6 +538,8 @@ private void EvaluateRuleCoverageAsync(Label stage, ProgressBar progress, Label
.OrderByDescending(v => v.Failures.Sum(f => f.NumberOfTimesReported))
.Cast()
.ToList();
+
+ OutstandingFiles = outstandingFailures.Select(x => x.Value.Failure.Resource).Distinct().ToList();
}
private static void SetProgress(ProgressBar pb, View tp, int done, int max)
diff --git a/ii/Views/TreeNodeWithCount.cs b/ii/Views/TreeNodeWithCount.cs
index 143faabc..f2d0dbec 100644
--- a/ii/Views/TreeNodeWithCount.cs
+++ b/ii/Views/TreeNodeWithCount.cs
@@ -1,4 +1,5 @@
-using Terminal.Gui.Trees;
+using System.Linq;
+using Terminal.Gui.Trees;
namespace ii.Views;
@@ -6,13 +7,25 @@ internal class TreeNodeWithCount : TreeNode
{
public string Heading { get; }
- public TreeNodeWithCount(string heading)
+ private readonly bool _countSubChildren;
+
+ public int OverrideCount { get; set; } = -1;
+
+ public TreeNodeWithCount(string heading, bool countSubChildren = false)
{
Heading = heading;
+ _countSubChildren = countSubChildren;
}
public override string ToString()
{
- return $"{Heading} ({Children.Count:N0})";
+ var count = 0;
+ if (OverrideCount != -1)
+ count = OverrideCount;
+ else if (_countSubChildren)
+ count = Children.Sum(x => x.Children.Count);
+ else
+ count = Children.Count;
+ return $"{Heading} ({count:N0})";
}
}