diff --git a/.github/workflows/dotnet-core.yml b/.github/workflows/dotnet-core.yml index 67d2adec..9f51c721 100644 --- a/.github/workflows/dotnet-core.yml +++ b/.github/workflows/dotnet-core.yml @@ -87,13 +87,22 @@ jobs: mv target/nerd-0.0.1-SNAPSHOT.jar ../../release/smi-nerd-$(fgrep AssemblyVersion ../../SharedAssemblyInfo.cs | cut -d'"' -f2).jar - name: Package ii binary run: | + tag="$(fgrep AssemblyVersion SharedAssemblyInfo.cs | cut -d'"' -f2)" cd ii dotnet publish --runtime win-x64 -c Release --self-contained true -o ../ii-win-x64 - dotnet publish --runtime linux-x64 -c Release --self-contained true -o ../ii-$(fgrep AssemblyVersion ../SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64 + dotnet publish --runtime linux-x64 -c Release --self-contained true -o ../ii-"$tag"-cli-linux-x64 cd ../ii-win-x64 - zip -q9r ../release/ii-$(fgrep AssemblyVersion ../SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-win-x64.zip . + zip -q9r ../release/ii-"$tag"-cli-win-x64.zip . cd .. - tar -zcvf ./release/ii-$(fgrep AssemblyVersion SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64.tar.gz ii-$(fgrep AssemblyVersion SharedAssemblyInfo.cs|cut -d'"' -f2)-cli-linux-x64 + tar -zcvf ./release/ii-"$tag"-cli-linux-x64.tar.gz ii-"$tag"-cli-linux-x64 + git_tag="$(git rev-parse --short HEAD)" + cp ./release/ii-"$tag"-cli-linux-x64.tar.gz ./ii-"$git_tag"-cli-linux-x64.tar.gz + - name: Store created ii binary + uses: actions/upload-artifact@v3 + with: + name: ii + path: "*.tar.gz" + retention-days: 1 - name: Test ii binary run: | set -e @@ -122,6 +131,7 @@ jobs: - name: Store created nupkg files uses: actions/upload-artifact@v3 with: + name: IsIdentifiablePlugin path: release/IsIdentifiablePlugin.*.nupkg retention-days: 1 - name: Upload binaries to release diff --git a/IsIdentifiable/Failures/FailurePart.cs b/IsIdentifiable/Failures/FailurePart.cs index 9e3d14fc..21436991 100644 --- a/IsIdentifiable/Failures/FailurePart.cs +++ b/IsIdentifiable/Failures/FailurePart.cs @@ -1,4 +1,4 @@ -using Equ; +using Equ; namespace IsIdentifiable.Failures; @@ -71,4 +71,7 @@ public bool Includes(int start, int length) return false; } + + /// + public override string ToString() => $"{nameof(FailurePart)}({Word},{Offset},{Classification})"; } diff --git a/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs b/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs new file mode 100644 index 00000000..132a92ff --- /dev/null +++ b/IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs @@ -0,0 +1,53 @@ +using CommandLine; +using IsIdentifiable.Reporting.Reports; +using System; + +namespace IsIdentifiable.Options; + +/// +/// CLI options for the validator +/// +[Verb("validate", HelpText = "Validate a FailureStoreReport")] +public class IsIdentifiableReportValidatorOptions +{ + /// + /// The CSV list of failures to process. Must be in the format of a + /// + [Option('f', "file", + Required = true, + HelpText = "Pre load an existing failures file" + )] + public string FailuresCsv { get; set; } + + /// + /// Sets UseSystemConsole to true for Terminal.gui (i.e. uses the NetDriver which is based on System.Console) + /// + [Option("usc", HelpText = "Sets UseSystemConsole to true for Terminal.gui (i.e. uses the NetDriver which is based on System.Console)")] + public bool UseSystemConsole { get; internal set; } + + /// + /// Sets the user interface to use a specific color palette yaml file + /// + [Option("theme", HelpText = "Sets the user interface to use a specific color palette yaml file")] + public string Theme { get; set; } + + /// + /// Stop after the first error encountered + /// + [Option("stop-at-first-error", Required = false, Default = false, HelpText = "Stop after the first error encountered")] + public bool StopAtFirstError { get; set; } + + + /// + /// Populates values in this instance where no value yet exists and there is a value in + /// to inherit. + /// + /// + public virtual void InheritValuesFrom(IsIdentifiableReviewerOptions globalOpts) + { + ArgumentNullException.ThrowIfNull(globalOpts); + + if (Theme == null && !string.IsNullOrWhiteSpace(globalOpts.Theme)) + Theme = globalOpts.Theme; + } +} diff --git a/IsIdentifiable/Redacting/OutBase.cs b/IsIdentifiable/Redacting/OutBase.cs index 6757fb21..94419797 100644 --- a/IsIdentifiable/Redacting/OutBase.cs +++ b/IsIdentifiable/Redacting/OutBase.cs @@ -23,6 +23,11 @@ public abstract class OutBase /// public List Rules { get; } + /// + /// Temp -- do not use. + /// + public readonly List PartRules_Temp = new(); + /// /// Persistence of /// @@ -68,8 +73,11 @@ protected OutBase(IFileInfo rulesFile, IFileSystem fileSystem, string defaultFil else { //populated rules file already existed - var deserializer = new Deserializer(); - Rules = deserializer.Deserialize>(existingRules) ?? new List(); + var builder = new DeserializerBuilder(); + builder.WithTagMapping("!IgnorePartRegexRule", typeof(PartPatternFilterRule)); + var allRules = builder.Build().Deserialize>(existingRules) ?? new List(); + Rules = allRules.OfType().ToList(); + PartRules_Temp = allRules.OfType().ToList(); } } } diff --git a/IsIdentifiable/Redacting/ReportReader.cs b/IsIdentifiable/Redacting/ReportReader.cs index baf97dca..99780bbe 100644 --- a/IsIdentifiable/Redacting/ReportReader.cs +++ b/IsIdentifiable/Redacting/ReportReader.cs @@ -1,6 +1,8 @@ using IsIdentifiable.Failures; using IsIdentifiable.Reporting.Reports; +using IsIdentifiable.Rules; using System; +using System.Collections.Generic; using System.IO.Abstractions; using System.Linq; using System.Threading; @@ -52,10 +54,10 @@ public ReportReader(IFileInfo csvFile) /// /// /// - public ReportReader(IFileInfo csvFile, Action loadedRows, IFileSystem fileSystem, CancellationToken token) + public ReportReader(IFileInfo csvFile, Action loadedRows, IFileSystem fileSystem, CancellationToken token, List? partRules = null) { var report = new FailureStoreReport("", 0, fileSystem); - Failures = FailureStoreReport.Deserialize(csvFile, loadedRows, token).ToArray(); + Failures = FailureStoreReport.Deserialize(csvFile, loadedRows, token, partRules).ToArray(); } /// @@ -77,9 +79,11 @@ public bool Next() /// by the total number of /// /// - public void GoTo(int index) + public bool GoTo(int index) { + var original = _current; _current = Math.Min(Math.Max(0, index), Failures.Length); + return _current != original && (_current != Failures.Length); } /// @@ -87,8 +91,5 @@ public void GoTo(int index) /// the is. /// /// - public string DescribeProgress() - { - return $"{_current}/{Failures.Length}"; - } + public string DescribeProgress() => $"{_current + 1}/{Failures.Length}"; } diff --git a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs index c77146f8..9a37a9e4 100644 --- a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs +++ b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs @@ -2,12 +2,17 @@ using IsIdentifiable.Failures; using IsIdentifiable.Options; using IsIdentifiable.Reporting.Destinations; +using IsIdentifiable.Rules; using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Data; using System.IO.Abstractions; using System.Linq; +using System.Net; +using System.Text.RegularExpressions; using System.Threading; +using System.Threading.Tasks; namespace IsIdentifiable.Reporting.Reports; @@ -128,46 +133,220 @@ public static IEnumerable Deserialize(IFileInfo oldFile) /// Action to call periodically as records are read from the file (for /// when the file is very big and you want to show progress etc) /// Cancellation token for aborting the file deserialication (and closing the file again) + /// + /// + /// /// /// - public static IEnumerable Deserialize(IFileInfo oldFile, Action loadedRows, CancellationToken token) + public static IEnumerable Deserialize(IFileInfo oldFile, Action loadedRows, CancellationToken token, IEnumerable? partRules = null, bool runParallel = true, bool stopAtFirstError = false) { - var lineNumber = 0; + partRules ??= new List(); using var stream = oldFile.OpenRead(); using var sr = new System.IO.StreamReader(stream); - using var r = new CsvReader(sr, System.Globalization.CultureInfo.CurrentCulture); - if (r.Read()) - r.ReadHeader(); + using var reader = new CsvReader(sr, System.Globalization.CultureInfo.CurrentCulture); + if (reader.Read()) + reader.ReadHeader(); else - yield break; - lineNumber++; - // "Resource", "ResourcePrimaryKey", "ProblemField", "ProblemValue", "PartWords", "PartClassifications", "PartOffsets" + return Enumerable.Empty(); - while (r.Read()) + int totalProcessed = 0; + var localTokenSource = new CancellationTokenSource(); + var failures = new ConcurrentBag(); + + if (runParallel) { - token.ThrowIfCancellationRequested(); - lineNumber++; - var words = r["PartWords"].Split(Separator); - var classes = r["PartClassifications"].Split(Separator); - var offsets = r["PartOffsets"].Split(Separator); - - var parts = words.Select((t, i) => new FailurePart( - t, - Enum.TryParse(classes[i], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[i]}' on line {lineNumber}"), - int.TryParse(offsets[i], out var offset) ? offset : throw new Exception($"Invalid offset '{offsets[i]}' on line {lineNumber}"))).ToList(); - yield return new Failure(parts) + using var timerTask = Task.Run( + async () => + { + while (!token.IsCancellationRequested && !localTokenSource.Token.IsCancellationRequested) + { + loadedRows(totalProcessed); + await Task.Delay(TimeSpan.FromSeconds(0.1), token); + } + }, + token + ); + + try + { + Parallel.ForEach( + reader.GetRecords(), + new ParallelOptions + { + CancellationToken = token, + }, + (FailureStoreReportRecord row) => Process(row, partRules, failures, ref totalProcessed) + ); + } + finally { - Resource = r["Resource"], - ResourcePrimaryKey = r["ResourcePrimaryKey"], - ProblemField = r["ProblemField"], - ProblemValue = r["ProblemValue"], - }; - - if (lineNumber % 1000 == 0) - loadedRows(lineNumber); + localTokenSource.Cancel(); + timerTask.Wait(); + } } + else + { + var problems = 0; + foreach (var row in reader.GetRecords()) + { + try + { + Process(row, partRules, failures, ref totalProcessed); + } + catch (Exception e) + { + if (stopAtFirstError) + { + Console.Error.WriteLine($"{row}:"); + Console.Error.WriteLine(e); + throw; + } + else + { + Console.Error.WriteLine($"{row}:\n{e.Message}\n"); + problems++; + } + } + } + + Console.WriteLine($"Problem with {problems}/{totalProcessed} records"); + } + + loadedRows(totalProcessed); + + return failures; + } + + private static void Process(FailureStoreReportRecord row, IEnumerable? partRules, ConcurrentBag failures, ref int totalProcessed) + { + if (row.ProblemValue == null) + throw new Exception("ProblemValue was null"); + + var words = row.PartWords.Split(Separator); + var classes = row.PartClassifications.Split(Separator); + var offsets = row.PartOffsets.Split(Separator); + + var parts = words.Select( + (word, index) => new FailurePart( + word, + Enum.TryParse(classes[index], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[index]}'"), + int.TryParse(offsets[index], out var offset) ? offset : throw new Exception($"Invalid offset '{row.PartOffsets}'") + ) + ).ToList(); + + if (row.ProblemField != "PixelData") + { + // Fixes any offsets that have been mangled by file endings etc. + foreach (var part in parts) + { + try + { + if (row.ProblemValue.Substring(part.Offset, part.Word.Length) == part.Word) + continue; + } + catch (ArgumentOutOfRangeException) { } + + if (!row.ProblemValue.Contains(part.Word)) + { + bool fixableOffset = false; + + // Test if the ProblemValue has been HTML escaped + var encodedPartWord = WebUtility.HtmlEncode(part.Word); + + // Test if the ProblemValue has hidden unicode symbols + var withoutInvisible = Regex.Replace(row.ProblemValue, @"\p{C}+", string.Empty); + + if (row.ProblemValue.Contains(encodedPartWord)) + { + part.Word = encodedPartWord; + fixableOffset = true; + } + else if (withoutInvisible.Contains(part.Word)) + { + row.ProblemValue = withoutInvisible; + fixableOffset = true; + } + + if (!fixableOffset) + throw new ArgumentOutOfRangeException($"Could not find any variation of '{part.Word}' in the ProblemValue"); + } + + // Finally, try shifting the offset around to find the word + try + { + FixupOffsets(row, part); + } + catch (ArgumentOutOfRangeException e) + { + throw new Exception($"Could not fixup Offset of {part} in:\n{row}", e); + } + } + } + + /* TEMP - Filter out any FailureParts covered by an PartPatternFilterRule */ + var toRemove = new List(); + foreach (var partRule in partRules) + { + if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, row.ProblemField, StringComparison.InvariantCultureIgnoreCase)) + continue; + + foreach (var part in parts.Where(x => partRule.Covers(x, row.ProblemValue))) + { + toRemove.Add(part); + partRule.IncrementUsed(); + } + } + parts = parts.Except(toRemove).ToList(); + /* TEMP */ + + if (parts.Any()) + failures.Add(new Failure(parts) + { + Resource = row.Resource, + ResourcePrimaryKey = row.ResourcePrimaryKey, + ProblemField = row.ProblemField, + ProblemValue = row.ProblemValue, + }); + + Interlocked.Increment(ref totalProcessed); + } + + private static void FixupOffsets(FailureStoreReportRecord row, FailurePart part) + { + // Try looking ahead first, then back + var origOffset = part.Offset; + + try + { + while (row.ProblemValue.Substring(part.Offset, part.Word.Length) != part.Word) + part.Offset++; + } + catch (ArgumentOutOfRangeException) + { + part.Offset = origOffset; + + if (part.Offset + part.Word.Length >= row.ProblemValue.Length) + part.Offset = row.ProblemValue.Length - part.Word.Length; + + while (row.ProblemValue.Substring(part.Offset, part.Word.Length) != part.Word) + part.Offset--; + } + } + + internal class FailureStoreReportRecord + { + public string Resource { get; init; } + public string ResourcePrimaryKey { get; init; } + public string ProblemField { get; init; } + + // NOTE(rkm 2023-12-07) Allow modification to deal with certain edge cases + public string ProblemValue { get; set; } + + public string PartWords { get; init; } + public string PartClassifications { get; init; } + public string PartOffsets { get; init; } - loadedRows(lineNumber); + public override string ToString() => $"Failure({Resource}|{ResourcePrimaryKey}|{ProblemField}|{ProblemValue}|{PartWords}|{PartClassifications}|{PartOffsets})"; } } diff --git a/IsIdentifiable/Rules/PartPatternFilterRule.cs b/IsIdentifiable/Rules/PartPatternFilterRule.cs new file mode 100644 index 00000000..6881e2e9 --- /dev/null +++ b/IsIdentifiable/Rules/PartPatternFilterRule.cs @@ -0,0 +1,116 @@ +using IsIdentifiable.Failures; +using System; +using System.Linq; +using System.Text.RegularExpressions; + +namespace IsIdentifiable.Rules; + +public class PartPatternFilterRule : RegexRule +{ + /// + /// Combination of and . Use this to validate + /// whether the rule should be applied. + /// + protected Regex IfPartPatternRegex; + private string _ifPartPatternString; + + /// + /// The Regex pattern which should be used to match values a specific failing part + /// + public string IfPartPattern + { + get => _ifPartPatternString; + set + { + _ifPartPatternString = value; + RebuildPartRegex(); + } + } + + /// + /// Whether the IfPattern and IfPartPattern are case sensitive (default is false) + /// + public override bool CaseSensitive + { + get => base.CaseSensitive; + set + { + base.CaseSensitive = value; + RebuildPartRegex(); + } + } + + public string WordBefore { get; set; } + + public string WordAfter { get; set; } + + private Regex? _wordBeforeRegex; + private Regex? _wordAfterRegex; + + private int _usedCount = 0; + private object _usedCountLock = new(); + + public int UsedCount + { + get => _usedCount; + } + + public void IncrementUsed() + { + lock (_usedCountLock) + { + ++_usedCount; + } + } + + // TODO(rkm 2023-07-25) Shouldn't be needed when IfPattern is readonly + private void RebuildPartRegex() + { + if (_ifPartPatternString == null) + throw new Exception("Illegal rule setup. You must specify IfPartPattern"); + + if (!_ifPartPatternString.StartsWith("^") || !_ifPartPatternString.EndsWith("$")) + throw new ArgumentException("IfPartPattern must be enclosed by ^ and $"); + + IfPartPatternRegex = new Regex(_ifPartPatternString, (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); + } + + public bool Covers(FailurePart failurePart, string problemValue) + { + if (As != FailureClassification.None && As != failurePart.Classification) + return false; + + bool matchesBefore = false; + if (!string.IsNullOrWhiteSpace(WordBefore)) + { + var problemValueUpToOffset = problemValue[..(failurePart.Offset + failurePart.Word.Length)]; + _wordBeforeRegex ??= new Regex(@$"\b{WordBefore}(\s|-)+{IfPartPattern.TrimStart('^')}", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); + matchesBefore = _wordBeforeRegex.Matches(problemValueUpToOffset).Any(); + } + + bool matchesAfter = false; + if (!string.IsNullOrWhiteSpace(WordAfter)) + { + var problemValueFromOffset = problemValue[failurePart.Offset..]; + _wordAfterRegex ??= new Regex(@$"{IfPartPattern.TrimEnd('$')}(\s|-)+{WordAfter}\b", (CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase) | RegexOptions.Compiled); + matchesAfter = _wordAfterRegex.Matches(problemValueFromOffset).Any(); + } + + if ( + matchesBefore && string.IsNullOrWhiteSpace(WordAfter) || + matchesAfter && string.IsNullOrWhiteSpace(WordBefore) || + (matchesBefore && matchesAfter) + ) + { + return true; + } + else if (!string.IsNullOrWhiteSpace(WordBefore) || !string.IsNullOrWhiteSpace(WordAfter)) + { + return false; + } + + return IfPartPatternRegex.Matches(failurePart.Word).Any(); + } + + public override string ToString() => $"Pat:'{_ifPartPatternString}' WB:'{WordBefore}' WA:'{WordAfter}' Col:'{IfColumn}' As:'{As}' x{_usedCount:N0}"; +} diff --git a/Tests/Directory.Build.props b/Tests/Directory.Build.props index f6bf3b83..b39947e6 100644 --- a/Tests/Directory.Build.props +++ b/Tests/Directory.Build.props @@ -7,10 +7,12 @@ runtime; build; native; contentfiles; analyzers; buildtransitive + all runtime; build; native; contentfiles; analyzers; buildtransitive + - \ No newline at end of file + diff --git a/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj b/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj index f3b20ece..00655aff 100644 --- a/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj +++ b/Tests/IsIdentifiableTests/IsIdentifiable.Tests.csproj @@ -14,13 +14,6 @@ PreserveNewest - - - - - - - diff --git a/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs b/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs new file mode 100644 index 00000000..6c3e21a8 --- /dev/null +++ b/Tests/IsIdentifiableTests/Rules/PartPatternFilterRuleTests.cs @@ -0,0 +1,158 @@ +using IsIdentifiable.Failures; +using IsIdentifiable.Rules; +using MongoDB.Driver.Linq; +using NUnit.Framework; +using System.Collections.Generic; +using System.Linq; + +namespace IsIdentifiable.Tests.Rules; + +internal class PartPatternFilterRuleTests +{ + private static IEnumerable TestCaseSource_ForamenMonroParts() + { + var parts = new List(); + foreach (var prefix in new[] { "foramen", "foramina" }) + { + foreach (var join in new[] { "of", "" }) + { + foreach (var name in new[] { "monro", "monroe" }) + { + parts.Add(string.Join(" ", (new[] { prefix, join, name }).Where(x => !string.IsNullOrEmpty(x)))); + } + } + } + return parts; + } + + [TestCaseSource(nameof(TestCaseSource_ForamenMonroParts))] + public void Covers_ForamenMonro(string valuePart) + { + // Arrange + var rule = new PartPatternFilterRule() + { + IfPartPattern = "^Monroe?$", + WordBefore = "(foramen|foramina)( of)?", + IfColumn = "TextValue", + As = FailureClassification.Person, + Action = RuleAction.Ignore, + }; + var name = valuePart.Split()[^1]; + var problemValue = $"Mr {name} has an issue with his {valuePart}"; + var validFailurePart = new FailurePart(name, FailureClassification.Person, 3); + var problemOffset = problemValue.LastIndexOf(" ") + 1; + var filteredFailurePart = new FailurePart(name, FailureClassification.Person, problemOffset); + + // Act + var coversValidFailurePart = rule.Covers(validFailurePart, problemValue); + var coversFilteredFailurePart = rule.Covers(filteredFailurePart, problemValue); + + // Assert + Assert.False(coversValidFailurePart); + Assert.True(coversFilteredFailurePart); + } + + private static IEnumerable TestCaseSource_HodgkinLymphomaParts() + { + var parts = new List(); + foreach (var name in new[] { "hodgkin", "hodgkins", "hodgkin's" }) + { + foreach (var postfix in new[] { "lymphoma", "disease" }) + { + parts.Add(string.Join(" ", (new[] { name, postfix }).Where(x => !string.IsNullOrEmpty(x)))); + } + } + return parts; + } + + [TestCaseSource(nameof(TestCaseSource_HodgkinLymphomaParts))] + public void Covers_HodgkinLymphoma(string valuePart) + { + // Arrange + var rule = new PartPatternFilterRule() + { + Action = RuleAction.Ignore, + As = FailureClassification.Person, + IfColumn = "TextValue", + IfPartPattern = "^Hodgkin(s|'s)?$", + WordAfter = "(lymphoma|disease|
lymphoma)", + }; + var name = valuePart.Split()[0]; + var problemValue = $"Mr {name} possibly has {valuePart}"; + var validFailurePart = new FailurePart(name, FailureClassification.Person, 3); + var problemOffset = problemValue.IndexOf($"has {name}") + 4; + var filteredFailurePart = new FailurePart(name, FailureClassification.Person, problemOffset); + + // Act + var coversValidFailurePart = rule.Covers(validFailurePart, problemValue); + var coversFilteredFailurePart = rule.Covers(filteredFailurePart, problemValue); + + // Assert + Assert.False(coversValidFailurePart); + Assert.True(coversFilteredFailurePart); + } + + [Test] + public void Covers_HyphenInWordBefore() + { + // Arrange + var rule = new PartPatternFilterRule() + { + IfPartPattern = "^Hodgkin$", + WordBefore = "Non", + IfColumn = "TextValue", + As = FailureClassification.Person, + Action = RuleAction.Ignore, + }; + var problemValue = $"Non-Hodgkin's lymphoma"; + var failurePart = new FailurePart("Hodgkin", FailureClassification.Person, 4); + + // Act + var ruleCoversFailurePart = rule.Covers(failurePart, problemValue); + + // Assert + Assert.True(ruleCoversFailurePart); + } + + [Test] + public void Covers_HyphenInWordAfter() + { + // Arrange + var rule = new PartPatternFilterRule() + { + IfPartPattern = "^Gr(a|e)y$", + WordAfter = "white", + IfColumn = "TextValue", + As = FailureClassification.Person, + Action = RuleAction.Ignore, + }; + var problemValue = $"Gray-white foo"; + var failurePart = new FailurePart("Gray", FailureClassification.Person, 0); + + // Act + var ruleCoversFailurePart = rule.Covers(failurePart, problemValue); + + // Assert + Assert.True(ruleCoversFailurePart); + } + + [Test] + public void Covers_AnyFailureClassification() + { + // Arrange + var rule = new PartPatternFilterRule() + { + IfPartPattern = "^Test$", + IfColumn = "TextValue", + Action = RuleAction.Ignore, + }; + var problemValue = $"Test"; + var failurePart = new FailurePart("Test", FailureClassification.Person, 0); + + // Act + var ruleCoversFailurePart = rule.Covers(failurePart, problemValue); + + // Assert + Assert.True(ruleCoversFailurePart); + } +} diff --git a/Tests/IsIdentifiableTests/StoreReportTests.cs b/Tests/IsIdentifiableTests/StoreReportTests.cs index b0ecff25..1ddfb2ef 100644 --- a/Tests/IsIdentifiableTests/StoreReportTests.cs +++ b/Tests/IsIdentifiableTests/StoreReportTests.cs @@ -34,7 +34,7 @@ public void TestReconstructionFromCsv() var failure = new Failure( new FailurePart[] { - new("Kansas", FailureClassification.Location, 12), + new("Kansas", FailureClassification.Location, 13), new("Toto", FailureClassification.Location, 28) }) { diff --git a/ii/Constants.cs b/ii/Constants.cs index bd6b3834..0cef015b 100644 --- a/ii/Constants.cs +++ b/ii/Constants.cs @@ -5,12 +5,12 @@ public static class Constants /// /// Width of modal popup dialogues /// - public const int DlgWidth = 78; + public const int DlgWidth = 120; /// /// Height of modal popup dialogues /// - public const int DlgHeight = 18; + public const int DlgHeight = 60; /// /// Border boundary of modal popup dialogues diff --git a/ii/MainWindow.cs b/ii/MainWindow.cs index 2d42b68f..1c25b2e8 100644 --- a/ii/MainWindow.cs +++ b/ii/MainWindow.cs @@ -6,6 +6,7 @@ using IsIdentifiable.Rules; using System; using System.Collections.Generic; +using System.IO; using System.IO.Abstractions; using System.Linq; using System.Text; @@ -33,7 +34,7 @@ internal class MainWindow : IRulePatternFactory, IDisposable /// public RowUpdater Updater { get; } - private readonly FailureView _valuePane; + private readonly FailureView _failureView; private readonly Label _info; private readonly SpinnerView _spinner; private readonly TextField _gotoTextField; @@ -86,7 +87,8 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti Menu = new MenuBar(new MenuBarItem[] { new("_File (F9)", new MenuItem [] { new("_Open Report",null, OpenReport), - new("_Quit", null, static () => Application.RequestStop()) + new("_Export 'Outstanding Failures'", null, ExportOutstandingFailures), + new("_Quit", null, static () => Application.RequestStop()), }), new("_Options", new MenuItem [] { miCustomPatterns = new MenuItem("_Custom Patterns",null,ToggleCustomPatterns){CheckType = MenuItemCheckStyle.Checked,Checked = false} @@ -107,20 +109,20 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti ColorScheme = _greyOnBlack }; - _valuePane = new FailureView() + _failureView = new FailureView() { X = 0, Y = 1, Width = Dim.Fill(), - Height = 10, + Height = Dim.Fill(), }; var frame = new FrameView("Options") { X = 0, - Y = 12, + Y = Console.WindowHeight * 2 / 3, Width = Dim.Fill(), - Height = Dim.Fill() + Height = Dim.Fill(), }; var ignoreButton = new Button("Ignore") @@ -192,11 +194,16 @@ public MainWindow(IsIdentifiableOptions analyserOpts, IsIdentifiableReviewerOpti viewMain.Add(_spinner); _spinner.Visible = false; - viewMain.Add(_valuePane); + viewMain.Add(_failureView); viewMain.Add(frame); if (!string.IsNullOrWhiteSpace(opts.FailuresCsv)) - OpenReport(opts.FailuresCsv, (e) => throw e); + { + Exception? exc = null; + OpenReport(opts.FailuresCsv, (e) => exc = e); + if(exc != null) + Helpers.ShowException("Failed to Load", exc); + } var tabView = new TabView() { @@ -283,20 +290,21 @@ private void GoTo(int page) return; try { - CurrentReport.GoTo(page); - _info.Text = CurrentReport.DescribeProgress(); - SetupToShow(CurrentReport.Current); + if (CurrentReport.GoTo(page)) + { + _info.Text = CurrentReport.DescribeProgress(); + SetupToShow(CurrentReport.Current); + } } catch (Exception e) { Helpers.ShowException("Failed to GoTo", e); } - } private void SetupToShow(Failure? f) { - _valuePane.CurrentFailure = f; + _failureView.CurrentFailure = f; if (f != null) { @@ -318,7 +326,7 @@ private void BeginNext() private void Next() { - if (_valuePane.CurrentFailure == null || CurrentReport == null) + if (_failureView.CurrentFailure == null || CurrentReport == null) return; _spinner.Visible = true; @@ -372,7 +380,7 @@ private void Next() private void Ignore() { - if (_valuePane.CurrentFailure == null || CurrentReport == null) + if (_failureView.CurrentFailure == null || CurrentReport == null) return; if (taskToLoadNext != null && !taskToLoadNext.IsCompleted) @@ -383,7 +391,7 @@ private void Ignore() try { - Ignorer.Add(_valuePane.CurrentFailure); + Ignorer.Add(_failureView.CurrentFailure); History.Push(new MainWindowHistory(CurrentReport.CurrentIndex, Ignorer)); } catch (OperationCanceledException) @@ -395,7 +403,7 @@ private void Ignore() } private void Update() { - if (_valuePane.CurrentFailure == null || CurrentReport == null) + if (_failureView.CurrentFailure == null || CurrentReport == null) return; if (taskToLoadNext != null && !taskToLoadNext.IsCompleted) @@ -407,7 +415,7 @@ private void Update() try { // TODO(rkm 2021-04-09) Server always passed as null here, but Update seems to require it? - Updater.Update(null, _valuePane.CurrentFailure, null /*create one yourself*/); + Updater.Update(null, _failureView.CurrentFailure, null /*create one yourself*/); History.Push(new MainWindowHistory(CurrentReport.CurrentIndex, Updater)); } @@ -425,6 +433,24 @@ private void Update() BeginNext(); } + private void ExportOutstandingFailures() + { + if (rulesView.OutstandingFiles == null) + { + Helpers.ShowMessage("Error", "You must evaluate the rules on a report first."); + return; + } + + var now = DateTime.UtcNow.ToString("s").Replace(':', '-'); + var fileName = $"OutstandingFiles-{now}.csv"; + using var sw = new StreamWriter(fileName); + + foreach (var file in rulesView.OutstandingFiles) + sw.WriteLine(file); + + Helpers.ShowMessage("Complete", $"Wrote {rulesView.OutstandingFiles.Count} unique item(s) to {fileName}"); + } + private void OpenReport() { using var ofd = new OpenDialog("Load CSV Report", "Enter file path to load") @@ -474,12 +500,15 @@ private void OpenReport(string? path, Action exceptionHandler) return !done; }); + _currentReportLabel.Text = $"Report:{_fileSystem.Path.GetFileName(path)}"; + _currentReportLabel.SetNeedsDisplay(); + Task.Run(() => { try { CurrentReport = new ReportReader(_fileSystem.FileInfo.New(path), (s) => - rows.Text = $"Loaded: {s:N0} rows", _fileSystem, cts.Token); + rows.Text = $"Loaded: {s:N0} rows", _fileSystem, cts.Token, Ignorer.PartRules_Temp); SetupToShow(CurrentReport.Failures.FirstOrDefault()); BeginNext(); @@ -489,6 +518,7 @@ private void OpenReport(string? path, Action exceptionHandler) { exceptionHandler(e); rows.Text = "Error"; + _currentReportLabel.Text = "Report: "; } } @@ -503,9 +533,6 @@ private void OpenReport(string? path, Action exceptionHandler) cts.Dispose(); }); - _currentReportLabel.Text = $"Report:{_fileSystem.Path.GetFileName(path)}"; - _currentReportLabel.SetNeedsDisplay(); - Application.Run(dlg); } @@ -664,7 +691,7 @@ public string GetPattern(object sender, Failure failure) public void Dispose() { - _valuePane.Dispose(); + _failureView.Dispose(); _info.Dispose(); _spinner.Dispose(); _gotoTextField.Dispose(); diff --git a/ii/Program.cs b/ii/Program.cs index 49f025cc..ee2c9215 100644 --- a/ii/Program.cs +++ b/ii/Program.cs @@ -6,11 +6,14 @@ using FAnsi.Implementations.PostgreSql; using FellowOakDicom; using IsIdentifiable.Options; +using IsIdentifiable.Reporting.Reports; using IsIdentifiable.Runners; using Microsoft.Extensions.FileSystemGlobbing; using System; using System.IO.Abstractions; using System.Linq; +using System.Text.RegularExpressions; +using System.Threading; using YamlDotNet.Serialization; namespace ii; @@ -100,13 +103,15 @@ public static int Main(string[] args) IsIdentifiableDicomFileOptions, IsIdentifiableMongoOptions, IsIdentifiableFileGlobOptions, - IsIdentifiableReviewerOptions>(args) + IsIdentifiableReviewerOptions, + IsIdentifiableReportValidatorOptions>(args) .MapResult( (IsIdentifiableRelationalDatabaseOptions o) => Run(o, fileSystem), (IsIdentifiableDicomFileOptions o) => Run(o, fileSystem), (IsIdentifiableMongoOptions o) => Run(o, fileSystem), (IsIdentifiableFileGlobOptions o) => Run(o, fileSystem), (IsIdentifiableReviewerOptions o) => Run(o, fileSystem), + (IsIdentifiableReportValidatorOptions o) => Run(o, fileSystem), // return exit code 0 for user requests for help errors => args.Any(a => a.Equals("--help", StringComparison.InvariantCultureIgnoreCase)) ? 0 : 1); @@ -127,10 +132,48 @@ private static int Run(IsIdentifiableReviewerOptions opts, IFileSystem fileSyste { Inherit(opts); + if (!fileSystem.File.Exists(opts.FailuresCsv)) + { + Console.Error.WriteLine($"Error: Could not find {opts.FailuresCsv}"); + return 1; + } + + const string expectedHeader = "Resource,ResourcePrimaryKey,ProblemField,ProblemValue,PartWords,PartClassifications,PartOffsets"; + var line = fileSystem.File.ReadLines(opts.FailuresCsv).FirstOrDefault(); + if (line == null || Regex.Replace(line, @"\s+", "") != line) + { + Console.Error.WriteLine($"Error: Expected CSV Failure header {expectedHeader}"); + return 1; + } + var reviewer = new ReviewerRunner(GlobalOptions?.IsIdentifiableOptions, opts, fileSystem); return reviewer.Run(); } + private static int Run(IsIdentifiableReportValidatorOptions opts, IFileSystem fileSystem) + { + if (GlobalOptions?.IsIdentifiableReviewerOptions != null) + opts.InheritValuesFrom(GlobalOptions.IsIdentifiableReviewerOptions); + + if (!fileSystem.File.Exists(opts.FailuresCsv)) + { + Console.Error.WriteLine($"Error: Could not find {opts.FailuresCsv}"); + return 1; + } + + const string expectedHeader = "Resource,ResourcePrimaryKey,ProblemField,ProblemValue,PartWords,PartClassifications,PartOffsets"; + var line = fileSystem.File.ReadLines(opts.FailuresCsv).FirstOrDefault(); + if (line == null || Regex.Replace(line, @"\s+", "") != line) + { + Console.Error.WriteLine($"Error: Expected CSV Failure header {expectedHeader}"); + return 1; + } + + var report = new FailureStoreReport("", 0, fileSystem); + var failures = FailureStoreReport.Deserialize(fileSystem.FileInfo.New(opts.FailuresCsv), (_) => { }, new CancellationTokenSource().Token, partRules: null, runParallel: false, opts.StopAtFirstError).ToArray(); + + return 0; + } private static int Run(IsIdentifiableDicomFileOptions opts, IFileSystem fileSystem) { diff --git a/ii/Views/FailureView.cs b/ii/Views/FailureView.cs index e71cb336..225ea5fc 100644 --- a/ii/Views/FailureView.cs +++ b/ii/Views/FailureView.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text; using Terminal.Gui; using Attribute = Terminal.Gui.Attribute; @@ -26,16 +27,16 @@ public override void Redraw(Rect bounds) var w = bounds.Width; var h = bounds.Height; - var toDisplay = CurrentFailure?.ProblemValue ?? " "; + var problemValue = CurrentFailure?.ProblemValue ?? " "; //if the original string validated var originalNewlines = new HashSet(); - for (var i = 0; i < toDisplay.Length; i++) - if (toDisplay[i] == '\n') + for (var i = 0; i < problemValue.Length; i++) + if (problemValue[i] == '\n') originalNewlines.Add(i); - var lines = Helpers.Wrap(toDisplay, bounds.Width).Split('\n', StringSplitOptions.RemoveEmptyEntries); + var lines = Helpers.Wrap(problemValue, bounds.Width).Split('\n', StringSplitOptions.RemoveEmptyEntries); var characterOffset = 0; Attribute? oldColor = null; @@ -78,19 +79,19 @@ public override void Redraw(Rect bounds) } } - if (CurrentFailure != null) - { - Driver.SetAttribute(_attNormal); - Move(0, h); - - var classification = - $"C:{string.Join(",", CurrentFailure.Parts.Select(p => p.Classification).Distinct().ToArray())}"; + if (CurrentFailure == null) + return; - var field = CurrentFailure.ProblemField; - classification = classification.PadRight(w - field.Length); - - Driver.AddStr(classification + field); - } + Driver.SetAttribute(_attNormal); + Move(0, h); + var sb = new StringBuilder(); + sb.Append($"ProblemField: {CurrentFailure.ProblemField}. "); + sb.Append($"Classifications: "); + foreach (var failurePart in CurrentFailure.Parts) + sb.Append($"'{failurePart.Word}' at {failurePart.Offset} => {failurePart.Classification}, "); + sb.Length -= 2; + sb.Append('.'); + Driver.AddStr(sb.ToString().PadRight(w)); } } diff --git a/ii/Views/OutstandingFailureNode.cs b/ii/Views/OutstandingFailureNode.cs index 8c02116f..36a067ef 100644 --- a/ii/Views/OutstandingFailureNode.cs +++ b/ii/Views/OutstandingFailureNode.cs @@ -1,4 +1,4 @@ -using IsIdentifiable.Failures; +using IsIdentifiable.Failures; using Terminal.Gui.Trees; namespace ii.Views; @@ -21,8 +21,5 @@ public OutstandingFailureNode(Failure failure, int numberOfTimesReported) NumberOfTimesReported = numberOfTimesReported; } - public override string ToString() - { - return $"{Failure.ProblemValue} x{NumberOfTimesReported:N0}"; - } + public override string ToString() => $"({NumberOfTimesReported:N0}x) {Failure.ProblemValue}"; } diff --git a/ii/Views/RulesView.cs b/ii/Views/RulesView.cs index 51ceaacc..dc681d1e 100644 --- a/ii/Views/RulesView.cs +++ b/ii/Views/RulesView.cs @@ -17,6 +17,8 @@ class RulesView : View public IgnoreRuleGenerator? Ignorer { get; private set; } public RowUpdater? Updater { get; private set; } + public List? OutstandingFiles { get; private set; } + private readonly TreeView _treeView; /// @@ -108,7 +110,7 @@ public void LoadReport(ReportReader currentReport, IgnoreRuleGenerator ignorer, Updater = updater; _bulkIgnorePatternFactory = bulkIgnorePatternFactory; - _lblInitialSummary.Text = $"There are {ignorer.Rules.Count} ignore rules and {updater.Rules.Count} update rules. Current report contains {CurrentReport.Failures.Length:N0} Failures"; + _lblInitialSummary.Text = $"There are {ignorer.Rules.Count - ignorer.PartRules_Temp.Count} ignore rules, {ignorer.PartRules_Temp.Count} PartPatternFilterRules, and {updater.Rules.Count} update rules. Current report contains {CurrentReport.Failures.Length:N0} Failures"; } @@ -259,7 +261,14 @@ private void Activate(OutstandingFailureNode ofn) using var cancel = new Button("Cancel"); cancel.Clicked += () => { Application.RequestStop(); }; - using var dlg = new Dialog("Failure", Constants.DlgWidth, Constants.DlgHeight, ignore, update, cancel); + using var dlg = new Dialog( + "Failure", + Math.Min(Constants.DlgWidth, Console.WindowWidth - (2 * Constants.DlgBoundary)), + Math.Min(Constants.DlgHeight, Console.WindowHeight - (2 * Constants.DlgBoundary)), + ignore, + update, + cancel + ); var lbl = new FailureView() { @@ -354,8 +363,16 @@ private void EvaluateRuleCoverage() var colliding = new TreeNodeWithCount("Colliding Rules"); var ignore = new TreeNodeWithCount("Ignore Rules Used"); + + var partRulesused = new TreeNodeWithCount("IfPartPattern Rules Used") { OverrideCount = 0 }; + foreach (var rule in Ignorer.PartRules_Temp.Where(x => x.UsedCount > 0).OrderByDescending(x => x.UsedCount)) + { + partRulesused.OverrideCount += rule.UsedCount; + partRulesused.Children.Add(new TreeNode(rule.ToString())); + } + var update = new TreeNodeWithCount("Update Rules Used"); - var outstanding = new TreeNodeWithCount("Outstanding Failures"); + var outstanding = new TreeNodeWithCount("Outstanding Failures", countSubChildren: true); var allRules = Ignorer.Rules.Union(Updater.Rules).ToList(); @@ -401,7 +418,7 @@ private void EvaluateRuleCoverage() cts.Dispose(); _treeView.RebuildTree(); - _treeView.AddObjects(new[] { colliding, ignore, update, outstanding }); + _treeView.AddObjects(new[] { colliding, ignore, partRulesused, update, outstanding }); }, SynchronizationContext.Current); Application.Run(dlg); @@ -521,6 +538,8 @@ private void EvaluateRuleCoverageAsync(Label stage, ProgressBar progress, Label .OrderByDescending(v => v.Failures.Sum(f => f.NumberOfTimesReported)) .Cast() .ToList(); + + OutstandingFiles = outstandingFailures.Select(x => x.Value.Failure.Resource).Distinct().ToList(); } private static void SetProgress(ProgressBar pb, View tp, int done, int max) diff --git a/ii/Views/TreeNodeWithCount.cs b/ii/Views/TreeNodeWithCount.cs index 143faabc..f2d0dbec 100644 --- a/ii/Views/TreeNodeWithCount.cs +++ b/ii/Views/TreeNodeWithCount.cs @@ -1,4 +1,5 @@ -using Terminal.Gui.Trees; +using System.Linq; +using Terminal.Gui.Trees; namespace ii.Views; @@ -6,13 +7,25 @@ internal class TreeNodeWithCount : TreeNode { public string Heading { get; } - public TreeNodeWithCount(string heading) + private readonly bool _countSubChildren; + + public int OverrideCount { get; set; } = -1; + + public TreeNodeWithCount(string heading, bool countSubChildren = false) { Heading = heading; + _countSubChildren = countSubChildren; } public override string ToString() { - return $"{Heading} ({Children.Count:N0})"; + var count = 0; + if (OverrideCount != -1) + count = OverrideCount; + else if (_countSubChildren) + count = Children.Sum(x => x.Children.Count); + else + count = Children.Count; + return $"{Heading} ({count:N0})"; } }