diff --git a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs index bd219e46..8dfb28d0 100644 --- a/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs +++ b/IsIdentifiable/Reporting/Reports/FailureStoreReport.cs @@ -195,6 +195,7 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo if (row.ProblemValue.Substring(part.Offset, part.Word.Length) == part.Word) continue; + // Test if the ProblemValue has been HTML escaped var encodedPartWord = WebUtility.HtmlEncode(part.Word); if (row.ProblemValue.Substring(part.Offset, encodedPartWord.Length) == encodedPartWord) { @@ -202,6 +203,19 @@ public static IEnumerable Deserialize(IFileInfo oldFile, Action lo continue; } + // Test if the ProblemValue has had a space inserted after a unicode point + var problemValueWithoutSpace = row.ProblemValue.Substring(part.Offset, part.Word.Length + 1).Replace(" ", ""); + if (problemValueWithoutSpace == part.Word) + { + part.Word = part.Word.Insert(1, " "); + + if (row.ProblemValue.Substring(part.Offset, part.Word.Length) != part.Word) + throw new Exception($"Could not fix additional whitespace in Failure\n{row}"); + + continue; + } + + // Finally, try shifting the offset around to find the word try { FixupOffsets(row, part);