diff --git a/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs b/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs index f8b9b354..d3b2751b 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs @@ -14,11 +14,13 @@ public class CharacterDefinitionTable : ICollection { private readonly Dictionary _charDefLookup; private readonly HashSet _charDefs; + private readonly Dictionary _naturalClassLookup; public CharacterDefinitionTable() { _charDefLookup = new Dictionary(); _charDefs = new HashSet(); + _naturalClassLookup = new Dictionary(); } public string Name { get; set; } @@ -43,6 +45,11 @@ public CharacterDefinition AddBoundary(IEnumerable strRep) return Add(strRep, HCFeatureSystem.Boundary, null); } + public void AddNaturalClass(NaturalClass naturalClass) + { + _naturalClassLookup[naturalClass.Name] = naturalClass; + } + /// /// Adds the character definition. /// @@ -103,6 +110,9 @@ private bool GetShapeNodes(string str, out IEnumerable nodes, out int var nodesList = new List(); int i = 0; string normalized = str.Normalize(NormalizationForm.FormD); + bool optional = false; + int optionalPos = 0; + int optionalCount = 0; while (i < normalized.Length) { bool match = false; @@ -120,15 +130,83 @@ private bool GetShapeNodes(string str, out IEnumerable nodes, out int break; } } + if (match) continue; - if (!match) + // Check for pattern language. + // NB: This only happens when the characters don't match. + if (normalized[i] == '[') + { + // Example: [Seg]. + // Look for a natural class. + int closePos = normalized.IndexOf("]", i); + if (closePos > 0) + { + string className = normalized.Substring(i + 1, closePos - i - 1); + if (_naturalClassLookup.ContainsKey(className)) + { + NaturalClass naturalClass = _naturalClassLookup[className]; + var node = new ShapeNode(naturalClass.FeatureStruct); + nodesList.Add(node); + i = closePos + 1; + continue; + } + } + } + else if (normalized[i] == '(') + { + if (i + 1 < normalized.Length && normalized[i + 1] == '[') + { + // The natural class that follows is optional. + // Wait for the close parenthesis to process. + optional = true; + optionalPos = i; + optionalCount = nodesList.Count; + i++; + continue; + } + } + else if (normalized[i] == ')') + { + if (optional && nodesList.Count == optionalCount + 1) + { + // Example: ([Seg]). + // Ill-formed: ([C][V]). + // Make the last node optional. + nodesList[nodesList.Count - 1].Annotation.Optional = true; + optional = false; + i++; + continue; + } + } + else if (normalized[i] == '*') { - nodes = null; - errorPos = i; - if (!str.IsNormalized(NormalizationForm.FormD)) - errorPos = normalized.Substring(0, errorPos).Normalize().Length; - return false; + if (i > 0 && normalized[i - 1] == ']') + { + // Example: [Seg]*. + // Make the last node Kleene star. + nodesList[nodesList.Count - 1].Annotation.Optional = true; + nodesList[nodesList.Count - 1].Annotation.Iterative = true; + i++; + continue; + } } + // Kleene plus doesn't work because '+' is a boundary marker. + + // Failure + nodes = null; + errorPos = i; + if (!str.IsNormalized(NormalizationForm.FormD)) + errorPos = normalized.Substring(0, errorPos).Normalize().Length; + return false; + } + if (optional) + { + // The open parenthesis didn't get closed. + nodes = null; + errorPos = optionalPos; + if (!str.IsNormalized(NormalizationForm.FormD)) + errorPos = normalized.Substring(0, errorPos).Normalize().Length; + return false; } nodes = nodesList; errorPos = -1; diff --git a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs index 3caba1af..a63cc047 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs @@ -30,9 +30,18 @@ public Segments Segments public bool IsBound { get; set; } /// - /// Does this represent a lexical pattern (e.g. [Seg]+)? + /// Does this represent a lexical pattern (e.g. [Seg]*)? /// - public bool IsPattern { get; set; } + public bool IsPattern { + get + { + foreach (var node in _segments.Shape.GetNodes(_segments.Shape.Range)) + { + if (node.Annotation.IsNaturalClass) return true; + } + return false; + } + } protected override bool ConstraintsEqual(Allomorph other) { diff --git a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs index cacb9b55..cf8ac9a9 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs @@ -717,6 +717,10 @@ private void LoadNaturalClass(XElement natClassElem) _language.NaturalClasses.Add(nc); _natClasses[(string)natClassElem.Attribute("id")] = nc; + foreach (var table in _language.CharacterDefinitionTables) + { + table.AddNaturalClass(nc); + } } private void LoadPhonologicalRule(XElement pruleElem) diff --git a/src/SIL.Machine/Annotations/Annotation.cs b/src/SIL.Machine/Annotations/Annotation.cs index e75ea2bb..1b29933d 100644 --- a/src/SIL.Machine/Annotations/Annotation.cs +++ b/src/SIL.Machine/Annotations/Annotation.cs @@ -20,6 +20,7 @@ public class Annotation private FeatureStruct _fs; private bool _optional; private bool _iterative; + private bool _isNaturalClass; private object _data; public Annotation(Range range, FeatureStruct fs) @@ -130,10 +131,27 @@ public bool Optional } } + /// + /// Gets or sets a value indicating whether this annotation represents a natural class. + /// This is used for lexical patterns such as [Seg]. + /// + /// + /// true if this annotation is a natural class, otherwise false. + /// + + public bool IsNaturalClass + { + get { return _isNaturalClass; } + set + { + CheckFrozen(); + _isNaturalClass = value; + } + } + /// /// Gets or sets a value indicating whether this annotation is iterative. - /// This is used in lexical patterns such as [Seg]+: - /// Kleene star = iterative and optional, Kleene plus = iterative and not optional. + /// This is used in lexical patterns such as [Seg]*: /// /// /// true if this annotation is iterative, otherwise false. diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs index 4b48a4d4..267d0160 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs @@ -93,7 +93,9 @@ public void AnalyzeWord_CanGuess_ReturnsCorrectAnalysis() begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor )); shape.AddRange(new List { node }); - var lexicalPattern = new RootAllomorph(new Segments(Table1, "", shape)); + var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" }; + Table1.AddNaturalClass(naturalClass); + var lexicalPattern = new RootAllomorph(new Segments(Table1, "[Any]*")); var morpher = new Morpher(TraceManager, Language); morpher.LexicalPatterns.Add(lexicalPattern); @@ -187,21 +189,13 @@ public void TestMatchNodesWithPattern() FeatureValue valueA = new StringFeatureValue("A"); FeatureValue valueB = new StringFeatureValue("B"); FeatureStruct fs1A = new FeatureStruct(); - FeatureStruct fs1B = new FeatureStruct(); FeatureStruct fs2B = new FeatureStruct(); fs1A.AddValue(feat1, valueA); - fs1B.AddValue(feat1, valueB); fs2B.AddValue(feat2, valueB); // Test feature matching. List nodesfs1A = new List { new ShapeNode(fs1A) }; - List nodesfs1B = new List { new ShapeNode(fs1B) }; List nodesfs2B = new List { new ShapeNode(fs2B) }; - Assert.That(morpher.MatchNodesWithPattern(nodesfs1A, nodesfs1B), Is.Empty); - Assert.That( - morpher.MatchNodesWithPattern(nodesfs1A, nodesfs1A), - Is.EqualTo(new List> { nodesfs1A }) - ); var fs1A2B = morpher.MatchNodesWithPattern(nodesfs1A, nodesfs2B); Assert.That( fs1A2B.ToList()[0][0].Annotation.FeatureStruct.GetValue(feat1).ToString(), @@ -212,78 +206,70 @@ public void TestMatchNodesWithPattern() Is.EqualTo(valueB.ToString()) ); - List noNodes = new List { }; - List oneNode = new List { new ShapeNode(fs1A) }; - List twoNodes = new List { new ShapeNode(fs1A), new ShapeNode(fs1A) }; - List threeNodes = new List - { - new ShapeNode(fs1A), - new ShapeNode(fs1A), - new ShapeNode(fs1A) - }; - List fourNodes = new List - { - new ShapeNode(fs1A), - new ShapeNode(fs1A), - new ShapeNode(fs1A), - new ShapeNode(fs1A) - }; + IList noNodes = GetNodes(""); + IList oneNode = GetNodes("a"); + IList twoNodes = GetNodes("aa"); + IList threeNodes = GetNodes("aaa"); + IList fourNodes = GetNodes("aaaa"); + var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" }; + Table2.AddNaturalClass(naturalClass); // Test sequences. + Assert.That(morpher.MatchNodesWithPattern(oneNode, GetNodes("i")), Is.Empty); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, oneNode), + Is.EqualTo(new List> { oneNode }) + ); Assert.That( morpher.MatchNodesWithPattern(twoNodes, twoNodes), - Is.EquivalentTo(new List> { twoNodes }) + Is.EquivalentTo(new List> { twoNodes }) ); Assert.That( morpher.MatchNodesWithPattern(threeNodes, threeNodes), - Is.EquivalentTo(new List> { threeNodes }) + Is.EquivalentTo(new List> { threeNodes }) ); // Test optionality. - ShapeNode optionalNode = new ShapeNode(fs1A); - optionalNode.Annotation.Optional = true; - List optionalPattern = new List { optionalNode }; + IList optionalPattern = GetNodes("([Any])"); Assert.That( morpher.MatchNodesWithPattern(noNodes, optionalPattern), - Is.EquivalentTo(new List> { noNodes }) + Is.EquivalentTo(new List> { noNodes }) ); Assert.That( morpher.MatchNodesWithPattern(oneNode, optionalPattern), - Is.EquivalentTo(new List> { oneNode }) + Is.EquivalentTo(new List> { oneNode }) ); Assert.That(morpher.MatchNodesWithPattern(twoNodes, optionalPattern), Is.Empty); // Test Kleene star. - ShapeNode starNode = new ShapeNode(fs1A); - starNode.Annotation.Optional = true; - starNode.Annotation.Iterative = true; - List starPattern = new List { starNode }; + IList starPattern = GetNodes("[Any]*"); Assert.That( morpher.MatchNodesWithPattern(noNodes, starPattern), - Is.EquivalentTo(new List> { noNodes }) + Is.EquivalentTo(new List> { noNodes }) ); - var result = morpher.MatchNodesWithPattern(oneNode, starPattern); Assert.That( morpher.MatchNodesWithPattern(oneNode, starPattern), - Is.EquivalentTo(new List> { oneNode }) + Is.EquivalentTo(new List> { oneNode }) ); Assert.That( morpher.MatchNodesWithPattern(twoNodes, starPattern), - Is.EquivalentTo(new List> { twoNodes }) + Is.EquivalentTo(new List> { twoNodes }) ); - // Test Kleene plus. - ShapeNode plusNode = new ShapeNode(fs1A); - plusNode.Annotation.Iterative = true; - List plusPattern = new List { plusNode }; + // Test Kleene plus look alike ("+" is a boundary marker). + IList plusPattern = GetNodes("[Any]+"); Assert.That(morpher.MatchNodesWithPattern(noNodes, plusPattern), Is.Empty); Assert.That( morpher.MatchNodesWithPattern(oneNode, plusPattern), - Is.EquivalentTo(new List> { oneNode }) - ); - Assert.That( - morpher.MatchNodesWithPattern(twoNodes, plusPattern), - Is.EquivalentTo(new List> { twoNodes }) + Is.EquivalentTo(new List> { oneNode }) ); + Assert.That(morpher.MatchNodesWithPattern(twoNodes, plusPattern), Is.Empty); + } + + IList GetNodes(string pattern) + { + // Use Table2 because it has boundaries defined. + Shape shape = new Segments(Table2, pattern).Shape; + return shape.GetNodes(shape.Range).ToList(); } }