diff --git a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs index 71d63220..378688d9 100644 --- a/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs +++ b/src/SIL.Machine.Morphology.HermitCrab/Morpher.cs @@ -359,12 +359,16 @@ LexEntry entry in SearchRootAllomorphs(input.Stratum, input.Shape) } } + /// + /// Match the input against lexical patterns and return matches. + /// private IEnumerable LexicalGuess(Word input) { if (_traceManager.IsTracing) _traceManager.LexicalLookup(input.Stratum, input); CharacterDefinitionTable table = input.Stratum.CharacterDefinitionTable; IEnumerable shapeNodes = input.Shape.GetNodes(input.Range); + HashSet shapeSet = new HashSet(); foreach (RootAllomorph lexicalPattern in _lexicalPatterns) { IEnumerable shapePattern = lexicalPattern.Segments.Shape.GetNodes( @@ -374,6 +378,10 @@ private IEnumerable LexicalGuess(Word input) { // Create a root allomorph for the guess. string shapeString = match.ToString(table, false); + if (shapeSet.Contains(shapeString)) + // Avoid duplicates caused by multiple paths through pattern (e.g. ([Seg])([Seg])). + continue; + shapeSet.Add(shapeString); var root = new RootAllomorph(new Segments(table, shapeString)) { Guessed = true }; // Create a lexical entry to hold the root allomorph. // (The root's Morpheme will point to the lexical entry.) @@ -402,6 +410,11 @@ private IEnumerable LexicalGuess(Word input) } } + /// + /// Match the shape nodes against the shape pattern. + /// This can produce multiple outputs if there is more than one path. + /// The outputs can be different because it unifies the nodes. + /// public IEnumerable> MatchNodesWithPattern( IList nodes, IList pattern, diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs index 4e7634c4..1237326e 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs @@ -242,6 +242,21 @@ public void TestMatchNodesWithPattern() ); Assert.That(morpher.MatchNodesWithPattern(twoNodes, optionalPattern), Is.Empty); + // Test ambiguity. + // (It is up to the caller to eliminate duplicates.) + IList optionalPattern2 = GetNodes("([Any])([Any])"); + Assert.That( + morpher.MatchNodesWithPattern(noNodes, optionalPattern2), + Is.EquivalentTo(new List> { noNodes }) + ); + Assert.That( + morpher.MatchNodesWithPattern(oneNode, optionalPattern2), + Is.EquivalentTo(new List> { oneNode, oneNode }) + ); + Assert.That(morpher.MatchNodesWithPattern(twoNodes, optionalPattern2), + Is.EquivalentTo(new List> { twoNodes })); + Assert.That(morpher.MatchNodesWithPattern(threeNodes, optionalPattern2), Is.Empty); + // Test Kleene star. IList starPattern = GetNodes("[Any]*"); Assert.That(