Skip to content

Commit

Permalink
Change GetShapeNodes to parse lexical patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
jtmaxwell3 committed Sep 16, 2024
1 parent 08751d2 commit cd10e0f
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 59 deletions.
90 changes: 84 additions & 6 deletions src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ public class CharacterDefinitionTable : ICollection<CharacterDefinition>
{
private readonly Dictionary<string, CharacterDefinition> _charDefLookup;
private readonly HashSet<CharacterDefinition> _charDefs;
private readonly Dictionary<string, NaturalClass> _naturalClassLookup;

/// <summary>
/// Creates a table with no character definitions and no registered natural classes.
/// </summary>
public CharacterDefinitionTable()
{
    // The three backing collections are independent of one another; each starts out empty.
    _naturalClassLookup = new Dictionary<string, NaturalClass>();
    _charDefs = new HashSet<CharacterDefinition>();
    _charDefLookup = new Dictionary<string, CharacterDefinition>();
}

public string Name { get; set; }
Expand All @@ -43,6 +45,11 @@ public CharacterDefinition AddBoundary(IEnumerable<string> strRep)
return Add(strRep, HCFeatureSystem.Boundary, null);
}

/// <summary>
/// Registers a natural class under its <c>Name</c> so that it can be looked up by name,
/// e.g. when a bracketed class reference such as <c>[Seg]</c> is parsed.
/// </summary>
/// <param name="naturalClass">The natural class to register. Must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="naturalClass"/> is <c>null</c>, or its <c>Name</c> is <c>null</c>
/// (the latter is raised by the underlying dictionary).
/// </exception>
public void AddNaturalClass(NaturalClass naturalClass)
{
    // Fail with a descriptive argument error instead of a bare NullReferenceException.
    if (naturalClass == null)
    {
        throw new System.ArgumentNullException(nameof(naturalClass));
    }

    // Indexer assignment: a class registered later under the same name replaces the earlier one.
    _naturalClassLookup[naturalClass.Name] = naturalClass;
}

/// <summary>
/// Adds the character definition.
/// </summary>
Expand Down Expand Up @@ -103,6 +110,9 @@ private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int
var nodesList = new List<ShapeNode>();
int i = 0;
string normalized = str.Normalize(NormalizationForm.FormD);
bool optional = false;
int optionalPos = 0;
int optionalCount = 0;
while (i < normalized.Length)
{
bool match = false;
Expand All @@ -120,15 +130,83 @@ private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int
break;
}
}
if (match) continue;

if (!match)
// Check for pattern language.
// NB: This only happens when the characters don't match.
if (normalized[i] == '[')
{
// Example: [Seg].
// Look for a natural class.
int closePos = normalized.IndexOf("]", i);
if (closePos > 0)
{
string className = normalized.Substring(i + 1, closePos - i - 1);
if (_naturalClassLookup.ContainsKey(className))
{
NaturalClass naturalClass = _naturalClassLookup[className];
var node = new ShapeNode(naturalClass.FeatureStruct);
nodesList.Add(node);
i = closePos + 1;
continue;
}
}
}
else if (normalized[i] == '(')
{
if (i + 1 < normalized.Length && normalized[i + 1] == '[')
{
// The natural class that follows is optional.
// Wait for the close parenthesis to process.
optional = true;
optionalPos = i;
optionalCount = nodesList.Count;
i++;
continue;
}
}
else if (normalized[i] == ')')
{
if (optional && nodesList.Count == optionalCount + 1)
{
// Example: ([Seg]).
// Ill-formed: ([C][V]).
// Make the last node optional.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
optional = false;
i++;
continue;
}
}
else if (normalized[i] == '*')
{
nodes = null;
errorPos = i;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
if (i > 0 && normalized[i - 1] == ']')
{
// Example: [Seg]*.
// Make the last node Kleene star.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
nodesList[nodesList.Count - 1].Annotation.Iterative = true;
i++;
continue;
}
}
// Kleene plus doesn't work because '+' is a boundary marker.

// Failure
nodes = null;
errorPos = i;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
}
if (optional)
{
// The open parenthesis didn't get closed.
nodes = null;
errorPos = optionalPos;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
}
nodes = nodesList;
errorPos = -1;
Expand Down
13 changes: 11 additions & 2 deletions src/SIL.Machine.Morphology.HermitCrab/RootAllomorph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,18 @@ public Segments Segments
public bool IsBound { get; set; }

/// <summary>
/// Does this represent a lexical pattern (e.g. [Seg]+)?
/// Does this represent a lexical pattern (e.g. [Seg]*)?
/// </summary>
public bool IsPattern { get; set; }
public bool IsPattern
{
    get
    {
        // The allomorph counts as a pattern as soon as one node in its shape
        // is flagged as a natural class.
        var shape = _segments.Shape;
        bool foundNaturalClass = false;
        foreach (var candidate in shape.GetNodes(shape.Range))
        {
            if (candidate.Annotation.IsNaturalClass)
            {
                foundNaturalClass = true;
                break;
            }
        }
        return foundNaturalClass;
    }
}

protected override bool ConstraintsEqual(Allomorph other)
{
Expand Down
4 changes: 4 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/XmlLanguageLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ private void LoadNaturalClass(XElement natClassElem)

_language.NaturalClasses.Add(nc);
_natClasses[(string)natClassElem.Attribute("id")] = nc;
foreach (var table in _language.CharacterDefinitionTables)
{
table.AddNaturalClass(nc);
}
}

private void LoadPhonologicalRule(XElement pruleElem)
Expand Down
22 changes: 20 additions & 2 deletions src/SIL.Machine/Annotations/Annotation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class Annotation<TOffset>
private FeatureStruct _fs;
private bool _optional;
private bool _iterative;
private bool _isNaturalClass;
private object _data;

public Annotation(Range<TOffset> range, FeatureStruct fs)
Expand Down Expand Up @@ -130,10 +131,27 @@ public bool Optional
}
}

/// <summary>
/// Gets or sets a flag telling whether this annotation stands for a natural class.
/// This is used for lexical patterns such as [Seg].
/// </summary>
/// <value>
/// <c>true</c> when the annotation is a natural class; <c>false</c> otherwise.
/// </value>
/// <remarks>
/// The setter calls <c>CheckFrozen()</c> before storing the new value.
/// </remarks>
public bool IsNaturalClass
{
    get => _isNaturalClass;
    set
    {
        CheckFrozen();
        _isNaturalClass = value;
    }
}

/// <summary>
/// Gets or sets a value indicating whether this annotation is iterative.
/// This is used in lexical patterns such as [Seg]+:
/// Kleene star = iterative and optional, Kleene plus = iterative and not optional.
/// This is used in lexical patterns such as [Seg]*.
/// </summary>
/// <value>
/// <c>true</c> if this annotation is iterative, otherwise <c>false</c>.
Expand Down
84 changes: 35 additions & 49 deletions tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ public void AnalyzeWord_CanGuess_ReturnsCorrectAnalysis()
begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor
));
shape.AddRange(new List<ShapeNode> { node });
var lexicalPattern = new RootAllomorph(new Segments(Table1, "", shape));
var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" };
Table1.AddNaturalClass(naturalClass);
var lexicalPattern = new RootAllomorph(new Segments(Table1, "[Any]*"));

var morpher = new Morpher(TraceManager, Language);
morpher.LexicalPatterns.Add(lexicalPattern);
Expand Down Expand Up @@ -187,21 +189,13 @@ public void TestMatchNodesWithPattern()
FeatureValue valueA = new StringFeatureValue("A");
FeatureValue valueB = new StringFeatureValue("B");
FeatureStruct fs1A = new FeatureStruct();
FeatureStruct fs1B = new FeatureStruct();
FeatureStruct fs2B = new FeatureStruct();
fs1A.AddValue(feat1, valueA);
fs1B.AddValue(feat1, valueB);
fs2B.AddValue(feat2, valueB);

// Test feature matching.
List<ShapeNode> nodesfs1A = new List<ShapeNode> { new ShapeNode(fs1A) };
List<ShapeNode> nodesfs1B = new List<ShapeNode> { new ShapeNode(fs1B) };
List<ShapeNode> nodesfs2B = new List<ShapeNode> { new ShapeNode(fs2B) };
Assert.That(morpher.MatchNodesWithPattern(nodesfs1A, nodesfs1B), Is.Empty);
Assert.That(
morpher.MatchNodesWithPattern(nodesfs1A, nodesfs1A),
Is.EqualTo(new List<List<ShapeNode>> { nodesfs1A })
);
var fs1A2B = morpher.MatchNodesWithPattern(nodesfs1A, nodesfs2B);
Assert.That(
fs1A2B.ToList()[0][0].Annotation.FeatureStruct.GetValue(feat1).ToString(),
Expand All @@ -212,78 +206,70 @@ public void TestMatchNodesWithPattern()
Is.EqualTo(valueB.ToString())
);

List<ShapeNode> noNodes = new List<ShapeNode> { };
List<ShapeNode> oneNode = new List<ShapeNode> { new ShapeNode(fs1A) };
List<ShapeNode> twoNodes = new List<ShapeNode> { new ShapeNode(fs1A), new ShapeNode(fs1A) };
List<ShapeNode> threeNodes = new List<ShapeNode>
{
new ShapeNode(fs1A),
new ShapeNode(fs1A),
new ShapeNode(fs1A)
};
List<ShapeNode> fourNodes = new List<ShapeNode>
{
new ShapeNode(fs1A),
new ShapeNode(fs1A),
new ShapeNode(fs1A),
new ShapeNode(fs1A)
};
IList<ShapeNode> noNodes = GetNodes("");
IList<ShapeNode> oneNode = GetNodes("a");
IList<ShapeNode> twoNodes = GetNodes("aa");
IList<ShapeNode> threeNodes = GetNodes("aaa");
IList<ShapeNode> fourNodes = GetNodes("aaaa");
var naturalClass = new NaturalClass(new FeatureStruct()) { Name = "Any" };
Table2.AddNaturalClass(naturalClass);

// Test sequences.
Assert.That(morpher.MatchNodesWithPattern(oneNode, GetNodes("i")), Is.Empty);
Assert.That(
morpher.MatchNodesWithPattern(oneNode, oneNode),
Is.EqualTo(new List<IList<ShapeNode>> { oneNode })
);
Assert.That(
morpher.MatchNodesWithPattern(twoNodes, twoNodes),
Is.EquivalentTo(new List<List<ShapeNode>> { twoNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { twoNodes })
);
Assert.That(
morpher.MatchNodesWithPattern(threeNodes, threeNodes),
Is.EquivalentTo(new List<List<ShapeNode>> { threeNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { threeNodes })
);

// Test optionality.
ShapeNode optionalNode = new ShapeNode(fs1A);
optionalNode.Annotation.Optional = true;
List<ShapeNode> optionalPattern = new List<ShapeNode> { optionalNode };
IList<ShapeNode> optionalPattern = GetNodes("([Any])");
Assert.That(
morpher.MatchNodesWithPattern(noNodes, optionalPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { noNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { noNodes })
);
Assert.That(
morpher.MatchNodesWithPattern(oneNode, optionalPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { oneNode })
Is.EquivalentTo(new List<IList<ShapeNode>> { oneNode })
);
Assert.That(morpher.MatchNodesWithPattern(twoNodes, optionalPattern), Is.Empty);

// Test Kleene star.
ShapeNode starNode = new ShapeNode(fs1A);
starNode.Annotation.Optional = true;
starNode.Annotation.Iterative = true;
List<ShapeNode> starPattern = new List<ShapeNode> { starNode };
IList<ShapeNode> starPattern = GetNodes("[Any]*");
Assert.That(
morpher.MatchNodesWithPattern(noNodes, starPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { noNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { noNodes })
);
var result = morpher.MatchNodesWithPattern(oneNode, starPattern);
Assert.That(
morpher.MatchNodesWithPattern(oneNode, starPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { oneNode })
Is.EquivalentTo(new List<IList<ShapeNode>> { oneNode })
);
Assert.That(
morpher.MatchNodesWithPattern(twoNodes, starPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { twoNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { twoNodes })
);

// Test Kleene plus.
ShapeNode plusNode = new ShapeNode(fs1A);
plusNode.Annotation.Iterative = true;
List<ShapeNode> plusPattern = new List<ShapeNode> { plusNode };
// Test Kleene plus look alike ("+" is a boundary marker).
IList<ShapeNode> plusPattern = GetNodes("[Any]+");
Assert.That(morpher.MatchNodesWithPattern(noNodes, plusPattern), Is.Empty);
Assert.That(
morpher.MatchNodesWithPattern(oneNode, plusPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { oneNode })
);
Assert.That(
morpher.MatchNodesWithPattern(twoNodes, plusPattern),
Is.EquivalentTo(new List<List<ShapeNode>> { twoNodes })
Is.EquivalentTo(new List<IList<ShapeNode>> { oneNode })
);
Assert.That(morpher.MatchNodesWithPattern(twoNodes, plusPattern), Is.Empty);
}

// Parses <pattern> against Table2 and returns the resulting shape's nodes as a list.
IList<ShapeNode> GetNodes(string pattern)
{
    // Table2 is the table of choice here because it has boundaries defined.
    var segments = new Segments(Table2, pattern);
    Shape parsedShape = segments.Shape;
    var wholeRange = parsedShape.Range;
    return parsedShape.GetNodes(wholeRange).ToList();
}
}

0 comments on commit cd10e0f

Please sign in to comment.