diff --git a/Andaluh.sln b/Andaluh.sln
index 49aef28..b3a551f 100644
--- a/Andaluh.sln
+++ b/Andaluh.sln
@@ -15,21 +15,77 @@ EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
+ Debug|ARM = Debug|ARM
+ Debug|ARM64 = Debug|ARM64
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
+ Release|ARM = Release|ARM
+ Release|ARM64 = Release|ARM64
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|ARM.ActiveCfg = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|ARM.Build.0 = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|ARM64.ActiveCfg = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|ARM64.Build.0 = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|x64.Build.0 = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Debug|x86.Build.0 = Debug|Any CPU
{25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|Any CPU.Build.0 = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|ARM.ActiveCfg = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|ARM.Build.0 = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|ARM64.ActiveCfg = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|ARM64.Build.0 = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|x64.ActiveCfg = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|x64.Build.0 = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|x86.ActiveCfg = Release|Any CPU
+ {25F19C8A-6301-41E7-B127-90387F73ABF6}.Release|x86.Build.0 = Release|Any CPU
{E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|ARM.ActiveCfg = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|ARM.Build.0 = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|ARM64.ActiveCfg = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|ARM64.Build.0 = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|x64.Build.0 = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Debug|x86.Build.0 = Debug|Any CPU
{E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|ARM.ActiveCfg = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|ARM.Build.0 = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|ARM64.ActiveCfg = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|ARM64.Build.0 = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|x64.ActiveCfg = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|x64.Build.0 = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|x86.ActiveCfg = Release|Any CPU
+ {E0097AB9-0B72-4D41-81BB-53D64A282CB1}.Release|x86.Build.0 = Release|Any CPU
{3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|ARM.ActiveCfg = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|ARM.Build.0 = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|ARM64.ActiveCfg = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|ARM64.Build.0 = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|x64.Build.0 = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Debug|x86.Build.0 = Debug|Any CPU
{3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|Any CPU.Build.0 = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|ARM.ActiveCfg = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|ARM.Build.0 = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|ARM64.ActiveCfg = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|ARM64.Build.0 = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|x64.ActiveCfg = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|x64.Build.0 = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|x86.ActiveCfg = Release|Any CPU
+ {3A542431-3F5D-4F90-9151-58B13FE5BBFA}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/Andaluh/Andaluh.csproj b/Andaluh/Andaluh.csproj
index 3072221..81fb875 100644
--- a/Andaluh/Andaluh.csproj
+++ b/Andaluh/Andaluh.csproj
@@ -1,10 +1,24 @@
- netcoreapp3.1
+ netstandard2.0
8.0
Library
-
+ true
+ false
+ GPL-3.0-or-later
+ Chan (aburrio@outlook.com) | AndaluGeeks
+ AndaluGeeks
+ Use this extension to transcribe any Spanish text to Andaluh
+ Copyright (C) AndaluGeeks 2020
+ https://andaluh.es/
+ https://github.com/andalugeeks/andaluh-net
+ Transliteration, Transcription, Andaluz, Andalu, Andalûh, Andalú
+ Transcriptor Andaluh
+ 1.0.3
+ 1.0.3.0
+ 1.0.3.0
+ Downgraded framework version to increase compatibility
@@ -13,4 +27,9 @@
+
+
+
+
+
diff --git a/Andaluh/EPA.cs b/Andaluh/EPA.cs
index 73d1600..aa924e2 100644
--- a/Andaluh/EPA.cs
+++ b/Andaluh/EPA.cs
@@ -5,7 +5,7 @@ namespace Andaluh
{
public static class EPA
{
- public static string Transcribe(this string text, string vaf = "VAF", string vvf = "VVF") =>
+ public static string Transcribe(this string text) =>
text.IsNullOrEmpty() ? string.Empty : new EPAEngine().Transcribe(text);
public static string ToAndaluh(this string text) => text.Transcribe();
diff --git a/Andaluh/EPAEngine.cs b/Andaluh/EPAEngine.cs
index 4418c8b..3997daa 100644
--- a/Andaluh/EPAEngine.cs
+++ b/Andaluh/EPAEngine.cs
@@ -31,8 +31,6 @@ internal class EPAEngine
new WordInteractionRules()
};
-
-
public string Transcribe(string text)
{
var tokenizedString = new TokenEvaluator(text);
diff --git a/Andaluh/Extensions/CharExtensions.cs b/Andaluh/Extensions/CharExtensions.cs
index a2a8bb5..038bc1d 100644
--- a/Andaluh/Extensions/CharExtensions.cs
+++ b/Andaluh/Extensions/CharExtensions.cs
@@ -11,7 +11,7 @@ public static char GetVowelTilde(this char vowel)
// If no tilde, replace with circumflex
if (i != -1) return Constants.VOWELS_ALL_TILDE[i];
- if (Constants.VOWELS_ALL_TILDE.Contains(vowel)) return vowel;
+ if (Constants.VOWELS_ALL_TILDE.Contains(vowel.ToString())) return vowel;
return '#';
}
@@ -23,7 +23,7 @@ public static char GetVowelCircumflex(this char vowel)
if (i != -1) return Constants.VOWELS_ALL_NOTILDE[i + 5];
- if (Constants.VOWELS_ALL_TILDE.Contains(vowel)) return vowel;
+ if (Constants.VOWELS_ALL_TILDE.Contains(vowel.ToString())) return vowel;
return '#';
}
diff --git a/Andaluh/Extensions/MatchCollectionExtensions.cs b/Andaluh/Extensions/MatchCollectionExtensions.cs
new file mode 100644
index 0000000..ee94ae2
--- /dev/null
+++ b/Andaluh/Extensions/MatchCollectionExtensions.cs
@@ -0,0 +1,18 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Text.RegularExpressions;
+
+namespace Andaluh.Extensions
+{
+ public static class MatchCollectionExtensions // Project-local Any/Where helpers over a Regex MatchCollection.
+ {
+ public static bool Any(this MatchCollection matches) => matches.Count != 0; // True when the collection holds at least one match.
+ public static IEnumerable Where(this MatchCollection matches, Func func) // Yields, on enumeration, each match for which func returns true.
+ {
+ foreach (Match match in matches) // The explicit Match loop variable casts every element of the collection.
+ if (func(match)) yield return match;
+ }
+ }
+}
diff --git a/Andaluh/Extensions/StringExtensions.cs b/Andaluh/Extensions/StringExtensions.cs
index 6b130d3..611ed4f 100644
--- a/Andaluh/Extensions/StringExtensions.cs
+++ b/Andaluh/Extensions/StringExtensions.cs
@@ -38,20 +38,10 @@ public static string ReplaceFirst(this string text, Match match, string replace,
public static string GetWholeWord(this string text, int index)
{
- int startIndex, endIndex;
- startIndex = text.GetWordStartIndex(index);
- endIndex = text.GetWordEndIndex(index);
- return text.Substring(startIndex, endIndex - startIndex);
- }
-
- public static int GetWordEndIndex(this string text, int index)
- {
- if (index >= text.Length) return text.Length;
+ var startIndex = text.GetWordStartIndex(index);
+ var endIndex = text.GetWordEndIndex(index);
- for (int i = index; i < text.Length; i++)
- if (Constants.CARACTERES_NO_PALABRA.Any(c => c == text[i])) return i;
-
- return text.Length;
+ return text.Substring(startIndex, endIndex - startIndex);
}
public static int GetWordStartIndex(this string text, int index)
@@ -63,18 +53,32 @@ public static int GetWordStartIndex(this string text, int index)
return 0;
}
+
+ public static int GetWordEndIndex(this string text, int index)
+ {
+ if (index >= text.Length) return text.Length;
+
+ for (int i = index; i < text.Length; i++)
+ if (Constants.CARACTERES_NO_PALABRA.Any(c => c == text[i])) return i;
+
+ return text.Length;
+ }
+
public static string GetPrefix(this string text, Match match, int bias)
{
- var palabra = text.GetWholeWord(match.Index + bias);
+ var matchIndex = match.Index + bias;
+ var startIndex = text.GetWordStartIndex(matchIndex);
- return palabra.Substring(0, palabra.IndexOf(match.Value));
+ return text.Substring(startIndex, matchIndex - startIndex);
}
public static string GetSuffix(this string text, Match match, int bias)
{
- var palabra = text.GetWholeWord(match.Index + bias);
+ var matchIndex = match.Index + bias;
+
+ var endIndex = text.GetWordEndIndex(matchIndex);
- return palabra.Substring(palabra.IndexOf(match.Value) + match.Value.Length);
+ return text.Substring(matchIndex + match.Value.Length, endIndex - matchIndex - match.Value.Length);
}
public static string KeepCase(this string word, string replacement_word)
@@ -104,5 +108,14 @@ public static string ReplaceFirstKeepingCase(this string text, string search, st
}
public static bool IsNullOrEmpty(this string str) => str == null || str.Trim().Length == 0;
+
+ public static string GetRange(this string text, int start, int end) =>
+ text.Substring(start, end - start);
+
+ public static string GetRangeMinusRight(this string text, int start, int minusEnd) => // Returns text from index start up to, but excluding, its last minusEnd characters.
+ text.Substring(start, text.Length - minusEnd - start); // Substring's second argument is a length, not an end index, so the start offset has to be subtracted.
+
+ public static char GetCharMinusRight(this string text, int minusEnd) =>
+ text[text.Length - minusEnd];
}
}
\ No newline at end of file
diff --git a/Andaluh/Rules/Base/Rule.cs b/Andaluh/Rules/Base/Rule.cs
index 87447a8..fedfbdf 100644
--- a/Andaluh/Rules/Base/Rule.cs
+++ b/Andaluh/Rules/Base/Rule.cs
@@ -30,6 +30,7 @@ public string Execute(Dictionary dynamicRuleExceptions, string t
return ReplaceMany(text);
}
+
private string ReplaceMany(string text)
{
var matches = Pattern?.Matches(text);
@@ -37,7 +38,7 @@ private string ReplaceMany(string text)
var bias = 0;
- foreach (Match match in matches.Where(x=>x.Success))
+ foreach (Match match in matches.Where(x => x.Success))
{
if (NotException(match, text, bias))
{
@@ -52,7 +53,7 @@ private string ReplaceMany(string text)
private bool NotException(Match match, string text, int bias) => !IsException(text.GetWholeWord(match.Index + bias));
- private bool IsException(string palabra) =>
+ private bool IsException(string palabra) =>
Exceptions?.ContainsKey(palabra.ToLower()) == true ||
DynamicRuleExceptions?.ContainsKey(palabra.ToLower()) == true;
diff --git a/Andaluh/Rules/Base/RuleBundle.cs b/Andaluh/Rules/Base/RuleBundle.cs
index 565d929..ff2b004 100644
--- a/Andaluh/Rules/Base/RuleBundle.cs
+++ b/Andaluh/Rules/Base/RuleBundle.cs
@@ -5,6 +5,7 @@ namespace Andaluh.Rules.Base
internal abstract class RuleBundle
{
protected readonly Dictionary DynamicRuleExceptions;
+ protected Dictionary DelayedAfterRuleDynamicRuleExceptions;
protected abstract IEnumerable Rules { get; }
public RuleBundle(Dictionary dynamicRuleExceptions = null)
@@ -14,9 +15,24 @@ public RuleBundle(Dictionary dynamicRuleExceptions = null)
public string Execute(string text)
{
foreach (var rule in Rules)
+ {
+ DelayedAfterRuleDynamicRuleExceptions = new Dictionary();
text = rule.Execute(DynamicRuleExceptions, text);
+ UpdateDynamicRulesAfterCurrentRule();
+ }
return text;
}
+
+ // Folds the exceptions queued while the last rule ran into DynamicRuleExceptions, so they only apply to the rules executed afterwards.
+ private void UpdateDynamicRulesAfterCurrentRule()
+ {
+ foreach (var exception in DelayedAfterRuleDynamicRuleExceptions)
+ {
+ // The indexer adds a missing key and overwrites an existing one with a single lookup.
+ DynamicRuleExceptions[exception.Key] = exception.Value;
+ }
+ }
+
}
}
diff --git a/Andaluh/Rules/Base/RuleConstants.cs b/Andaluh/Rules/Base/RuleConstants.cs
new file mode 100644
index 0000000..276e993
--- /dev/null
+++ b/Andaluh/Rules/Base/RuleConstants.cs
@@ -0,0 +1,9 @@
+using System.Text.RegularExpressions;
+
+namespace Andaluh.Rules.Base
+{
+ public static class RuleConstants // Holds a regex pattern shared by more than one rule bundle.
+ {
+ public static readonly Regex pattern_begin_lh = new Regex(@"(?i)\b[aáeéiíoóuú](lh)[aáeéiíoóuú]"); // Case-insensitive: a vowel at a word boundary, then "lh" (group 1), then another vowel.
+ }
+}
diff --git a/Andaluh/Rules/DigraphRules.cs b/Andaluh/Rules/DigraphRules.cs
index 1c4ec57..b164ca6 100644
--- a/Andaluh/Rules/DigraphRules.cs
+++ b/Andaluh/Rules/DigraphRules.cs
@@ -11,17 +11,28 @@ internal class DigraphRules : RuleBundle
private static readonly Regex pattern_digraph_special_2 = new Regex("(?i)(tr|p)([ao])(?:ns|st)([bcçdfghjklmnpqstvwxyz])");
private static readonly Regex pattern_digraph_special_3 = new Regex("(?i)([aeiouáéíóú])([bdnr])(s)([bcçdfghjklmnpqstvwxyz])");
private static readonly Regex pattern_digraph_special_4 = new Regex("(?i)([aeiouáéíóú])[djrstxz](l)");
- private static readonly Regex pattern_digraph_general = new Regex("(?i)([aeiouáéíóú])(" + string.Join("|", Constants.DIGRAPHS) + ")");
+ private static readonly Regex pattern_digraph_general = new Regex(@"(?i)([aeiouáéíóú])(" + string.Join("|", Constants.DIGRAPHS) + ")");
+ private readonly Dictionary Digraph_RULES_EXCEPT = new Dictionary();
protected override IEnumerable Rules => new[]
{
new Rule(pattern_digraph_special_1, digraph_special1_rules_replacer),
new Rule(pattern_digraph_special_2, digraph_special2_rules_replacer),
new Rule(pattern_digraph_special_3, digraph_special3_rules_replacer),
- new Rule(pattern_digraph_special_4, digraph_special4_rules_replacer),
- new Rule(pattern_digraph_general, digraph_general_rules_replacer)
+ new Rule(RuleConstants.pattern_begin_lh, exceptuar_patron),
+ new Rule(pattern_digraph_special_4, digraph_special4_rules_replacer, Digraph_RULES_EXCEPT),
+ new Rule(pattern_digraph_general, digraph_general_rules_replacer, Digraph_RULES_EXCEPT)
};
+ // Registers the whole word around the match in Digraph_RULES_EXCEPT and returns the matched text unchanged.
+ private string exceptuar_patron(Match match, string text, int bias)
+ {
+ // Rule.IsException looks words up with ToLower(), so the key has to be stored lower-cased or capitalised words are never found.
+ var palabra = text.GetWholeWord(match.Index + bias).ToLower();
+ Digraph_RULES_EXCEPT[palabra] = palabra;
+ return match.Value;
+ }
+
private string digraph_special1_rules_replacer(Match match, string text, int bias) =>
match.Value[1] switch
{
@@ -34,7 +45,7 @@ private string digraph_special2_rules_replacer(Match match, string text, int bia
{
string init_char = match.Groups[1].Value;
char vowel_char = match.Groups[2].Value[0];
- char cons_char = match.Groups[0].Value[^1];
+ char cons_char = match.Groups[0].Value.GetCharMinusRight(1);
return cons_char.ToLower() == 'l' ?
init_char + vowel_char.apply_repl_rules() + cons_char + "-" + cons_char :
@@ -46,7 +57,7 @@ private string digraph_special3_rules_replacer(Match match, string text, int bia
var vowel_char = match.Value[0].ToString();
var cons_char = match.Value[1].ToString();
var s_char = match.Value[2];
- var digraph_char = match.Value[^1];
+ var digraph_char = match.Value.GetCharMinusRight(1);
return cons_char.ToLower() == "r" && s_char.ToLower() == 's' ?
vowel_char + cons_char + digraph_char + digraph_char :
@@ -56,7 +67,7 @@ private string digraph_special3_rules_replacer(Match match, string text, int bia
private string digraph_special4_rules_replacer(Match match, string text, int bias)
{
var vowel_char = match.Value[0].ToString();
- var digraph_char = match.Value[^1];
+ var digraph_char = match.Value.GetCharMinusRight(1);
return vowel_char.apply_repl_rules() + digraph_char + "-" + digraph_char;
}
diff --git a/Andaluh/Rules/FinalRules.cs b/Andaluh/Rules/FinalRules.cs
index 8b234cb..dde8efd 100644
--- a/Andaluh/Rules/FinalRules.cs
+++ b/Andaluh/Rules/FinalRules.cs
@@ -7,8 +7,9 @@ namespace Andaluh.Rules
{
internal class FinalRules : RuleBundle
{
- private static readonly Regex pattern_ador = new Regex(@"(?i)\w+(adôh|edôh|idá)");
- private static readonly Regex pattern_dura = new Regex(@"(?i)\w+(dura|dero|dera|dora)");
+ private static readonly Regex pattern_ador = new Regex(@"(?i)(adôh|edôh|idá)\b");
+ private static readonly Regex pattern_dura = new Regex(@"(?i)(\w)(dura|durâ|duro|dero|durô|derô|dera|dora|derâ|dorâ)\b");
+ private static readonly Regex pattern_deder = new Regex(@"(?i)(b|d)(eder)([aeiouáâçéíóú])\b");
private readonly Dictionary ADOR_RULES_EXCEPT = new Dictionary()
{
@@ -19,18 +20,30 @@ internal class FinalRules : RuleBundle
protected override IEnumerable Rules => new[]
{
new Rule(pattern_ador, FinalesAdor, ADOR_RULES_EXCEPT),
- new Rule(pattern_dura, FinalesDura, null)
+ new Rule(pattern_dura, FinalesDura, null),
+ new Rule(pattern_deder, FinalesDeder, null)
};
- private string FinalesAdor(Match match, string text, int bias)=>
- match.Groups[0].Value[0..^match.Groups[1].Length] + match.Groups[1].Value[0] + match.Groups[1].Value[2..];
+ private string FinalesAdor(Match match, string text, int bias) =>
+ match.Groups[0].Value.GetRangeMinusRight(0, match.Groups[1].Length) + match.Groups[1].Value[0] + match.Groups[1].Value.Substring(2);
private string FinalesDura(Match match, string text, int bias)
{
- var prefijo = match.Groups[0].Value[0..^4];
- var vocalAcentuada = match.Groups[1].Value[1].KeepCase(match.Groups[1].Value[1].GetVowelTilde());
- var final = match.Groups[1].Value[2..4];
+ var charBefore = match.Groups[1].Value;
+ if (charBefore == "n" || charBefore == "r") return match.Groups[0].Value;
- return prefijo + vocalAcentuada + final;
+ var prefijo = match.Groups[0].Value.GetCharMinusRight(3);
+ var vocalAcentuada = prefijo.KeepCase(prefijo.GetVowelTilde());
+ var final = match.Groups[0].Value.GetRange(3, 5);
+
+ return charBefore + vocalAcentuada + final;
+ }
+
+ private string FinalesDeder(Match match, string text, int bias)
+ {
+ var prefijo = match.Groups[0].Value.Substring(0, match.Groups[0].Value.IndexOf(match.Groups[1].Value));
+ var reemplazo = match.Groups[2].Value.KeepCase("eér");
+
+ return prefijo + match.Groups[1].Value + reemplazo + match.Groups[3].Value;
}
public FinalRules() : base()
diff --git a/Andaluh/Rules/GJRules.cs b/Andaluh/Rules/GJRules.cs
index bf3469e..7f4ae4b 100644
--- a/Andaluh/Rules/GJRules.cs
+++ b/Andaluh/Rules/GJRules.cs
@@ -7,6 +7,8 @@ namespace Andaluh.Rules
{
internal class GJRules : RuleBundle
{
+ private static readonly Regex pattern_lge = new Regex("(?i)(lge|lgé|lgi|lgí)");
+ private static readonly Regex pattern_lj = new Regex("(?i)(lj)");
private static readonly Regex pattern_gj = new Regex("(?i)(g(?=[eiéí])|j)([aeiouáéíóú])");
private static readonly Regex pattern_gue_gui = new Regex("(?i)(g)u([eiéí])");
private static readonly Regex pattern_guue_guui = new Regex("(?i)(g)(ü)([eiéí])");
@@ -22,6 +24,8 @@ internal class GJRules : RuleBundle
protected override IEnumerable Rules => new[]
{
+ new Rule(pattern_lge, lge_rules_replacer),
+ new Rule(pattern_lj, lj_rules_replacer),
new Rule(pattern_gj, gj_rules_replacer, GJ_RULES_EXCEPT),
new Rule(pattern_gue_gui, gue_gui_rules_replacer), //DynamicRuleExceptions
new Rule(pattern_guue_guui, guue_guui_rules_replacer),
@@ -29,12 +33,14 @@ internal class GJRules : RuleBundle
new Rule(pattern_guel_gues, guel_gues_rules_replacer)
};
+ private string lge_rules_replacer(Match match, string text, int bias)
+ => match.Value[0].KeepCase('r') + match.Value[1].KeepCase('h') + match.Value[2];
- private string gj_rules_replacer(Match match, string text, int bias)
- {
- string x_correct_capitalization = match.Value[0].IsUpperCase() ? Constants.VVF_mayus : Constants.VVF;
- return x_correct_capitalization + match.Value[1];
- }
+ private string lj_rules_replacer(Match match, string text, int bias)
+ => match.Value[0].KeepCase('r') + match.Value[1].KeepCase('h');
+
+ private string gj_rules_replacer(Match match, string text, int bias) =>
+ match.Value[0].KeepCase(Constants.VVF[0]) + match.Value[1];
private string gue_gui_rules_replacer(Match match, string text, int bias) =>
match.Value[0].ToString() + match.Value[2].ToString();
diff --git a/Andaluh/Rules/HRules.cs b/Andaluh/Rules/HRules.cs
index 86c5010..ab56828 100644
--- a/Andaluh/Rules/HRules.cs
+++ b/Andaluh/Rules/HRules.cs
@@ -8,11 +8,10 @@ namespace Andaluh.Rules
{
internal class HRules : RuleBundle
{
- private static readonly Regex pattern_aha = new Regex("(?i)(aha|aho)");
- //private static readonly Regex pattern_aha = new Regex("(?i)([aá])(h)([aá])");
+ private static readonly Regex pattern_aha = new Regex("(?i)([aá]h[aáeéíuú]|aho(?!rr|ra|ri)|ehe|ehi(?!sto)|oho|ih[ií]|uhu)");
private static readonly Regex pattern_h_general = new Regex("(?i)(? H_RULES_EXCEPT = new Dictionary()
{
@@ -27,13 +26,14 @@ internal class HRules : RuleBundle
protected override IEnumerable Rules => new[]
{
+ new Rule(RuleConstants.pattern_begin_lh, exceptuar_patron),
new Rule(pattern_h_hua, h_hua_rules_replacer),
new Rule(pattern_h_hue, h_hue_rules_replacer),
- new Rule(pattern_aha, exceptuar_aha),
+ new Rule(pattern_aha, exceptuar_patron),
new Rule(pattern_h_general, h_rules_replacer, H_RULES_EXCEPT)
};
- private string exceptuar_aha(Match match, string text, int bias)
+ private string exceptuar_patron(Match match, string text, int bias)
{
var palabra = text.GetWholeWord(match.Index + bias);
if (!H_RULES_EXCEPT.ContainsKey(palabra))
@@ -44,7 +44,7 @@ private string exceptuar_aha(Match match, string text, int bias)
private string h_hue_rules_replacer(Match match, string text, int bias)
{
- string g_correct_capitalization = match.Value[0].IsUpperCase() ? "G" : "g";
+ string g_correct_capitalization = match.Value[0].KeepCase('g');
var result = g_correct_capitalization + match.Value.Substring(1);
AddTransliteratedWordAsExceptionForGueGui(match, text, bias, result);
@@ -56,8 +56,8 @@ private void AddTransliteratedWordAsExceptionForGueGui(Match match, string text,
{
var palabra = text.GetWholeWord(match.Index + bias);
var newWord = palabra.Replace(match.Value, result).ToLower();
- if (!DynamicRuleExceptions.ContainsKey(newWord))
- DynamicRuleExceptions.Add(newWord, newWord);
+ if (!DelayedAfterRuleDynamicRuleExceptions.ContainsKey(newWord))
+ DelayedAfterRuleDynamicRuleExceptions.Add(newWord, newWord);
}
private string h_hua_rules_replacer(Match match, string text, int bias)
diff --git a/Andaluh/Rules/LRules.cs b/Andaluh/Rules/LRules.cs
index f87a8c6..f3ff0d5 100644
--- a/Andaluh/Rules/LRules.cs
+++ b/Andaluh/Rules/LRules.cs
@@ -9,15 +9,26 @@ internal class LRules : RuleBundle
{
private static readonly Regex pattern_l = new Regex("(?i)(l)([bcçgsdfghkmpqrtxz])");
+ private readonly Dictionary L_RULES_EXCEPT = new Dictionary();
+
protected override IEnumerable Rules => new[]
{
- new Rule(pattern_l, l_rules_replacer)
+ new Rule(RuleConstants.pattern_begin_lh, exceptuar_patron),
+ new Rule(pattern_l, l_rules_replacer, L_RULES_EXCEPT)
};
+ // Registers the whole word around the match in L_RULES_EXCEPT and returns the matched text unchanged.
+ private string exceptuar_patron(Match match, string text, int bias)
+ {
+ // Rule.IsException looks words up with ToLower(), so the key has to be stored lower-cased or capitalised words are never found.
+ var palabra = text.GetWholeWord(match.Index + bias).ToLower();
+ L_RULES_EXCEPT[palabra] = palabra;
+ return match.Value;
+ }
+
private string l_rules_replacer(Match match, string text, int bias) =>
match.Value[0].KeepCase('r') + match.Value[1];
-
public LRules() : base()
{ }
}
diff --git a/Andaluh/Rules/WordEndingRules.cs b/Andaluh/Rules/WordEndingRules.cs
index 24d7dd3..41b556a 100644
--- a/Andaluh/Rules/WordEndingRules.cs
+++ b/Andaluh/Rules/WordEndingRules.cs
@@ -8,12 +8,13 @@ namespace Andaluh.Rules
{
internal class WordEndingRules : RuleBundle
{
- private static readonly Regex pattern_intervowel_d_end = new Regex(@"(?i)([aiíÍ])(d)([oa])(s?)\b");
+ private static readonly Regex pattern_intervowel_d_end_exceptions = new Regex(@"(?i)[áéíóú][^aeiouáéíóú]\b");
+ private static readonly Regex pattern_intervowel_d_end = new Regex(@"(?i)([aií])(d)([oa])(s?)\b");
private static readonly Regex pattern_eps_end = new Regex("(?i)(e)(ps)");
private static readonly Regex pattern_d_end = new Regex(@"(?i)([aeiouáéíóú])(d)\b");
private static readonly Regex pattern_s_end = new Regex(@"(?i)([aeiouáéíóú])(s)\b");
private static readonly Regex pattern_const_end = new Regex(@"(?i)([aeiouáâçéíóú])([bcfgjkprtxz]\b)");
- private static readonly Regex pattern_l_end = new Regex(@"(?i)([aeiouáâçéíóú])(l\b)");
+ private static readonly Regex pattern_l_end = new Regex(@"(?i)([aeiouáâçéíóú])l\b");
private static readonly Regex pattern_vocal_tilde = new Regex("(?i)á|é|í|ó|ú");
private static readonly Dictionary WORDEND_D_INTERVOWEL_RULES_EXCEPT = new Dictionary()
@@ -89,7 +90,6 @@ internal class WordEndingRules : RuleBundle
private static readonly Dictionary WORDEND_CONST_RULES_EXCEPT = new Dictionary()
{
{"al", "al"},
- {"cual", "cuâ"},
{"del", "del"},
{"dél", "dél"},
{"el", "el"},
@@ -114,8 +114,8 @@ internal class WordEndingRules : RuleBundle
new Rule(pattern_eps_end, eps_end_rules_replacer),
new Rule(pattern_d_end, d_end_rules_replacer, WORDEND_D_RULES_EXCEPT),
new Rule(pattern_s_end, s_end_rules_replacer, WORDEND_S_RULES_EXCEPT),
- new Rule(pattern_const_end, const_end_rules_replacer, WORDEND_CONST_RULES_EXCEPT),
- new Rule(pattern_l_end, const_end_rules_replacer, WORDEND_CONST_RULES_EXCEPT)
+ new Rule(pattern_l_end, const_end_rules_replacer, WORDEND_CONST_RULES_EXCEPT),
+ new Rule(pattern_const_end, const_end_rules_replacer, WORDEND_CONST_RULES_EXCEPT)
};
private bool contain_vocal_tilde(string text) => pattern_vocal_tilde.Match(text).Success;
@@ -123,10 +123,15 @@ internal class WordEndingRules : RuleBundle
private string intervowel_d_end_rules_replacer(Match match, string text, int bias)
{
+ var prefix = text.GetPrefix(match, bias);
+
+ if (pattern_intervowel_d_end_exceptions.IsMatch(prefix)) return match.Value;
+
var firstVowel = match.Value[0];
var lastVowel = match.Value[2];
- if (contain_vocal_tilde(firstVowel)) return match.Value;
+
+ if (contain_vocal_tilde(prefix)) return match.Value;
switch (match.Value)
{
@@ -190,7 +195,7 @@ private string const_end_rules_replacer(Match match, string text, int bias)
if (contain_vocal_tilde(prefix)) return suffixFirstChar.apply_repl_rules();
- return contain_vocal_tilde(suffixFirstChar) ?
+ return suffixFirstChar != 'í' && suffixFirstChar != 'ú' && contain_vocal_tilde(suffixFirstChar) ?
suffixFirstChar.apply_repl_rules() :
suffixFirstChar.apply_repl_rules() + suffixFirstChar.KeepCase('h');
}
diff --git a/Andaluh/Rules/WordInteractionRules.cs b/Andaluh/Rules/WordInteractionRules.cs
index f8dd3f2..c53b93e 100644
--- a/Andaluh/Rules/WordInteractionRules.cs
+++ b/Andaluh/Rules/WordInteractionRules.cs
@@ -16,8 +16,8 @@ internal class WordInteractionRules : RuleBundle
private static string word_interaction_rules_replacer(Match match, string text, int bias)=>
match.Value[0].ToLower() == 'd' ?
- match.Value[0..2] + (match.Value[2].IsUpperCase() ? "R" : "r") + match.Value[3..] :
- match.Value[0] + (match.Value[1].IsUpperCase() ? "R" : "r") + match.Value[2..];
+ match.Value.GetRange(0, 2) + (match.Value[2].IsUpperCase() ? "R" : "r") + match.Value.Substring(3) :
+ match.Value[0] + (match.Value[1].IsUpperCase() ? "R" : "r") + match.Value.Substring(2);
public WordInteractionRules() : base()
{ }
diff --git a/Andaluh/SentenceMethods/SentenceExceptionConstants.cs b/Andaluh/SentenceMethods/SentenceExceptionConstants.cs
index 20aebcd..c55c906 100644
--- a/Andaluh/SentenceMethods/SentenceExceptionConstants.cs
+++ b/Andaluh/SentenceMethods/SentenceExceptionConstants.cs
@@ -4,8 +4,14 @@ namespace Andaluh.SentenceMethods
{
public static class SentenceExceptions
{
- public static Dictionary Exceptions = new Dictionary
+ private static string[] TradeMarks = new string[]
+ {
+ "google", "twitter", "facebook", "outlook"
+ };
+
+ private static Dictionary Exceptions = new Dictionary
{
+ { "et", "et" },
{ "a capela","a capela"},
{ "a contráriis","a contrárî"},
{ "a contrario sensu","a contrario çençu"},
@@ -45,6 +51,7 @@ public static class SentenceExceptions
{ "ad referéndum","ârreferendum"},
{ "ad tempus","âttempû"},
{ "ad valórem","âbbalórem"},
+ { "álter ego", "árterego" },
{ "ex abrupto","ehabrûtto" },
{ "ex aequo","ehaecuo" },
{ "ex cáthedra","êccátedra" },
@@ -54,5 +61,31 @@ public static class SentenceExceptions
{ "ut supra","ut çupra" },
{ "vox pópuli", "bôppópuli" }
};
+
+ public static Dictionary allExceptions;
+
+ public static Dictionary AllExceptions
+ {
+ get
+ {
+ if (allExceptions == null)
+ allExceptions = CreateAllExceptions();
+
+ return allExceptions;
+ }
+ }
+ private static Dictionary CreateAllExceptions()
+ {
+ var allExceptions = new Dictionary();
+
+ foreach (var exception in Exceptions)
+ allExceptions.Add(exception.Key, exception.Value);
+
+ foreach (var tradeMark in TradeMarks)
+ allExceptions.Add(tradeMark, tradeMark);
+
+
+ return allExceptions;
+ }
}
}
diff --git a/Andaluh/SentenceMethods/Token.cs b/Andaluh/SentenceMethods/Token.cs
index 4c9820b..d71c239 100644
--- a/Andaluh/SentenceMethods/Token.cs
+++ b/Andaluh/SentenceMethods/Token.cs
@@ -1,4 +1,5 @@
-using System.Linq;
+using System;
+using System.Linq;
using System.Text.RegularExpressions;
using static Andaluh.SentenceMethods.TokenEvaluator;
@@ -22,12 +23,19 @@ private Token(string value, int position, TranscriptionTypes transcription)
public static Token GetEscapedToken(Match match) =>
new Token(match.Value, match.Index, TranscriptionTypes.Escaped);
- public static Token GetExceptionToken(string exception, int position) =>
- new Token(exception, position, TranscriptionTypes.Exception);
+ public static Token GetExceptionToken(Match match) =>
+ new Token(match.Value, match.Index, TranscriptionTypes.Exception);
public static Token GetStandardToken(string str, int position) =>
new Token(str, position, TranscriptionTypes.Standard);
public override string ToString() => $"{StartIndex}, {EndIndex}, {Value}";
+
+ internal void Copy(Token newToken) // Overwrites this token's text and position with those of newToken.
+ {
+ Value = newToken.Value;
+ StartIndex = newToken.StartIndex;
+ EndIndex = newToken.EndIndex; // Only Value, StartIndex and EndIndex are copied; nothing else on the token is assigned here.
+ }
}
}
diff --git a/Andaluh/SentenceMethods/TokenEvaluator.cs b/Andaluh/SentenceMethods/TokenEvaluator.cs
index 70e0966..cdf33a1 100644
--- a/Andaluh/SentenceMethods/TokenEvaluator.cs
+++ b/Andaluh/SentenceMethods/TokenEvaluator.cs
@@ -1,4 +1,5 @@
-using System;
+using Andaluh.Extensions;
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
@@ -7,7 +8,7 @@ namespace Andaluh.SentenceMethods
{
internal class TokenEvaluator
{
- private Regex EscapeStringsPattern = new Regex(@"(?i)(http[^ ]+)|(@\w+)|(#\w+)");
+ private Regex EscapeStringsPattern = new Regex(@"(?i)(http[^ ]+)|(@\w+)|(#\w+)|(\w+@\w+)|(\w+\.es)|(\w+\.com)");
public enum TranscriptionTypes { Exception, Escaped, Standard }
List Tokens;
@@ -30,7 +31,7 @@ public TokenEvaluator(string text)
private void ReplaceExceptions()
{
foreach (var token in Tokens.Where(x => x.Transcription == TranscriptionTypes.Exception))
- token.Value = SentenceExceptions.Exceptions[token.Value];
+ token.Value = token.Value.KeepCase(SentenceExceptions.AllExceptions[token.Value.ToLower()]);
}
internal IEnumerable