Skip to content

Commit

Permalink
[pt] Add _DIALECT suffix to speller
Browse files Browse the repository at this point in the history
  • Loading branch information
p-goulart committed Nov 16, 2023
1 parent 3e5f620 commit d3c8165
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public class MorfologikPortugueseSpellerRule extends MorfologikSpellerRule {
// NOTE(review): presumably maps a word to its spelling in the other dialect; the contents
// are not visible in this excerpt -- confirm where the map is populated.
private final Map<String, String> dialectAlternationMapping;
// Tagger and synthesizer instances shared by all instances of this rule (static).
private static final PortugueseTagger tagger = new PortugueseTagger();
private static final PortugueseSynthesizer synth = PortugueseSynthesizer.INSTANCE;
// Read by getId() to decide whether to append "_DIALECT" to the rule ID.
// NOTE(review): in the visible code this flag is only ever assigned true (inside
// getRuleMatches) and never reset, so it may stay set for every later call on the same
// rule instance -- verify, and check whether rule instances are shared between threads.
private boolean dialectIssue = false;


@Override
Expand All @@ -71,8 +72,12 @@ public static Set<String> getWordSetFromResources(String filepath) {

/**
 * Builds this rule's ID: "MORFOLOGIK_SPELLER_" followed by the language's short code with
 * country and variant, with "-" replaced by "_" and upper-cased. When the dialectIssue
 * flag is set, "_DIALECT" is appended.
 *
 * NOTE(review): this span comes from a rendered diff, so the bare
 * return "MORFOLOGIK_SPELLER_" line below looks like the removed pre-change line and the
 * String id = ... line its replacement -- confirm against the actual file.
 * NOTE(review): in the visible code dialectIssue is only ever set to true and never
 * reset, so the suffix may persist for later calls on the same instance -- verify.
 */
@Override
public String getId() {
return "MORFOLOGIK_SPELLER_"
String id = "MORFOLOGIK_SPELLER_"
+ language.getShortCodeWithCountryAndVariant().replace("-", "_").toUpperCase();
if (dialectIssue) {
// Flag is set: mark the ID with the dialect suffix.
id = id + "_DIALECT";
}
return id;
}

// TODO: document this, as it's about to get messy
Expand Down Expand Up @@ -229,6 +234,7 @@ public List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentenc
if (!ruleMatches.isEmpty()) {
String wordWithBrazilianStylePastTense = checkEuropeanStyle1PLPastTense(word);
if (wordWithBrazilianStylePastTense != null) {
this.dialectIssue = true;
String message = "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.";
replaceFormsOfFirstMatch(message, sentence, ruleMatches, wordWithBrazilianStylePastTense);
}
Expand All @@ -239,6 +245,7 @@ public List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentenc
}
String dialectAlternative = this.dialectAlternative(word);
if (dialectAlternative != null) {
this.dialectIssue = true;
String otherVariant = "europeu";
if (Objects.equals(spellerLanguage.getShortCodeWithCountryAndVariant(), "pt-PT")) {
otherVariant = "brasileiro";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ private void assertSingleError(String sentence, JLanguageTool lt,
}

private void assertSingleExactError(String sentence, JLanguageTool lt, MorfologikPortugueseSpellerRule rule,
String suggestion, String message) throws IOException {
String suggestion, String message, String id) throws IOException {
RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence));
assertEquals(1, matches.length);
if (matches.length > 0) {
Expand All @@ -112,16 +112,18 @@ private void assertSingleExactError(String sentence, JLanguageTool lt, Morfologi
// System.out.println(returnedSuggestions);
assert Objects.equals(returnedSuggestions.get(0), suggestion);
assert Objects.equals(match.getMessage(), message);
System.out.println("id: " + match.getSpecificRuleId());
assert Objects.equals(match.getSpecificRuleId(), id);
}
}

/**
 * Checks a dialect pair in both directions: each sentence must produce no errors with its
 * own lt/rule pair (sentenceBR with ltBR/ruleBR, sentencePT with ltPT/rulePT), and exactly
 * one match with the other pair, whose first suggestion is the counterpart sentence, whose
 * message is the corresponding dialect message, and whose rule ID carries the "_DIALECT"
 * suffix.
 *
 * NOTE(review): this span comes from a rendered diff; the five-argument
 * assertSingleExactError calls look like the removed pre-change lines and the six-argument
 * calls their replacements -- confirm against the actual file.
 *
 * @param sentenceBR text expected to be accepted by ruleBR and flagged by rulePT
 * @param sentencePT text expected to be accepted by rulePT and flagged by ruleBR
 * @throws IOException declared because the assertion helpers called here declare it
 */
private void assertTwoWayDialectError(String sentenceBR, String sentencePT) throws IOException {
// Message expected from ruleBR when it flags sentencePT.
String brMessage = "Possível erro de ortografia: esta é a grafia utilizada no português europeu.";
// Message expected from rulePT when it flags sentenceBR.
String ptMessage = "Possível erro de ortografia: esta é a grafia utilizada no português brasileiro.";
assertNoErrors(sentenceBR, ltBR, ruleBR);
assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage);
assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage, "MORFOLOGIK_SPELLER_PT_BR_DIALECT");
assertNoErrors(sentencePT, ltPT, rulePT);
assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage);
assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage, "MORFOLOGIK_SPELLER_PT_PT_DIALECT");
}

private void assertTwoWayOrthographicAgreementError(String sentence90, String sentence45) throws IOException {
Expand Down Expand Up @@ -365,13 +367,15 @@ public void testBrazilPortugueseGema23DFalseNegatives() throws Exception {
// Expects "pingüim" to yield a single match with ltBR/ruleBR: first suggestion "pinguim",
// the diaeresis message, and the plain rule ID without the "_DIALECT" suffix.
// NOTE(review): this span comes from a rendered diff; the message line ending in ");"
// looks like the removed pre-change line and the one ending in "," its replacement --
// confirm against the actual file.
@Test
public void testPortugueseDiaeresis() throws Exception {
assertSingleExactError("pingüim", ltBR, ruleBR, "pinguim",
"No mais recente acordo ortográfico, não se usa mais o trema no português.");
"No mais recente acordo ortográfico, não se usa mais o trema no português.",
"MORFOLOGIK_SPELLER_PT_BR");
}

// Expects "amámos" to yield a single match with ltBR/ruleBR: first suggestion "amamos",
// the past-tense message, and a rule ID carrying the "_DIALECT" suffix.
// NOTE(review): this span comes from a rendered diff; the message line ending in ");"
// looks like the removed pre-change line and the one ending in "," its replacement --
// confirm against the actual file.
@Test
public void testEuropeanPortugueseStyle1PLPastTenseCorrectedInBrazilian() throws Exception {
assertSingleExactError("amámos", ltBR, ruleBR, "amamos",
"No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.");
"No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.",
"MORFOLOGIK_SPELLER_PT_BR_DIALECT");
}

@Test
Expand Down

0 comments on commit d3c8165

Please sign in to comment.