From d3c8165a1f8d22e772afe75b6ee131ff17f61596 Mon Sep 17 00:00:00 2001 From: p-goulart Date: Thu, 16 Nov 2023 14:38:29 +0100 Subject: [PATCH] [pt] Add _DIALECT suffix to speller --- .../rules/pt/MorfologikPortugueseSpellerRule.java | 9 ++++++++- .../pt/MorfologikPortugueseSpellerRuleTest.java | 14 +++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java index 8d5976a49b8b..b95cf6c20edc 100644 --- a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java @@ -50,6 +50,7 @@ public class MorfologikPortugueseSpellerRule extends MorfologikSpellerRule { private final Map dialectAlternationMapping; private static final PortugueseTagger tagger = new PortugueseTagger(); private static final PortugueseSynthesizer synth = PortugueseSynthesizer.INSTANCE; + private boolean dialectIssue = false; @Override @@ -71,8 +72,12 @@ public static Set getWordSetFromResources(String filepath) { @Override public String getId() { - return "MORFOLOGIK_SPELLER_" + String id = "MORFOLOGIK_SPELLER_" + language.getShortCodeWithCountryAndVariant().replace("-", "_").toUpperCase(); + if (dialectIssue) { + id = id + "_DIALECT"; + } + return id; } // TODO: document this, as it's about to get messy @@ -229,6 +234,7 @@ public List getRuleMatches(String word, int startPos, AnalyzedSentenc if (!ruleMatches.isEmpty()) { String wordWithBrazilianStylePastTense = checkEuropeanStyle1PLPastTense(word); if (wordWithBrazilianStylePastTense != null) { + this.dialectIssue = true; String message = "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento."; replaceFormsOfFirstMatch(message, sentence, ruleMatches, wordWithBrazilianStylePastTense); } @@ -239,6 +245,7 @@ public List getRuleMatches(String word, int startPos, AnalyzedSentenc } String dialectAlternative = this.dialectAlternative(word); if (dialectAlternative != null) { + this.dialectIssue = true; String otherVariant = "europeu"; if (Objects.equals(spellerLanguage.getShortCodeWithCountryAndVariant(), "pt-PT")) { otherVariant = "brasileiro"; diff --git a/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java index 840c3d2b4f2c..9cf43fde3a8c 100644 --- a/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java +++ b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java @@ -103,7 +103,7 @@ private void assertSingleError(String sentence, JLanguageTool lt, } private void assertSingleExactError(String sentence, JLanguageTool lt, MorfologikPortugueseSpellerRule rule, - String suggestion, String message) throws IOException { + String suggestion, String message, String id) throws IOException { RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence)); assertEquals(1, matches.length); if (matches.length > 0) { @@ -112,6 +112,8 @@ private void assertSingleExactError(String sentence, JLanguageTool lt, Morfologi // System.out.println(returnedSuggestions); assert Objects.equals(returnedSuggestions.get(0), suggestion); assert Objects.equals(match.getMessage(), message); + System.out.println("id: " + match.getSpecificRuleId()); + assert Objects.equals(match.getSpecificRuleId(), id); } } @@ -119,9 +121,9 @@ private void assertTwoWayDialectError(String sentenceBR, String sentencePT) thro String brMessage = "Possível erro de ortografia: esta é a grafia utilizada no português europeu."; String ptMessage = "Possível erro de ortografia: esta é a grafia utilizada no português brasileiro."; assertNoErrors(sentenceBR, ltBR, ruleBR); - assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage); + assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage, "MORFOLOGIK_SPELLER_PT_BR_DIALECT"); assertNoErrors(sentencePT, ltPT, rulePT); - assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage); + assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage, "MORFOLOGIK_SPELLER_PT_PT_DIALECT"); } private void assertTwoWayOrthographicAgreementError(String sentence90, String sentence45) throws IOException { @@ -365,13 +367,15 @@ public void testBrazilPortugueseGema23DFalseNegatives() throws Exception { @Test public void testPortugueseDiaeresis() throws Exception { assertSingleExactError("pingüim", ltBR, ruleBR, "pinguim", - "No mais recente acordo ortográfico, não se usa mais o trema no português."); + "No mais recente acordo ortográfico, não se usa mais o trema no português.", + "MORFOLOGIK_SPELLER_PT_BR"); } @Test public void testEuropeanPortugueseStyle1PLPastTenseCorrectedInBrazilian() throws Exception { assertSingleExactError("amámos", ltBR, ruleBR, "amamos", - "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento."); + "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.", + "MORFOLOGIK_SPELLER_PT_BR_DIALECT"); } @Test