[pt] Add _DIALECT suffix to speller

languagetool-org · Nov 16, 2023 · commit d3c8165 · 1 parent 3e5f620
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 6 deletions.
diff --git a/...e-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java b/...e-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java
@@ -50,6 +50,7 @@ public class MorfologikPortugueseSpellerRule extends MorfologikSpellerRule {
   private final Map<String, String> dialectAlternationMapping;
   private static final PortugueseTagger tagger = new PortugueseTagger();
   private static final PortugueseSynthesizer synth = PortugueseSynthesizer.INSTANCE;
+  private boolean dialectIssue = false;
 
 
   @Override
@@ -71,8 +72,12 @@ public static Set<String> getWordSetFromResources(String filepath) {
 
   @Override
   public String getId() {
-    return "MORFOLOGIK_SPELLER_"
+    String id = "MORFOLOGIK_SPELLER_"
       + language.getShortCodeWithCountryAndVariant().replace("-", "_").toUpperCase();
+    if (dialectIssue) {
+      id = id + "_DIALECT";
+    }
+    return id;
   }
 
   // TODO: document this, as it's about to get messy
@@ -229,6 +234,7 @@ public List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentenc
     if (!ruleMatches.isEmpty()) {
       String wordWithBrazilianStylePastTense = checkEuropeanStyle1PLPastTense(word);
       if (wordWithBrazilianStylePastTense != null) {
+        this.dialectIssue = true;
         String message = "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.";
         replaceFormsOfFirstMatch(message, sentence, ruleMatches, wordWithBrazilianStylePastTense);
       }
@@ -239,6 +245,7 @@ public List<RuleMatch> getRuleMatches(String word, int startPos, AnalyzedSentenc
       }
       String dialectAlternative = this.dialectAlternative(word);
       if (dialectAlternative != null) {
+        this.dialectIssue = true;
         String otherVariant = "europeu";
         if (Objects.equals(spellerLanguage.getShortCodeWithCountryAndVariant(), "pt-PT")) {
           otherVariant = "brasileiro";

diff --git a/...dules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java b/...dules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java
@@ -103,7 +103,7 @@ private void assertSingleError(String sentence, JLanguageTool lt,
   }
 
   private void assertSingleExactError(String sentence, JLanguageTool lt, MorfologikPortugueseSpellerRule rule,
-                                      String suggestion, String message) throws IOException {
+                                      String suggestion, String message, String id) throws IOException {
     RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence));
     assertEquals(1, matches.length);
     if (matches.length > 0) {
@@ -112,16 +112,18 @@ private void assertSingleExactError(String sentence, JLanguageTool lt, Morfologi
 //      System.out.println(returnedSuggestions);
       assert Objects.equals(returnedSuggestions.get(0), suggestion);
       assert Objects.equals(match.getMessage(), message);
+      System.out.println("id: " + match.getSpecificRuleId());
+      assert Objects.equals(match.getSpecificRuleId(), id);
     }
   }
 
   private void assertTwoWayDialectError(String sentenceBR, String sentencePT) throws IOException {
     String brMessage = "Possível erro de ortografia: esta é a grafia utilizada no português europeu.";
     String ptMessage = "Possível erro de ortografia: esta é a grafia utilizada no português brasileiro.";
     assertNoErrors(sentenceBR, ltBR, ruleBR);
-    assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage);
+    assertSingleExactError(sentencePT, ltBR, ruleBR, sentenceBR, brMessage, "MORFOLOGIK_SPELLER_PT_BR_DIALECT");
     assertNoErrors(sentencePT, ltPT, rulePT);
-    assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage);
+    assertSingleExactError(sentenceBR, ltPT, rulePT, sentencePT, ptMessage, "MORFOLOGIK_SPELLER_PT_PT_DIALECT");
   }
 
   private void assertTwoWayOrthographicAgreementError(String sentence90, String sentence45) throws IOException {
@@ -365,13 +367,15 @@ public void testBrazilPortugueseGema23DFalseNegatives() throws Exception {
   @Test
   public void testPortugueseDiaeresis() throws Exception {
     assertSingleExactError("pingüim", ltBR, ruleBR, "pinguim",
-      "No mais recente acordo ortográfico, não se usa mais o trema no português.");
+      "No mais recente acordo ortográfico, não se usa mais o trema no português.",
+      "MORFOLOGIK_SPELLER_PT_BR");
   }
 
   @Test
   public void testEuropeanPortugueseStyle1PLPastTenseCorrectedInBrazilian() throws Exception {
     assertSingleExactError("amámos", ltBR, ruleBR, "amamos",
-      "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.");
+      "No Brasil, o pretérito perfeito da primeira pessoa do plural escreve-se sem acento.",
+      "MORFOLOGIK_SPELLER_PT_BR_DIALECT");
   }
 
   @Test