diff --git a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java index bb3e48e05b71..f0722e681045 100644 --- a/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java +++ b/languagetool-language-modules/pt/src/main/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRule.java @@ -1,23 +1,64 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Marcin Miłkowski (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ package org.languagetool.rules.pt; import org.languagetool.JLanguageTool; import org.languagetool.Language; import org.languagetool.UserConfig; +import org.languagetool.rules.SuggestedReplacement; +import org.languagetool.rules.spelling.SpellingCheckRule; import org.languagetool.rules.spelling.morfologik.MorfologikSpellerRule; import java.io.IOException; -import java.util.List; -import java.util.Objects; -import java.util.ResourceBundle; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; + +import static org.languagetool.JLanguageTool.getDataBroker; public class MorfologikPortugueseSpellerRule extends MorfologikSpellerRule { - private String dictFilename; - private Language language; + private final String dictFilepath; + // Path, in pt/resources, where the list of words to be removed from the suggestion list is to be found. + private static final String doNotSuggestWordsFilepath = "/pt/do_not_suggest.txt"; + // Set of words that we do not want to add to the suggestions, despite being correctly spelt. Mostly profanity. + private static final Set doNotSuggestWords = getDoNotSuggestWords(); + // Path, in pt/resources, where a list of abbreviations is found. These are simple abbreviations of the shape \w+\. + private static final String abbreviationFilepath = "/pt/abbreviations.txt"; + private static final Set abbreviations = getAbbreviations(); @Override public String getFileName() { - return dictFilename; + return dictFilepath; + } + + public static Set getDoNotSuggestWords() { + return getWordSetFromResources(doNotSuggestWordsFilepath); + } + + public static Set getAbbreviations() { + return getWordSetFromResources(abbreviationFilepath); + } + + public static Set getWordSetFromResources(String filepath) { + return new HashSet(getDataBroker().getFromResourceDirAsLines(filepath)); } @Override @@ -33,15 +74,55 @@ public MorfologikPortugueseSpellerRule(ResourceBundle messages, Language languag // the tagger tags pt-PT and pt-BR words all the same, as it should, but they're still incorrect if they belong // to the wrong dialect, commenting this out // this.setIgnoreTaggedWords(); - if (language.getShortCodeWithCountryAndVariant().equals("pt")) { - language = language.getDefaultLanguageVariant(); + Language spellerLanguage = language; + if (spellerLanguage.getShortCodeWithCountryAndVariant().equals("pt")) { + spellerLanguage = spellerLanguage.getDefaultLanguageVariant(); } - this.language = language; - if (Objects.equals(language.getShortCodeWithCountryAndVariant(), "pt-BR")) { - this.dictFilename = "/pt/spelling/pt-BR" + JLanguageTool.DICTIONARY_FILENAME_EXTENSION; - } else { - // TODO: work out how to detect the orthographic agreement; user option? - this.dictFilename = "/pt/spelling/pt-PT-90" + JLanguageTool.DICTIONARY_FILENAME_EXTENSION; + this.dictFilepath = "/pt/spelling/" + getDictFilename(spellerLanguage) + JLanguageTool.DICTIONARY_FILENAME_EXTENSION; + } + + @Override + protected List filterNoSuggestWords(List suggestedReplacements) { + return suggestedReplacements.stream().filter( + suggestedReplacement -> !doNotSuggestWords.contains( + suggestedReplacement.getReplacement().toLowerCase() + )).collect(Collectors.toList()); + } + + @Override + protected List getAdditionalTopSuggestions(List suggestions, String word) + throws IOException { + List suggestionsList = suggestions.stream().map(SuggestedReplacement::getReplacement) + .collect(Collectors.toList()); + return SuggestedReplacement.convert(getAdditionalTopSuggestionsString(suggestionsList, word)); + } + + private List getAdditionalTopSuggestionsString(List suggestions, String word) throws IOException { + if (isAbbreviation(word)) { + return Collections.singletonList(word + "."); + } + return Collections.emptyList(); + } + + // Check if the word we're checking is in our list of abbreviations. + protected boolean isAbbreviation(String word) { + // regular case (since we do have some abbreviations with weird casing) as well as downcased + return abbreviations.contains(word + ".") || abbreviations.contains(word.toLowerCase() + "."); + } + + private static String getDictFilename(Language spellerLanguage) { + String dictFilename = "pt-BR"; // default dict is pt-BR with 1990 spelling + String fullLanguageCode = spellerLanguage.getShortCodeWithCountryAndVariant(); + switch (fullLanguageCode) { + case "pt-BR": dictFilename = "pt-BR"; break; + case "pt-PT": dictFilename = "pt-PT-90"; break; + case "pt-AO": case "pt-MZ": dictFilename = "pt-PT-45"; break; } + return dictFilename; + } + + @Override + public List getAdditionalSpellingFileNames() { + return Arrays.asList(SpellingCheckRule.GLOBAL_SPELLING_FILE, "/pt/multiwords.txt"); } } diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/abbreviations.txt b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/abbreviations.txt new file mode 100644 index 000000000000..e6c35a228b48 --- /dev/null +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/abbreviations.txt @@ -0,0 +1,1947 @@ +A. +a.a. +a.C. +A.C. +a.m. +aa. +aberirv. +abr. +Abr. +abrev. +abs. +aç. +adit. +adj. +adm. +Admin. +adv. +adv.º +Adv.º +advers. +ag. +al. +alt. +am. +an. +ant. +anton. +antr. +antropon. +ap. +apass. +aprox. +Aprox. +arg. +arom. +art. +Art. +art.º +arts. +ass. +Ass. +atm. +atôm. +Att. +aux. +Aux. +av. +Av. +bact. +bacter. +bacteriol. +balíst. +bált. +bárb. +basq. +bat. +bbl. +beir. +Bel. +Bel.ª +beng. +berb. +bíb. +bibl. +bíbl. +Bíbl. +bibl.f. +bibl.m. +bibl.m.pl. +bibliof. +bibliog. +bibliogr. +bibliol. +bibliôn. +bibliot. +bibliotec. +bibliotecon. +biblog. +biblol. +biblon. +bien. +bim. +bimen. +biodim. +biofís. +biogen. +biogên. +biogeo. +biogeog. +biogeogr. +biogr. +biol. +biom. +bioq. +bioquím. +biot. +biotip. +biotipol. +birm. +birrel. +bisp. +bispd. +bitr. +bitrans. +biz. +bm. +boêm. +bol. +boliv. +bomb. +bord. +borg. +borr. +bot. +br. +bras. +Bras. +bret. +brig. +Brig. +brit. +BrOffice.org +brom. +bromat. +btl. +bud. +búlg. +burl. +buroc. +C.Ág. +cab. +caç. +cad. +caf. +calç. +cálc. +calcog. +calcogr. +cald. +calig. +caligr. +calv. +calvin. +câm. +can. +canad. +canaliz. +cant. +cap. +cap.m.g. +Cap.M.G. +capac. +capit. +capix. +caps. +capt. +car. +card. +carn. +carp. +carr. +cart. +cartogr. +cast. +casuís. +casuíst. +cat. +Cat. +catacr. +catal. +catarin. +categ. +catól. +Catol. +Catolic. +catóp. +catópt. +cau. +caus. +causalid. +cav. +cb. +cc. +ce. +cel. +celt. +célt. +cên. +cenog. +cent. +cênt. +centr. +cerâm. +cerv. +cest. +cet. +cf. +cfr. +ch. +Ch.B. +Ch.M. +chanc. +chancel. +chap. +chil. +chin. +chul. +cibern. +cicl. +cid. +cien. +cienc. +ciênc. +cient. +cig. +cin. +cineg. +cinem. +cinematogr. +cing. +cinol. +cinz. +cir. +circ. +círc. +circunscr. +cirurg. +cit. +citol. +citt. +civ. +cj. +Cj. +clas. +clás. +cláss. +clich. +clim. +climatol. +clín. +cob. +cód. +códs. +cogn. +col. +Col. +colet. +colomb. +cols. +comb. +combin. +combinat. +combust. +comdor. +comend. +comerc. +comp. +compar. +compl. +compr. +compt. +comte. +comunic. +con. +côn. +Côn. +conc. +concess. +concl. +concr. +concret. +cond. +condic. +conf. +confed. +Confed. +confeit. +confl. +confls. +cong. +conhec. +conj. +Conj. +conjug. +conq. +conquil. +conquiliol. +cons. +consec. +consel. +conselh. +conseq. +const. +Const. +constel. +constr. +consuet. +cont. +contab. +contemp. +contr. +coord. +Coord. +cop. +copul. +copulat. +coq. +coreog. +coreogr. +corog. +corogr. +corp. +corr. +corresp. +corrup. +corrupt. +corrut. +corv. +Corv. +cos. +cosec. +cosm. +cosmog. +cosmogr. +cosmol. +cost. +cot. +côv. +cp. +cr. +créd. +cresc. +crim. +criminol. +cript. +criptog. +crist. +cristalogr. +cristand. +cristian. +crôn. +cron.f.pl. +cron.m.pl. +cronol. +cronom. +cronôn. +crust. +ctv. +cu.ft. +cu.yd. +cul. +culin. +cult. +curt. +cut. +cutel. +cv. +cvs. +cx. +cyat. +D. +d.C. +dactilog. +dactilogr. +dactilosc. +dad. +dasim. +dat. +DD. +dec. +Dec. +decl. +declin. +decor. +decr. +Decr. +decresc. +ded. +def. +defect. +defin. +definit. +dem. +democ. +democr. +demog. +demogr. +demonstr. +dens. +dep. +Dep. +depr. +deprec. +deps. +dept. +depto. +deriv. +derm. +des. +desc. +descr. +desemb. +Desemb. +desemboc. +desen. +desin. +desp. +desus. +det. +determ. +dev. +diác. +dial. +dialet. +dialét. +dic. +Dic. +did. +didát. +dif. +Dif. +dim. +dimin. +din. +dinam. +dinâm. +dioc. +dipl. +diplom. +dir. +disc. +disfem. +diss. +distr. +doc. +docs. +docum. +dog. +dogm. +dogmát. +dól. +dom. +domin. +domín. +dór. +dpto. +Dpto. +dra. +Dra. +dram. +drav. +Drs. +dualid. +dur. +dz. +E. +e.g. +E.U.A. +ecd. +ecl. +écl. +ecles. +ecol. +econ. +Econ. +ed. +Ed. +edd. +edif. +Edif. +educ. +EE. +efem. +egíp. +el.s.f.pl. +el.s.m.pl. +eletr. +elétr. +eletrodin. +eletrol. +eletrom. +eletrôn. +eletrot. +elipt. +emb. +Emb. +embal. +embr. +embriol. +emigr. +emol. +emp. +empír. +empr. +Empreend. +emprés. +enc. +encicl. +encícl. +End. +endoc. +energ. +energét. +enf. +eng. +Eng. +eng.ª +Eng.ª +eng.º +Eng.º +ens. +ent. +entom. +entomol. +enx. +epig. +epigr. +epíst. +eq. +Eq. +equat. +equip. +Equip. +EQUIP. +equit. +equiv. +erud. +esc. +Esc. +escand. +escoc. +escolást. +escr. +escul. +escult. +esgr. +esl. +eslav. +eslov. +esot. +esp. +Esp. +espec. +especialm. +especif. +específ. +espect. +espectrogr. +espel. +espeleol. +espet. +espir. +espirit. +esport. +esq. +est. +Est. +estad. +estat. +estatíst. +estenog. +estenogr. +estereogr. +estét. +estil. +estim. +eston. +estr. +estrang. +estrangeir. +estrat. +estratég. +estrut. +estud. +et. +et.m.pl. +etim. +etimol. +etióp. +etn. +étn. +etnog. +etnogr. +etnol. +etnolog. +etol. +euf. +eufem. +eufêm. +eufon. +eufôn. +eufor. +eur. +Ex.ª +Ex.º +Exa. +exag. +excl. +exclam. +exclamat. +excurs. +exe. +exérc. +EXMª. +EXMº. +exp. +exper. +experim. +expl. +explet. +explor. +explos. +export. +expr. +express. +expression. +ext. +extens. +extrat. +f.adv. +f.aport. +f.nom. +f.parl. +f.port. +f.red. +f.verb. +fáb. +fac. +falc. +fam. +farm. +farmac. +farmacol. +farmacop. +fasc. +fascs. +fauv. +fed. +feit. +fem. +fen. +fenom. +fenôm. +fer. +ferrad. +ferrov. +feud. +fev. +Fev. +ff. +fg. +fig. +figd. +figur. +figurat. +figurativ. +fil. +filat. +filol. +fin. +finl. +fís. +fisc. +fisioc. +fisiocr. +fisiocrat. +fisiol. +fisl. +fispat. +fitog. +fitogr. +fitopat. +fitossoc. +fl. +Fl. +flam. +floric. +fls. +flum. +flumin. +fluv. +fm. +fo. +fog. +fol. +folc. +folcl. +folh. +fols. +fonét. +fonol. +form. +fórm. +formul. +forrag. +fort. +fos. +fot. +fotoan. +fotoanál. +fotogr. +fotom. +fov. +fr. +Fr. +frac. +fracc. +frag. +franc. +frânc. +freg. +frenol. +freq. +frig. +frut. +frutíf. +fs. +fss. +ft. +ft.p. +fulv. +fund. +fut. +futb. +futeb. +futur. +G. +gaél. +gal. +Gal. +galfímia. +galic. +galv. +gar. +gasc. +gat. +gaul. +gav. +gen. +gên. +Gen. +geneal. +genét. +genov. +geod. +geof. +geofís. +geog. +geogn. +geogr. +geol. +geom. +geomor. +geomorf. +geon. +geôn. +ger. +germ. +gin. +ginást. +ginec. +ginecol. +gír. +gliptog. +gliptogr. +gliptol. +gliptot. +gloss. +glót. +glotol. +gn. +gnom. +gót. +gov. +Gov. +gr. +graf. +gráf. +grafol. +grafosc. +grav. +grd. +groen. +groenl. +gt. +guat. +guatem. +guin. +gutt. +guz. +H. +h.cont. +h.mod. +h.sag. +hab. +hag. +hagiog. +hagiogr. +hagiol. +haplol. +Hares. +hast. +hebd. +hebr. +helm. +helmin. +helmintol. +heort. +heort.f. +heort.f.pl. +heort.m. +heort.m.pl. +heortôn. +her. +herál. +heráld. +herb. +herp. +herpet. +herpét. +herpetogr. +herpetol. +het. +hib. +híb. +hibr. +híbr. +hibrid. +hidr. +hidrául. +hidrod. +hidrog. +hidrogr. +hidrom. +hidrost. +hidrot. +hier. +hier.f. +hier.f.pl. +hier.m. +hier.m.pl. +hierôn. +hierosolim. +hig. +higr. +hind. +hip. +híp. +hipál. +hipérb. +hipiat. +hipnot. +hipnoter. +hipoc. +hipocor. +hipol. +hipot. +hipót. +hipotét. +hisp. +hist. +histol. +historiog. +historiogr. +hom. +homeop. +homof. +homog. +homogr. +homon. +homôn. +hon. +hond. +hort. +hortic. +hot. +hotent. +húng. +I. +i.e. +iat. +ib. +ibér. +ibid. +iconog. +iconogr. +iconol. +ict. +idiot. +idol. +idolol. +igr. +Il. +Ilmo. +Ilmos. +ilum. +ilusion. +ilustr. +imigr. +imit. +imp. +imper. +imperat. +imperf. +impess. +import. +impr. +impres. +impression. +improp. +impror. +inc. +incoat. +incóg. +incs. +ind. +índ. +indef. +indet. +indian. +indiv. +indoch. +indon. +indost. +indum. +indust. +indúst. +inf. +infan. +infant. +infer. +infin. +infinit. +infinitiv. +infl. +inform. +ing. +ingl. +ins. +Insc. +inscr. +insep. +inst. +instit. +int. +integr. +intens. +interamn. +interj. +interjet. +intern. +internac. +interr. +interrog. +interrogat. +intj. +intr. +intrans. +inus. +inv. +invariav. +invenç. +invest. +investig. +iog. +iran. +iraq. +irl. +iron. +irôn. +irr. +irreg. +isl. +islam. +island. +isr. +it. +ít. +ital. +itál. +italian. +iug. +J. +jam. +jan. +jap. +jard. +jardin. +jav. +jes. +joalh. +joc. +jog. +jorn. +Jr. +jud. +jul. +jun. +jur. +jurisp. +jurispr. +just. +K. +L. +lab. +laborat. +labort. +lact. +lad. +lâm. +lanc. +lanç. +lap. +lapid. +larg. +lat. +latit. +latoar. +laud. +lb. +leg. +lég. +legisl. +légs. +leit. +let. +lex. +lib. +LibreOffice.org +lig. +lim. +limit. +lin. +ling. +líng. +linguíst. +líq. +lit. +Lit. +liter. +literat. +litog. +litogr. +litol. +Litt.D. +lituan. +litur. +liturg. +liv. +livr. +Livr. +ll. +loc. +loc.s. +loc.v. +log. +lóg. +logíst. +lomb. +long. +loq. +lr. +Ltd. +ltda. +Ltda. +ludol. +lug. +lund. +lunf. +lusit. +lut. +luv. +M. +m.q.perf. +maç. +maçon. +mad. +madeir. +mag. +Mag.ª +magn. +magnet. +magnit. +mai. +Mai. +maiúsc. +maj. +Maj. +malab. +malac. +malacol. +malg. +malh. +man. +manuf. +maomet. +map. +maq. +máq. +maquinof. +maranh. +marc. +march. +marchet. +marg. +marin. +marinh. +marít. +marn. +marr. +marroq. +martin. +marx. +masc. +mat. +Mat. +matad. +mater. +matogros. +máx. +mct. +mec. +mecan. +mecân. +mecanogr. +med. +méd. +Med. +mediev. +medv. +mem. +memo. +memor. +mens. +mer. +merc. +mercad. +merid. +met. +metaf. +metáf. +metafis. +metafór. +metalog. +metalur. +metát. +meteor. +meton. +metr. +métr. +metrif. +metrol. +mex. +mexic. +micol. +microbiol. +microfot. +microg. +microl. +microm. +microsc. +microscóp. +mín. +miner. +ming. +minh. +minúsc. +mist. +míst. +mit. +mit.f. +mit.f.pl. +mit.gr. +mit.m. +mit.m.f. +mit.m.pl. +mitol. +mitôn. +mk. +MM. +mMin. +moag. +mob. +moçamb. +mod. +moed. +mon. +monog. +monogr. +mont. +montanh. +moralid. +morf. +mov. +Mr. +Mrs. +mult. +mun. +mús. +museol. +N. +n.a. +n.º +N.Obs. +n.p.loc. +n.p.pers. +n.pr. +N.SS.P. +n.t. +nac. +nap. +nat. +natur. +náu. +náua. +náut. +nav. +nav.fl. +naz. +neerl. +neg. +neoguin. +neolog. +neozel. +nep. +neur. +neutr. +nicarag. +nig. +nob. +nobil. +nom. +nom.f. +nom.f.pl. +nom.m. +nom.m.pl. +nor. +nórd. +norm. +normat. +norueg. +notic. +nov. +Nov. +nucl. +núm. +numis. +numism. +nutr. +o.k. +O.k. +O.S. +ob. +Ob. +obed. +obr. +Obr. +obs. +Obs. +obsol. +obst. +obstet. +ocean. +oceanogr. +ocid. +ocul. +ocult. +odont. +odontol. +of. +Of. +ofid. +oft. +oftalm. +oftalmol. +olig. +onç. +oneol. +onom. +onomást. +onomat. +onomatop. +op. +Op. +opos. +opp. +ópt. +optat. +or. +orat. +ord. +ordin. +org. +organiz. +orig. +origin. +orign. +orn. +ornit. +ornitol. +orog. +orogr. +orôn. +ort. +ortogr. +ortográf. +ortop. +ostr. +otorr. +otorrin. +our. +ouriv. +out. +oz. +P. +p.ae. +p.al. +p.ext. +pa. +pa.g. +pag. +pág. +Pág. +pagg. +págg. +págs. +paleob. +paleog. +paleogr. +paleont. +paleontol. +paleoz. +pals. +paq. +paraens. +parag. +parág. +Parág. +paraib. +paran. +parassint. +parl. +parn. +parnas. +parôn. +part. +partic. +pass. +passm. +passr. +passt. +past. +patol. +patr. +pátr. +patr.f. +patr.f.pl. +patr.m. +patr.m.pl. +patron. +patrôn. +paulist. +pç. +pça. +Pça. +pdl. +Pe. +pec. +ped. +pedag. +pediat. +pedol. +pedr. +pej. +pel. +pen. +pent. +peq. +pér. +perf. +perfum. +períf. +perífr. +pern. +pernamb. +pers. +pérs. +perspect. +perspectiv. +peruv. +pes. +pesc. +pesq. +pess. +pet. +petr. +petrog. +petrogr. +petrol. +petroq. +pf. +pg. +pgto. +Ph.B. +Ph.D. +piauien. +píl. +pint. +pinx. +pirot. +pirotec. +pirotéc. +pisc. +piscic. +pizz. +pl. +planej. +plat. +pleb. +pm. +poét. +pol. +Pol. +políc. +polin. +polít. +popul. +port. +posit. +poss. +possess. +pot. +potam. +pov. +Pov. +pp. +pr. +Pr. +prác. +prát. +prec. +preced. +precis. +pred. +predic. +pref. +Pref. +prep. +prepos. +pres. +Pres. +presc. +presid. +Presid. +prest. +pret. +prev. +prim. +primit. +princ. +princip. +priv. +probl. +problem. +proc. +prod. +prof. +Prof. +prof.ª +Prof.ª +prof.as +Prof.as +prof.s +Prof.s +profis. +profiss. +profission. +prom. +pron. +pronon. +pronún. +prop. +propag. +propos. +propr. +prosc. +prosôn. +prost. +prostét. +prot. +prót. +protest. +protét. +protoc. +protoz. +prov. +provav. +provb. +provc. +provç. +proven. +provenç. +provinc. +prox. +pseud. +psic. +psican. +psicofisl. +psicogn. +psicol. +psicopat. +psiq. +psiquiat. +pto. +pts. +pub. +púb. +Púb. +públ. +public. +pug. +pulv. +Q. +q.e.d. +ql. +qq.v. +qua. +qualif. +quant. +Quant. +quantit. +quart. +quest. +qui. +quí. +quích. +quím. +quimb. +quinz. +quinzen. +quirom. +r. +rac. +racion. +rád. +radioat. +radiod. +radiodif. +radiog. +radiogr. +radiol. +radiot. +radiotec. +radiotéc. +radiotécn. +radioter. +rall. +Rd. +realid. +rec. +recip. +recíp. +recípr. +red. +ref. +refl. +reform. +reg. +regim. +region. +regress. +rel. +relaç. +relat. +relativ. +relig. +reloj. +rem. +rep. +repart. +repert. +report. +repúb. +res. +Resp. +RESP. +rest. +restr. +restrit. +result. +ret. +retór. +retrosp. +revers. +rg. +rib. +rit. +rod. +romn. +rot. +rub. +rur. +russ. +rúst. +S. +S.A. +s.f.pl. +s.loc. +s.m. +s.m.pl. +S.O.S. +s.p.loc. +s.p.pers. +S.Paulo +s.vv. +sab. +Sáb. +sac. +sagr. +sals. +san. +sân. +sâns. +sânscr. +sap. +sapat. +Sarg. +sát. +sc. +sch. +scr. +scul. +sec. +Sec. +secr. +sect. +seg. +segg. +segs. +sel. +semânt. +semic. +semin. +semiol. +semit. +semít. +sen. +Sen. +sent. +sep. +septent. +seq. +seqq. +sér. +serg. +seric. +sericic. +serr. +serralh. +serv. +sérv. +set. +setent. +sex. +sf. +sg. +sh.tn. +sib. +sid. +sider. +siderogr. +siderotéc. +sigil. +sign. +signif. +síl. +silog. +silv. +silvic. +simb. +símb. +simbol. +simból. +simpl. +sin. +sinéd. +sing. +Sing. +sinon. +sinôn. +sint. +sínt. +sir. +sír. +sist. +sit. +Snr. +soc. +Soc. +sociol. +sól. +son. +Sór. +sost. +sov. +soviét. +spp. +sq.ft. +sq.in. +sq.m. +sq.rd. +sq.yd. +sr. +sra. +Sras. +Sres. +srta. +Srtas. +ss. +SS. +stac. +sto. +subafl. +subafls. +Subdiác. +subj. +subjunt. +subord. +subst. +subvar. +suc. +Suc. +suec. +suf. +suff. +suj. +Súm. +sup. +superf. +superl. +supl. +suprf. +suprl. +suprs. +surr. +Súv. +T. +t.geogr. +tab. +táb. +tail. +tam. +tâm. +tan. +tang. +taquigr. +tard. +tát. +taur. +taurom. +taxid. +teat. +teatr. +tec. +téc. +tecel. +tecgo. +tecn. +técn. +tecna. +tecnog. +tecnogr. +tecnol. +tect. +tel. +Tel. +telec. +telecom. +telef. +telef.s. +teleg. +telegr. +telégr. +telev. +temp. +temper. +ten. +Ten. +teol. +teôn. +teos. +ter. +terap. +terapêut. +terat. +teratol. +term. +térm. +termin. +terminol. +termod. +termodinâm. +termom. +terr. +territ. +tes. +test. +têxt. +tib. +tibet. +tint. +tip. +tipogr. +tipol. +tir. +tít. +ton. +tôn. +tóp. +top.f.pl. +top.m.pl. +topog. +topogr. +topol. +topon. +topôn. +torp. +tosc. +tox. +toxiol. +tr. +trab. +trabalh. +trad. +tradic. +tráf. +transit. +transj. +transm. +transmont. +transobj. +transp. +trat. +Trav. +trib. +trig. +trigon. +trim. +trimest. +trit. +triv. +trop. +TT. +tun. +tunis. +tup. +tur. +turc. +turism. +tv. +U. +u.inf. +u.sup. +ucr. +ucraín. +ucran. +ult. +umb. +un. +unid. +unif. +univ. +univers. +urb. +urban. +urol. +urug. +utilid. +utilit. +utop. +utópi. +v. +V.Exa. +V.Exas. +v.g. +V.Sa. +V.Sas. +vad. +vadm. +vasc. +vb. +vc. +vect. +veg. +vel. +veloc. +ven. +venat. +vend. +venez. +venezuel. +verb. +vern. +veros. +veross. +vers. +versif. +vet. +veter. +vid. +Vid. +vidr. +vig. +Vig. +vin. +vinic. +viol. +vit. +Vit. +vitic. +vitr. +viz. +vl. +vo. +voc. +vog. +vol. +volat. +voll. +vols. +vox. +vulc. +vulg. +W. +X. +xenof. +xerog. +xerogr. +xilog. +xin. +yd.p.sec. +zend. +zo. +zool. +zoot. +zootec. +zootéc. diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/do_not_suggest.txt b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/do_not_suggest.txt new file mode 100644 index 000000000000..c942ce818a21 --- /dev/null +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/do_not_suggest.txt @@ -0,0 +1,2 @@ +puta +babaca diff --git a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt index 0d85031dc53e..aa588b7e3fd4 100644 --- a/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt +++ b/languagetool-language-modules/pt/src/main/resources/org/languagetool/resource/pt/multiwords.txt @@ -4496,6 +4496,7 @@ valeat quam L_LEGAL_ venire contra L_LEGAL_ vera sunt L_LEGAL_ verba volant L_LEGAL_ +verba volant, scripta remnant L_LEGAL_ verbi gratia L_LEGAL_ veritate accipitur L_LEGAL_ vigilavit iustitiae L_LEGAL_ diff --git a/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java index 6bddc2df3483..274c9325ce2b 100644 --- a/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java +++ b/languagetool-language-modules/pt/src/test/java/org/languagetool/rules/pt/MorfologikPortugueseSpellerRuleTest.java @@ -1,3 +1,21 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Marcin Miłkowski (http://www.languagetool.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ package org.languagetool.rules.pt; import org.junit.Test; @@ -11,12 +29,16 @@ import java.util.Arrays; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; public class MorfologikPortugueseSpellerRuleTest { private final MorfologikPortugueseSpellerRule ruleBR = getSpellerRule("BR"); private final JLanguageTool ltBR = getLT("BR"); private final MorfologikPortugueseSpellerRule rulePT = getSpellerRule("PT"); private final JLanguageTool ltPT = getLT("PT"); + // This one is used to test the pre-90 agreement spellings + private final MorfologikPortugueseSpellerRule ruleMZ = getSpellerRule("MZ"); + private final JLanguageTool ltMZ = getLT("MZ"); public MorfologikPortugueseSpellerRuleTest() throws IOException { } @@ -43,6 +65,20 @@ private void assertErrorLength(String sentence, int length, JLanguageTool lt, } } + private void assertSingleErrorWithNegativeSuggestion(String sentence, JLanguageTool lt, + MorfologikPortugueseSpellerRule rule, + String badSuggestion) throws IOException { + RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence)); + // TODO: just debugging, must delete later! + if (matches.length > 0) { + System.out.println(matches[0].getSuggestedReplacements()); + } + assertEquals(1, matches.length); + if (matches.length > 0) { + assertFalse(matches[0].getSuggestedReplacements().contains(badSuggestion)); + } + } + private void assertSingleErrorAndPos(String sentence, JLanguageTool lt, MorfologikPortugueseSpellerRule rule, String[] suggestions, int fromPos, int toPos) throws IOException { RuleMatch[] matches = rule.match(lt.getAnalyzedSentence(sentence)); @@ -65,9 +101,16 @@ private void assertSingleError(String sentence, JLanguageTool lt, private void assertTwoWayDialectError(String sentenceBR, String sentencePT) throws IOException { assertNoErrors(sentenceBR, ltBR, ruleBR); - assertSingleError(sentenceBR, ltPT, rulePT, new String[]{sentencePT}); - assertNoErrors(sentencePT, ltPT, rulePT); assertSingleError(sentencePT, ltBR, ruleBR, new String[]{sentenceBR}); + assertNoErrors(sentencePT, ltPT, rulePT); + assertSingleError(sentenceBR, ltPT, rulePT, new String[]{sentencePT}); + } + + private void assertTwoWayOrthographicAgreementError(String sentence90, String sentence45) throws IOException { + assertNoErrors(sentence90, ltPT, rulePT); + assertSingleError(sentence45, ltPT, rulePT, new String[]{sentence90}); + assertNoErrors(sentence45, ltMZ, ruleMZ); + assertSingleError(sentence90, ltMZ, ruleMZ, new String[]{sentence45}); } @Test @@ -109,8 +152,66 @@ public void testPortugueseHyphenatedClitics() throws Exception { @Test public void testPortugueseSymmetricalDialectDifferences() throws Exception { assertTwoWayDialectError("anônimo", "anónimo"); + assertTwoWayDialectError("tênis", "ténis"); + assertTwoWayDialectError("ônus", "ónus"); + // I swear I'm not being immature, there was some weirdness with "pêni"/"pênis" in pt-BR ;) + assertTwoWayDialectError("pênis", "pénis"); assertTwoWayDialectError("detecção", "deteção"); assertTwoWayDialectError("dezesseis", "dezasseis"); + // new words from portal da língua portuguesa + assertTwoWayDialectError("napoleônia", "napoleónia"); + assertTwoWayDialectError("hiperêmese", "hiperémese"); + // orthographic reforms + assertTwoWayOrthographicAgreementError("detetar", "detectar"); + // not working yet + assertTwoWayDialectError("detectar", "detetar"); + // will not work due to tokenisation quirk, bebê-lo, must be fixed + // assertTwoWayDialectError("bebê", "bebé"); + } + + @Test + public void testPortugueseSpellingDiminutives() throws Exception { + assertNoErrors("franguito", ltBR, ruleBR); + assertNoErrors("irmãozinho", ltBR, ruleBR); + assertNoErrors("retratozinho", ltBR, ruleBR); + assertNoErrors("notebookzinho", ltBR, ruleBR); + assertNoErrors("finaizitos", ltBR, ruleBR); + assertNoErrors("cafezito", ltBR, ruleBR); + assertNoErrors("chorõezitos", ltBR, ruleBR); + assertNoErrors("assadito", ltBR, ruleBR); + } + + @Test + public void testPortugueseSpellingProductiveAdverbs() throws Exception { + assertNoErrors("enciclopedicamente", ltBR, ruleBR); + assertNoErrors("nefastamente", ltBR, ruleBR); + assertNoErrors("funereamente", ltBR, ruleBR); + } + + @Test + public void testPortugueseSpellingValidAbbreviations() throws Exception { + // need to understand how to test segment.srx here! + assertSingleError("primit", ltBR, ruleBR, new String[]{"primit."}); + assertSingleError("Islam,", ltBR, ruleBR, new String[]{"Islam."}); + assertNoErrors("xerogr.", ltBR, ruleBR); + assertNoErrors("Baixei a vers. 7.0.0", ltBR, ruleBR); + assertSingleError("Sem terminol exata, nunca vamos saber.", ltBR, ruleBR, new String[]{"terminol."}); + } + + @Test + public void testPortugueseSpellingMultiwords() throws Exception { + assertSingleError("volant", ltBR, ruleBR, new String[]{}); + assertNoErrors("verba volant, scripta remnant", ltBR, ruleBR); + assertSingleError("Raspberry", ltBR, ruleBR, new String[]{}); + assertNoErrors("Raspberry Pi", ltBR, ruleBR); + } + + @Test + public void testPortugueseSpellingDoesNotSuggestOffensiveWords() throws Exception { + // some words should not be suggested; this test makes sure they are *not* in the returned suggestions for + // each given incorrectly spelt word + assertSingleErrorWithNegativeSuggestion("pwta", ltBR, ruleBR, "puta"); + assertSingleErrorWithNegativeSuggestion("bâbaca", ltBR, ruleBR, "babaca"); } @Test