Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solving the issue "Template matching regex features only matches prefix" #170

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
14 changes: 7 additions & 7 deletions articlequality/feature_lists/enwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@

# Templates
infobox_templates = wikitext.revision.template_names_matching(
r"infobox", name="enwiki.revision.infobox_templates")
r"infobox$", name="enwiki.revision.infobox_templates")
CN_TEMPLATES = [
r"Citation[_ ]needed",
r"Cn",
r"Fact"
]
cn_templates = wikitext.revision.template_names_matching(
"|".join(CN_TEMPLATES), name="enwiki.revision.cn_templates")
"$|".join(CN_TEMPLATES)+"$", name="enwiki.revision.cn_templates")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you considered a regex in the format (?:foo|bar|baz)$, with a single $ after the parenthesis?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay I'll try to check that

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, are you pointing to a scenario where we would end up with a regex such as
Citation[_ ]needed$|Cn$|Fact$|(?:foo|bar|baz)$$, causing a double $?

I've tried to check whether it works, as shown below:
image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, not really. I was thinking of something like this:
(?:Citation[_ ]needed|Cn|Fact)$
See https://regex101.com/r/GinCwl/1.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, not really. I was thinking of something like this: (?:Citation[_ ]needed|Cn|Fact)$. See https://regex101.com/r/GinCwl/1.

Oh that does seem much better, thank you.
I'll do that and run some tests

who_templates = wikitext.revision.template_names_matching(
"Who", name="enwiki.revision.who_templates")
"Who$", name="enwiki.revision.who_templates")
main_article_templates = wikitext.revision.template_names_matching(
"Main", name="enwiki.main_article_templates")
"Main$", name="enwiki.main_article_templates")
CITE_TEMPLATES = [
r"Cite",
r"Harvard[_ ]citation[_ ]no[_ ]brackets", r"harvnb",
Expand All @@ -39,15 +39,15 @@
r"Harvp"
]
cite_templates = wikitext.revision.template_names_matching(
"|".join(CITE_TEMPLATES), name="enwiki.revision.cite_templates")
"$|".join(CITE_TEMPLATES)+"$", name="enwiki.revision.cite_templates")
SFN_TEMPLATES = [
r"Shortened footnote template", r"sfn",
r"Sfnp",
r"Sfnm",
r"Sfnmp"
]
shortened_footnote_templates = wikitext.revision.template_names_matching(
"|".join(SFN_TEMPLATES),
"$|".join(SFN_TEMPLATES)+"$",
name="enwiki.revision.shortened_footnote_templates")
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
all_cite_templates = cite_templates + shortened_footnote_templates
Expand All @@ -67,7 +67,7 @@
r"File|Image\:", name="enwiki.revision.image_links")

image_templates = wikitext.revision.template_names_matching(
r"((Wide|Tall|scalable) image)|Panorama|Panorama 2",
r"((Wide$|Tall$|scalable) image)$|Panorama$|Panorama 2$",
name='enwiki.revision.image_template')


Expand Down
2 changes: 1 addition & 1 deletion articlequality/feature_lists/euwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
r"[\w\s_]*infotaula[ _]automatikoa$",
name="euwiki.revision.infobox_templates")
cn_templates = wikitext.revision.template_names_matching(
r"erref[ _]behar", name="euwiki.revision.cn_templates")
r"erref[ _]behar$", name="euwiki.revision.cn_templates")

# Links
# Excluding category_links based on https://phabricator.wikimedia.org/T240467
Expand Down
6 changes: 3 additions & 3 deletions articlequality/feature_lists/fawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@

# Templates
infobox_templates = wikitext.revision.template_names_matching(
r"infobox|جعبه", name="fawiki.revision.infobox_templates")
r"infobox$|جعبه$", name="fawiki.revision.infobox_templates")
CN_TEMPLATES = [
r"Citation[_ ]needed",
r"Cn",
r"Fact",
r"مدرک"
]
cn_templates = wikitext.revision.template_names_matching(
"|".join(CN_TEMPLATES), name="fawiki.revision.cn_templates")
"$|".join(CN_TEMPLATES)+"$", name="fawiki.revision.cn_templates")
who_templates = wikitext.revision.template_names_matching(
"Who|چه کسی|چه‌کسی", name="fawiki.revision.who_templates")
main_article_templates = wikitext.revision.template_names_matching(
"Main|اصلی", name="fawiki.main_article_templates")
cite_templates = wikitext.revision.template_names_matching(
r"cite|یادکرد", name="fawiki.revision.cite_templates")
r"cite$|یادکرد$", name="fawiki.revision.cite_templates")
proportion_of_templated_references = \
cite_templates / max(wikitext.revision.ref_tags, 1)
non_templated_references = max(wikitext.revision.ref_tags - cite_templates, 0)
Expand Down
18 changes: 9 additions & 9 deletions articlequality/feature_lists/frwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
r"Ouvrage"
]
cite_templates = wikitext.revision.template_names_matching(
"|".join(CITE_TEMPLATES),
"$|".join(CITE_TEMPLATES)+"$",
name="frwiki.revision.cite_templates")
proportion_of_templated_references = \
cite_templates / max(wikitext.revision.ref_tags, 1)
Expand All @@ -30,7 +30,7 @@
name="frwiki.revision.non_cite_templates"
)
infobox_templates = wikitext.revision.template_names_matching(
r"^infobox",
r"^infobox$",
name="frwiki.revision.infobox_templates")

# Copied (2015-10-29) from:
Expand All @@ -39,37 +39,37 @@
r"Référence[ _]à[ _]confirmer",
r"Référence[ _]nécessaire", r"Inédit"]
lvl1_cn_templates = wikitext.revision.template_names_matching(
"|".join(LVL1_CN_TEMPLATES),
"$|".join(LVL1_CN_TEMPLATES)+"$",
name="frwiki.revision.lvl1_cn_templates")

LVL2_CN_TEMPLATES = [r"Référence[ _]insuffisante", r"Référence[ _]incomplète",
r"Détournement[ _]de[ _]sources",
r"Section[ _]à[ _]sourcer"]
lvl2_cn_templates = wikitext.revision.template_names_matching(
"|".join(LVL2_CN_TEMPLATES),
"$|".join(LVL2_CN_TEMPLATES)+"$",
name="frwiki.revision.lvl2_cn_templates")

LVL3_CN_TEMPLATES = [r"Sources[ _]à[ _]lier", r"Sources[ _]obsolètes",
r"Référence[ _]obsolète", r"À[ _]sourcer",
r"Sources[ _]secondaires", r"BPV[ _]à[ _]sourcer"]
lvl3_cn_templates = wikitext.revision.template_names_matching(
"|".join(LVL3_CN_TEMPLATES),
"$|".join(LVL3_CN_TEMPLATES)+"$",
name="frwiki.revision.lvl3_cn_templates")

LVL4_CN_TEMPLATES = [r"À[ _]prouver", r"Faut[ _]sourcer"]
lvl4_cn_templates = wikitext.revision.template_names_matching(
"|".join(LVL4_CN_TEMPLATES),
"$|".join(LVL4_CN_TEMPLATES)+"$",
name="frwiki.revision.lvl4_cn_templates")

LVL5_CN_TEMPLATES = [r"À[ _]vérifier", r"Vérifiabilité"]
lvl5_cn_templates = wikitext.revision.template_names_matching(
"|".join(LVL5_CN_TEMPLATES),
"$|".join(LVL5_CN_TEMPLATES)+"$",
name="frwiki.revision.lvl5_cn_templates")
main_article_templates = wikitext.revision.template_names_matching(
r"Article[ _](principal|détaillé)",
r"Article[ _](principal$|détaillé)$",
name="frwiki.main_article_templates")
date_templates = wikitext.revision.template_names_matching(
r"date",
r"date$",
name="frwiki.revision.date_templates")

# Links
Expand Down
8 changes: 5 additions & 3 deletions articlequality/feature_lists/glwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
# Infoboxes:
# https://gl.wikipedia.org/wiki/Categor%C3%ADa:Caixas_de_informaci%C3%B3n
# They don't have a common naming scheme.


cn_templates = wikitext.revision.template_names_matching(
r"cómpre[ _]referencia|cando|quen|clarificar|" +
r"sen[ _]referencias|cómpre[ _]páxina|" +
r"verificar[ _]credibilidade", name="glwiki.revision.cn_templates")
r"cómpre[ _]referencia$|cando$|quen$|clarificar$|" +
r"sen[ _]referencias$|cómpre[ _]páxina$|" +
r"verificar[ _]credibilidade$", name="glwiki.revision.cn_templates")

# Links
category_links = wikitext.revision.wikilink_titles_matching(
Expand Down
23 changes: 15 additions & 8 deletions articlequality/feature_lists/ptwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@

# Templates
infobox_templates = wikitext.revision.template_names_matching(
r"(Info|Infobox)", name="ptwiki.revision.infobox_templates")
r"(Info|Infobox)$", name="ptwiki.revision.infobox_templates")
CN_TEMPLATES = [
r"Carece[ _]de[ _]fontes",
r"Carece[ _]de[ _]fontes2",
r"Carece[ _]de[ _]fontes/bloco",
r"Carece[ _]de[ _]fontes/bloco2"
]
cn_templates = wikitext.revision.template_names_matching(
"|".join(CN_TEMPLATES), name="ptwiki.revision.cn_templates")
"$|".join(CN_TEMPLATES)+"$", name="ptwiki.revision.cn_templates")
MAIN_TEMPLATES = [
r"Artigo[ _]principal",
r"Ver[ _]artigo[ _]principal",
Expand All @@ -36,7 +36,7 @@
r"AP", r"Details", r"Ver[ _]artigo"
]
main_article_templates = wikitext.revision.template_names_matching(
"|".join(MAIN_TEMPLATES), name="ptwiki.main_article_templates")
"$|".join(MAIN_TEMPLATES)+"$", name="ptwiki.main_article_templates")
CITE_TEMPLATES = [
r"Cite",
r"Citar",
Expand All @@ -50,9 +50,9 @@
r"Harvp"
]
cite_templates = wikitext.revision.template_names_matching(
"|".join(CITE_TEMPLATES), name="ptwiki.revision.cite_templates")
"$|".join(CITE_TEMPLATES)+"$", name="ptwiki.revision.cite_templates")
shortened_footnote_templates = wikitext.revision.template_names_matching(
r"sfn", name="ptwiki.revision.shortened_footnote_templates")
r"sfn$", name="ptwiki.revision.shortened_footnote_templates")
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
all_cite_templates = cite_templates + shortened_footnote_templates
proportion_of_templated_references = \
Expand All @@ -71,12 +71,19 @@
r"(File|Ficheiro|Arquivo|Imagem?)\s*\:",
name="ptwiki.revision.image_links")

IMG_TEMPLATES = [
r"Scalable[ _]image",
r"Panorama",
r"Imagem[ _]vertical",
r"Panorama",
r"Panorama 2"
]

image_templates = wikitext.revision.template_names_matching(
r"(Scalable[ _]image|Panorama|Imagem[ _]vertical|Panorama|Panorama 2)",
name='ptwiki.revision.image_template')
"$|".join(IMG_TEMPLATES)+"$", name='ptwiki.revision.image_template')

side_by_side_image_templates = wikitext.revision.template_names_matching(
r"Imagem[ _]dupla",
r"Imagem[ _]dupla$",
name='ptwiki.revision.side_by_side_image_templates')


Expand Down
2 changes: 1 addition & 1 deletion articlequality/feature_lists/ruwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from . import wikipedia

cn_templates = wikitext.revision.template_names_matching(
r"Нет[ _]АИ",
r"Нет[ _]АИ$",
name="ruwiki.revision.cn_templates")

# Links
Expand Down
2 changes: 1 addition & 1 deletion articlequality/feature_lists/svwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from . import wikipedia

cn_templates = wikitext.revision.template_names_matching(
r"Källa[ _]behövs|Kb",
r"Källa[ _]behövs$|Kb$",
name="svwiki.revision.cn_templates")

# Links
Expand Down
8 changes: 4 additions & 4 deletions articlequality/feature_lists/trwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from . import wikipedia

cite_templates = wikitext.revision.template_names_matching(
r"Kaynak|.*[ _]kaynağı",
r"Kaynak$|.*[ _]kaynağı$",
name="trwiki.revision.cite_templates")
proportion_of_templated_references = \
cite_templates / max(wikitext.revision.ref_tags, 1)
Expand All @@ -19,17 +19,17 @@
name="trwiki.revision.non_cite_templates"
)
infobox_templates = wikitext.revision.template_names_matching(
r".*[ _]bilgi[ _]kutusu",
r".*[ _]bilgi[ _]kutusu$",
name="trwiki.revision.infobox_templates")

# Copied (2015-10-29) from:
# https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources
cn_templates = wikitext.revision.template_names_matching(
r"Kaynak[ _]belirt|Olgu|Fact|Delil",
r"Kaynak[ _]belirt$|Olgu$|Fact$|Delil$",
name="trwiki.revision.lvl1_cn_templates")

main_article_templates = wikitext.revision.template_names_matching(
r"Ana|Anamadde",
r"Ana$|Anamadde$",
name="trwiki.main_article_templates")

# Links
Expand Down
8 changes: 4 additions & 4 deletions articlequality/feature_lists/ukwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
r"Fact"
]
cn_templates = wikitext.revision.template_names_matching(
"|".join(CN_TEMPLATES), name="ukwiki.revision.cn_templates")
"$|".join(CN_TEMPLATES)+"$", name="ukwiki.revision.cn_templates")
MAIN_TEMPLATES = [
r"Main",
r"Докладніше"
]
main_article_templates = wikitext.revision.template_names_matching(
"|".join(MAIN_TEMPLATES), name="ukwiki.revision.main_article_templates")
"$|".join(MAIN_TEMPLATES)+"$", name="ukwiki.revision.main_article_templates")
CITE_TEMPLATES = [
r"Cite",
r"Harvard[_ ]citation[_ ]no[_ ]brackets", r"harvnb",
Expand All @@ -39,7 +39,7 @@
r"Citation"
]
cite_templates = wikitext.revision.template_names_matching(
"|".join(CITE_TEMPLATES), name="ukwiki.revision.cite_templates")
"$|".join(CITE_TEMPLATES)+"$", name="ukwiki.revision.cite_templates")
shortened_footnote_templates = wikitext.revision.template_names_matching(
"sfn", name="ukwiki.revision.shortened_footnote_templates")
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
Expand Down Expand Up @@ -72,7 +72,7 @@
"|".join(IMAGE_LINKS), name="ukwiki.revision.image_links")

image_templates = wikitext.revision.template_names_matching(
r"((Wide|Tall|scalable) image)|Panorama|Panorama 2",
r"((Wide$|Tall$|scalable) image)$|Panorama$|Panorama 2$",
name="ukwiki.revision.image_template")


Expand Down