Skip to content

Commit

Permalink
Parse pii in the web parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
Michael-E-Rose authored Apr 22, 2024
2 parents 5943a6e + bf5e771 commit e49a78f
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
9 changes: 8 additions & 1 deletion pubmed_parser/pubmed_web_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,13 @@ def parse_pubmed_web_tree(tree):
doi = doi[0].text
except IndexError:
doi = None


pii = tree.xpath('//elocationid[@eidtype="pii"]')
try:
pii = pii[0].text
except IndexError:
pii = None

language = tree.xpath("//language")
try:
language = language[0].text
Expand All @@ -152,6 +158,7 @@ def parse_pubmed_web_tree(tree):
"authors": authors_text,
"keywords": keywords,
"doi": doi,
"pii": pii,
"year": year,
"language": language
}
Expand Down
10 changes: 10 additions & 0 deletions tests/test_pubmed_web_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_pubmed_web_parser_all_fields_content():
"authors": "Rieka von der Warth; Isabelle Hempler",
"keywords": "",
"doi": "10.1016/j.zefq.2023.11.002",
"pii": "S1865-9217(23)00212-X",
"year": "2024",
"language": "ger",
"pmid": "38218666",
Expand All @@ -27,6 +28,7 @@ def test_pubmed_web_parser_all_fields_content():
"authors": "Andreas Leimbach; Jörg Hacker; Ulrich Dobrindt",
"keywords": "D000818:Animals;D004926:Escherichia coli;D004927:Escherichia coli Infections;D023281:Genomics;D006801:Humans;D007413:Intestinal Mucosa;D007422:Intestines;D010802:Phylogeny;D013559:Symbiosis",
"doi": "10.1007/82_2012_303",
"pii": None,
"year": "2013",
"language": "eng",
"pmid": "23340801",
Expand All @@ -49,6 +51,7 @@ def test_pubmed_web_parser_all_fields_existence():
"authors",
"keywords",
"doi",
"pii",
"year",
"language",
"pmid",
Expand All @@ -66,7 +69,14 @@ def test_pubmed_web_parser_save_xml():

assert "xml" in pubmed_dict


def test_doi():
    """Verify that the record parsed for "32145645" carries the expected DOI."""
    expected_doi = "10.1016/j.ejmech.2020.112186"
    # save_xml=False: only the parsed fields are needed for this check.
    record = pp.parse_xml_web("32145645", save_xml=False)
    assert record['doi'] == expected_doi


def test_pii():
    """Verify that the record parsed for "32145645" carries the expected PII."""
    expected_pii = "S0223-5234(20)30153-7"
    # save_xml=False: only the parsed fields are needed for this check.
    record = pp.parse_xml_web("32145645", save_xml=False)
    assert record['pii'] == expected_pii

0 comments on commit e49a78f

Please sign in to comment.