From bf5e771a754f6959a7dcf65d6026f5194b701328 Mon Sep 17 00:00:00 2001 From: Nils Herrmann Date: Mon, 22 Apr 2024 15:21:04 +0200 Subject: [PATCH] Parse pii in the web parser --- pubmed_parser/pubmed_web_parser.py | 9 ++++++++- tests/test_pubmed_web_parser.py | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pubmed_parser/pubmed_web_parser.py b/pubmed_parser/pubmed_web_parser.py index fc401da..8e06c0b 100644 --- a/pubmed_parser/pubmed_web_parser.py +++ b/pubmed_parser/pubmed_web_parser.py @@ -137,7 +137,13 @@ def parse_pubmed_web_tree(tree): doi = doi[0].text except IndexError: doi = None - + + pii = tree.xpath('//elocationid[@eidtype="pii"]') + try: + pii = pii[0].text + except IndexError: + pii = None + language = tree.xpath("//language") try: language = language[0].text @@ -152,6 +158,7 @@ def parse_pubmed_web_tree(tree): "authors": authors_text, "keywords": keywords, "doi": doi, + "pii": pii, "year": year, "language": language } diff --git a/tests/test_pubmed_web_parser.py b/tests/test_pubmed_web_parser.py index 5b47e9a..120fe71 100644 --- a/tests/test_pubmed_web_parser.py +++ b/tests/test_pubmed_web_parser.py @@ -15,6 +15,7 @@ def test_pubmed_web_parser_all_fields_content(): "authors": "Rieka von der Warth; Isabelle Hempler", "keywords": "", "doi": "10.1016/j.zefq.2023.11.002", + "pii": "S1865-9217(23)00212-X", "year": "2024", "language": "ger", "pmid": "38218666", @@ -27,6 +28,7 @@ def test_pubmed_web_parser_all_fields_content(): "authors": "Andreas Leimbach; Jörg Hacker; Ulrich Dobrindt", "keywords": "D000818:Animals;D004926:Escherichia coli;D004927:Escherichia coli Infections;D023281:Genomics;D006801:Humans;D007413:Intestinal Mucosa;D007422:Intestines;D010802:Phylogeny;D013559:Symbiosis", "doi": "10.1007/82_2012_303", + "pii": None, "year": "2013", "language": "eng", "pmid": "23340801", @@ -49,6 +51,7 @@ def test_pubmed_web_parser_all_fields_existence(): "authors", "keywords", "doi", + "pii", "year", "language", "pmid", @@ -66,7 +69,14 @@ def test_pubmed_web_parser_save_xml(): assert "xml" in pubmed_dict + def test_doi(): """Test the correct parsing of the doi.""" pubmed_dict = pp.parse_xml_web("32145645", save_xml=False) assert pubmed_dict['doi'] == "10.1016/j.ejmech.2020.112186" + + +def test_pii(): + """Test the correct parsing of the pii.""" + pubmed_dict = pp.parse_xml_web("32145645", save_xml=False) + assert pubmed_dict['pii'] == "S0223-5234(20)30153-7"