From a7e88c8be7a09be811eb86b7775508670c7aafa9 Mon Sep 17 00:00:00 2001 From: Nils Herrmann <88451442+nils-herrmann@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:52:33 +0200 Subject: [PATCH] Drop all_paragraph from parse_pubmed_paragraph() (close #147) --- pubmed_parser/pubmed_oa_parser.py | 13 +++---------- tests/test_pubmed_oa_parser.py | 2 +- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/pubmed_parser/pubmed_oa_parser.py b/pubmed_parser/pubmed_oa_parser.py index 33dc22f..9e3cfe3 100644 --- a/pubmed_parser/pubmed_oa_parser.py +++ b/pubmed_parser/pubmed_oa_parser.py @@ -367,7 +367,7 @@ def parse_pubmed_references(path): return dict_refs -def parse_pubmed_paragraph(path, all_paragraph=False): +def parse_pubmed_paragraph(path): """ Give path to a given PubMed OA file, parse and return a dictionary of all paragraphs, section that it belongs to, @@ -377,13 +377,6 @@ def parse_pubmed_paragraph(path, all_paragraph=False): ---------- path: str A string to an XML path. - all_paragraph: bool - By default, this function will only append a paragraph if there is at least - one reference made in a paragraph (to aviod noisy parsed text). - A boolean indicating if you want to include paragraph with no references made or not - if True, include all paragraphs - if False, include only paragraphs that have references - default: False Return ------ @@ -421,8 +414,8 @@ def parse_pubmed_paragraph(path, all_paragraph=False): "section": section, "text": paragraph_text, } - if len(ref_ids) >= 1 or all_paragraph: - dict_pars.append(dict_par) + + dict_pars.append(dict_par) return dict_pars diff --git a/tests/test_pubmed_oa_parser.py b/tests/test_pubmed_oa_parser.py index 577622f..b9c834e 100644 --- a/tests/test_pubmed_oa_parser.py +++ b/tests/test_pubmed_oa_parser.py @@ -60,7 +60,7 @@ def test_parse_pubmed_paragraph(): paragraphs = pp.parse_pubmed_paragraph(pubmed_xml_3460867) assert isinstance(paragraphs, list) assert isinstance(paragraphs[0], dict) - assert len(paragraphs) == 29, "Expected number of paragraphs to be 29" + assert len(paragraphs) == 58, "Expected number of paragraphs to be 58" assert ( len(paragraphs[0]["reference_ids"]) == 11 ), "Expected number of references in the first paragraph to be 11"