From 18d586e344bae22bd3f61fa8cab4c8edf5a7eef3 Mon Sep 17 00:00:00 2001 From: Michael-E-Rose Date: Wed, 4 Sep 2024 08:27:05 +0000 Subject: [PATCH] deploy: 3ae0bb6c42eef8df7d2021d49e321100492b6796 --- .buildinfo | 2 +- _modules/index.html | 6 ++--- _modules/pubmed_parser/medline_parser.html | 6 ++--- _modules/pubmed_parser/pubmed_oa_parser.html | 6 ++--- _modules/pubmed_parser/pubmed_web_parser.html | 21 +++++++++++++----- _static/documentation_options.js | 2 +- api.html | 8 ++++--- genindex.html | 6 ++--- index.html | 6 ++--- install.html | 6 ++--- objects.inv | Bin 482 -> 482 bytes resources.html | 6 ++--- search.html | 6 ++--- searchindex.js | 2 +- spark.html | 6 ++--- 15 files changed, 51 insertions(+), 38 deletions(-) diff --git a/.buildinfo b/.buildinfo index bd2e77f..70cf335 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 77e324f858092f01294cd64afe6342c2 +config: df62e4c3d116525450b5e30e2e63dcc0 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/_modules/index.html b/_modules/index.html index b8ebaec..ba9cf74 100644 --- a/_modules/index.html +++ b/_modules/index.html @@ -3,7 +3,7 @@ - Overview: module code — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Overview: module code — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -14,7 +14,7 @@ - + @@ -34,7 +34,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/_modules/pubmed_parser/medline_parser.html b/_modules/pubmed_parser/medline_parser.html index 5adb823..42f6c29 100644 --- a/_modules/pubmed_parser/medline_parser.html +++ b/_modules/pubmed_parser/medline_parser.html @@ -3,7 +3,7 @@ - pubmed_parser.medline_parser — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + pubmed_parser.medline_parser — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -14,7 +14,7 @@ - + @@ -34,7 +34,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/_modules/pubmed_parser/pubmed_oa_parser.html b/_modules/pubmed_parser/pubmed_oa_parser.html index cfd9b53..90031fd 100644 --- a/_modules/pubmed_parser/pubmed_oa_parser.html +++ b/_modules/pubmed_parser/pubmed_oa_parser.html @@ -3,7 +3,7 @@ - pubmed_parser.pubmed_oa_parser — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + pubmed_parser.pubmed_oa_parser — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -14,7 +14,7 @@ - + @@ -34,7 +34,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/_modules/pubmed_parser/pubmed_web_parser.html b/_modules/pubmed_parser/pubmed_web_parser.html index 9a5ca3e..f3525c2 100644 --- a/_modules/pubmed_parser/pubmed_web_parser.html +++ b/_modules/pubmed_parser/pubmed_web_parser.html @@ -3,7 +3,7 @@ - pubmed_parser.pubmed_web_parser — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + pubmed_parser.pubmed_web_parser — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -14,7 +14,7 @@ - + @@ -34,7 +34,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
@@ -138,7 +138,7 @@

Source code for pubmed_parser.pubmed_web_parser

< 'title', 'abstract', 'journal', 'affliation' (string of affiliation with ';' separated), 'authors' (string with ';' separated), 'keywords' (keywords and MeSH terms from an XML -- if MeSH term it will be 'MeSH descriptor':'MeSH name') - 'doi', 'year' + 'doi', 'pii', 'year', 'language', 'version_id', 'version_date' """ if len(tree.xpath("//articletitle")) != 0: title = " ".join([title.text for title in tree.xpath("//articletitle")]) @@ -227,6 +227,13 @@

Source code for pubmed_parser.pubmed_web_parser

< language = language[0].text except IndexError: language = None + + medline_citation = tree.xpath('//medlinecitation') + try: + version_id = medline_citation[0].attrib.get('versionid') + version_date = medline_citation[0].attrib.get('versiondate') + except IndexError: + version_id, version_date = None, None dict_out = { "title": title, @@ -238,7 +245,9 @@

Source code for pubmed_parser.pubmed_web_parser

< "doi": doi, "pii": pii, "year": year, - "language": language + "language": language, + "version_id": version_id, + "version_date": version_date, } return dict_out @@ -279,6 +288,8 @@

Source code for pubmed_parser.pubmed_web_parser

< 'keywords': 'D000818:Animals;D005075:Biological Evolution;...', 'doi': '10.1126/science.1060852', 'year': '2001', + 'version_id': None, + 'version_date': None, 'pmid': '11360989' } """ diff --git a/_static/documentation_options.js b/_static/documentation_options.js index 05f365a..c88394b 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '0.5.2.dev2+g245dd09', + VERSION: '0.5.2.dev3+g3ae0bb6', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/api.html b/api.html index 406d543..51cc3a7 100644 --- a/api.html +++ b/api.html @@ -4,7 +4,7 @@ - API Documentation — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + API Documentation — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -37,7 +37,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/genindex.html b/genindex.html index 2be7e63..d391a58 100644 --- a/genindex.html +++ b/genindex.html @@ -3,7 +3,7 @@ - Index — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Index — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -14,7 +14,7 @@ - + @@ -34,7 +34,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/index.html b/index.html index 9568888..e663ef4 100644 --- a/index.html +++ b/index.html @@ -4,7 +4,7 @@ - Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -36,7 +36,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/install.html b/install.html index 5af67b9..0e0b80d 100644 --- a/install.html +++ b/install.html @@ -4,7 +4,7 @@ - Installation — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Installation — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -37,7 +37,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/objects.inv b/objects.inv index 8d7bfdf2bb11b55f3ba603617c3e7bf49309e81e..d6f950212b565b868502f4093e82c5eef819cf4b 100644 GIT binary patch delta 21 ccmaFF{D^si7niYix^ZHvK~j?0#*jWn08&>6D*ylh delta 21 ccmaFF{D^si7nhNCx{--#N{WHy#*jWn08d>8(*OVf diff --git a/resources.html b/resources.html index 9d29dcd..5e6a765 100644 --- a/resources.html +++ b/resources.html @@ -4,7 +4,7 @@ - Resources — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Resources — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -37,7 +37,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/search.html b/search.html index e60eaaa..4ac4390 100644 --- a/search.html +++ b/search.html @@ -3,7 +3,7 @@ - Search — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Search — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -37,7 +37,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6
diff --git a/searchindex.js b/searchindex.js index b6356d1..27187aa 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"API Documentation": [[0, null]], "About the dataset": [[1, "about-the-dataset"]], "Alternative implementation of MEDLINE parsers": [[3, "alternative-implementation-of-medline-parsers"]], "Contents": [[1, "contents"]], "Download PubMed OA figures": [[3, "download-pubmed-oa-figures"]], "Examples": [[0, "examples"], [0, "id10"], [0, "id15"], [0, "id18"]], "Installation": [[2, null]], "Links to download PubMed OA and MEDLINE dataset": [[3, "links-to-download-pubmed-oa-and-medline-dataset"]], "PMC Copyright Notice": [[3, "pmc-copyright-notice"]], "Parameters": [[0, "parameters"], [0, "id1"], [0, "id2"], [0, "id4"], [0, "id6"], [0, "id8"], [0, "id11"], [0, "id13"], [0, "id16"], [0, "id19"]], "Parse MEDLINE XML": [[0, "parse-medline-xml"]], "Parse PubMed OA XML": [[0, "parse-pubmed-oa-xml"]], "Parse from Website": [[0, "parse-from-website"]], "Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset": [[1, null]], "Questions / Contributions / Bugs": [[1, "questions-contributions-bugs"]], "Resources": [[3, null]], "Return": [[0, "return"], [0, "id3"], [0, "id5"], [0, "id7"], [0, "id9"], [0, "id12"], [0, "id14"], [0, "id17"], [0, "id20"]], "Returns": [[0, "returns"]], "Setting up Pubmed Parser with PySpark": [[4, null]]}, "docnames": ["api", "index", "install", "resources", "spark"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1}, "filenames": ["api.rst", "index.rst", "install.rst", "resources.rst", "spark.rst"], "indexentries": {"parse_citation_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_citation_web", false]], "parse_grant_id() (in module pubmed_parser)": [[0, "pubmed_parser.parse_grant_id", false]], "parse_medline_xml() (in module pubmed_parser)": [[0, "pubmed_parser.parse_medline_xml", false]], "parse_outgoing_citation_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_outgoing_citation_web", false]], "parse_pubmed_caption() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_caption", false]], "parse_pubmed_paragraph() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_paragraph", false]], "parse_pubmed_references() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_references", false]], "parse_pubmed_table() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_table", false]], "parse_pubmed_xml() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_xml", false]], "parse_xml_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_xml_web", false]]}, "objects": {"pubmed_parser": [[0, 0, 1, "", "parse_citation_web"], [0, 0, 1, "", "parse_grant_id"], [0, 0, 1, "", "parse_medline_xml"], [0, 0, 1, "", "parse_outgoing_citation_web"], [0, 0, 1, "", "parse_pubmed_caption"], [0, 0, 1, "", "parse_pubmed_paragraph"], [0, 0, 1, "", "parse_pubmed_references"], [0, 0, 1, "", "parse_pubmed_table"], [0, 0, 1, "", "parse_pubmed_xml"], [0, 0, 1, "", "parse_xml_web"]]}, "objnames": {"0": ["py", "function", "Python function"]}, "objtypes": {"0": "py:function"}, "terms": {"": 0, "0": [0, 4], "0000217": 0, "08": 3, "1": [0, 4], "10": [0, 4], "1014": 0, "1060852": 0, "11": 0, "1126": 0, "11360989": 0, "16g": 4, "17299597": 0, "1790863": 0, "18159221": 0, "2": [0, 4], "2001": 0, "221212": 0, "25": 4, "3": [0, 4], "30705152": 0, "30m": 1, "31624211": 0, "5": 4, "500m": 4, "6933944": 0, "8g": 4, "9": 4, "A": 0, "If": [0, 3], "In": [3, 4], "It": [0, 1], "The": [0, 3], "There": 3, "To": [1, 2, 3], "_build": 2, "aax1562": 0, "about": 0, "abov": 1, "abstract": [0, 1], "access": [0, 3, 4], "acknowledg": 0, "acronym": 0, "ad": 0, "advanc": 0, "affili": 0, "affiliation_list": 0, "after": 0, "agenc": 0, "aim": 1, "all": [0, 1, 3], "alloc": 4, "also": 1, "alwai": 1, "an": [0, 1, 3], "anaconda3": 4, "analyz": 1, "anim": 0, "api": 1, "append": 0, "appmasterenv": 4, "appnam": 4, "ar": 3, "articl": [0, 1], "article_iter": 0, "assign": 0, "attempt": 0, "author": 0, "author_list": 0, "avail": [0, 3], "back": 0, "ban": 3, "base": 0, "been": 0, "being": 0, "belong": 0, "below": [0, 3, 4], "between": 0, "bin": 4, "biolog": 0, "biologi": 0, "biomed": 1, "bool": 0, "browser": 2, "budapest": 0, "build": 2, "builder": 4, "bulk": [0, 3], "c": 0, "can": [0, 1, 2, 3, 4], "caption": [0, 3], "case": 4, "categori": 0, "cdh5": 4, "central": 0, "chang": 2, "check": [0, 3], "choic": 0, "citat": [0, 1], "cite": 0, "clone": 2, "codegen": 4, "colbud": 0, "collegium": 0, "column": 3, "com": [0, 2], "complex": 0, "compress": 1, "concaten": 0, "conf": 4, "config": 4, "contain": [0, 1], "core": [0, 4], "correspond": [0, 3], "countri": 0, "cov": 2, "createdatafram": 4, "csv": 3, "d000818": 0, "d005075": 0, "dai": 0, "dask": 4, "data": [0, 1, 3, 4], "databas": [0, 3], "datafram": 4, "date": [0, 1], "dbfrom": 0, "dd": 0, "default": 0, "definit": 3, "delet": 0, "detail": 4, "develop": 1, "dict": 0, "dict_capt": 0, "dict_out": 0, "dict_par": 0, "dict_ref": 0, "dictionari": [0, 1], "differ": 4, "dimension": 0, "directli": 2, "directori": 2, "divid": 0, "do": 3, "doc": 2, "doc_id": 0, "document": [1, 2, 3, 4], "doi": 0, "download": [0, 1, 4], "driver": 4, "dtd": 3, "dynamicalloc": 4, "e": [0, 1], "e0": 3, "each": 0, "easili": 1, "effort": 1, "either": 0, "element": 0, "elink": 0, "entrez": [0, 1], "entri": 0, "epublication_d": 0, "eutil": 0, "evolut": 0, "executor": 4, "explain": [0, 3], "extract": 0, "f": 0, "fals": 0, "fast": 1, "fcgi": 0, "few": 3, "field": 0, "fig_capt": 0, "fig_id": 0, "fig_label": 0, "figur": 0, "figure_id": 3, "file": [0, 1, 3], "findspark": 4, "first": 3, "fisher": 0, "folder": [2, 3, 4], "follow": [0, 2, 3], "form": 0, "format": [0, 1, 3], "from": [1, 2, 3, 4], "ftp": [0, 3], "fu": 3, "full": [0, 1, 3, 4], "full_titl": 0, "function": [0, 1, 3], "g001": 0, "galleri": 2, "gene": 0, "geometr": 0, "get": [1, 3], "getorcr": 4, "git": 2, "github": 2, "give": [0, 1], "given": [0, 3], "go": 3, "good": 0, "gov": [0, 3], "grant": 0, "grant_list": 0, "graphic_ref": 0, "guidelin": 1, "gz": [0, 3], "h": 0, "ha": [1, 4], "harvest": 0, "have": [0, 1, 3, 4], "here": 3, "high": 1, "how": [0, 1, 3, 4], "html": 2, "http": [0, 2, 3], "hu": 0, "hungari": 0, "i": [0, 1, 3], "id": [0, 3], "id_typ": 0, "imag": 3, "implement": [0, 4], "import": 4, "includ": 0, "include_path": 0, "incorpor": 4, "inform": [0, 1, 3], "init": 4, "input": 0, "instal": 1, "institut": 0, "int": 0, "integ": 0, "interest": 3, "ip": 3, "iter": 0, "its": 0, "javascript": 3, "jord\u00e1n": 0, "journal": 0, "jupyt": 4, "keep": 1, "kei": 0, "keyword": 0, "kung": 3, "label": 0, "languag": 1, "less": 4, "level": 1, "librari": 1, "linknam": 0, "list": 0, "load": 0, "local": 4, "long": 0, "lxml": [0, 1], "made": 0, "main": 0, "major": 0, "make": 2, "manuscript": 3, "map": 4, "maxexecutor": 4, "maxresults": 4, "mb": 4, "medic": 3, "medleas": 3, "medleasebaselin": 3, "medlin": 4, "medlinexmltojson": 3, "memori": 4, "memoryoverhead": 4, "mesh": 0, "mesh_term": 0, "metadata": 0, "might": [1, 3, 4], "million": 4, "mine": 1, "minimum": 4, "minut": 4, "mm": 0, "model": 0, "molecular": 0, "month": 0, "more": [1, 4], "multipl": 4, "n": 0, "n_citat": 0, "name": 0, "namespac": 0, "natur": 1, "ncbi": [0, 3], "need": 2, "new": 0, "nih": [0, 3], "nlm": [0, 3], "nlm_categori": 0, "nlmdata": 3, "node": 0, "none": 0, "normal": 1, "note": [0, 4], "notebook": 4, "now": 1, "number": [0, 1], "nxml": 0, "o": 4, "oa": 1, "oa_bulk": 3, "oa_file_list": 3, "oa_packag": 3, "obtain": 1, "one": 0, "onli": 0, "open": [0, 2, 3], "opt": 4, "origin": 0, "other": [0, 1], "our": 1, "out": 3, "output": 0, "page": [1, 2], "paper": [0, 1], "paragraph": 0, "parallel": 4, "pars": [1, 3, 4], "parse_article_info": 0, "parse_citation_web": 0, "parse_downto_mesh_subterm": 0, "parse_grant_id": 0, "parse_medline_xml": 0, "parse_outgoing_citation_web": 0, "parse_pubmed_capt": [0, 3], "parse_pubmed_paragraph": 0, "parse_pubmed_refer": 0, "parse_pubmed_t": 0, "parse_pubmed_xml": 0, "parse_xml_web": 0, "parser": 0, "part": 0, "path": [0, 3, 4], "path_to_fil": 0, "pdf": 1, "phenotyp": 0, "pip": 2, "pipelin": 1, "pleas": [3, 4], "pmc": 0, "pmc13900": 3, "pmc_cite": 0, "pmc_refs_pubm": 0, "pmcid": 0, "pmid": [0, 3], "pmid_cit": 0, "point": 0, "pone": 0, "prefix": 0, "print": 0, "process": [1, 4], "processor": 4, "program": 1, "provid": [0, 1, 3], "pub": [0, 3], "pubdat": 0, "public": 0, "publication_d": 0, "publication_year": 0, "publish": 1, "publisher_id": 0, "pubmed20n0014": 0, "pubmed_articl": 0, "pubmed_pars": [0, 2, 3], "pum": 0, "put": 4, "pyspark": 1, "pyspark_driver_python": 4, "pyspark_python": 4, "pytest": 2, "python": [0, 4], "p\u00e1l": 0, "queri": 1, "question": 0, "r": 0, "read": 0, "reduc": [1, 4], "refer": 0, "reference_id": 0, "reference_list": 0, "regular": [1, 2], "relat": 0, "remov": 0, "repositori": [1, 2, 4], "research": 1, "resolut": 0, "resourc": 1, "return_xml": 0, "rid": 0, "run": 2, "save": 0, "save_xml": 0, "schedul": 4, "scienc": 0, "scrape": 3, "script": 4, "section": 0, "see": [0, 1, 3, 4], "set": 1, "setappnam": 4, "setmast": 4, "setup": 4, "singl": 0, "sleep": 0, "small": 4, "snippet": 4, "so": 1, "some": 3, "soon": 4, "sourc": 0, "space": 0, "spark": 4, "spark_hom": 4, "sparkconf": 4, "sparkcontext": 4, "sparksess": 4, "specif": 3, "sphinx": 2, "sql": 4, "stackoverflow": 0, "str": 0, "string": 0, "strip": 0, "structur": [0, 1], "studi": 0, "subject": 0, "submit": 1, "subset": 0, "subterm": 0, "szathmari": 0, "szathm\u00e1ri": 0, "szenth\u00e1roms\u00e1g": 0, "t": 0, "tabl": 0, "table_dict": 0, "table_xml": 0, "tag": 3, "take": 1, "tar": 3, "term": 0, "test": [2, 4], "text": [0, 1], "than": [0, 4], "them": 3, "thi": [0, 1, 2, 3, 4], "those": 1, "through": 1, "time": [1, 4], "titipata": 2, "titl": 0, "tool": [0, 3], "tree": 0, "true": [0, 4], "two": 0, "type": [0, 3], "u": 0, "until": 1, "up": 1, "updat": [3, 4], "url": 0, "us": [0, 1, 2, 3, 4], "util": 1, "vari": 0, "verbos": 2, "via": 2, "wait": 0, "want": [0, 3], "we": [0, 1, 3, 4], "websit": 3, "weekli": 3, "when": [3, 4], "where": 0, "which": [0, 1, 2, 3, 4], "who": 1, "wiki": 1, "won": 0, "work": 1, "workflow": 4, "write": 1, "www": 3, "xml": [3, 4], "y": 0, "yarn": 4, "year": 0, "year_info_onli": 0, "yield": 0, "york": 0, "you": [0, 1, 2, 3, 4], "your": [2, 3], "yyyi": 0}, "titles": ["API Documentation", "Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset", "Installation", "Resources", "Setting up Pubmed Parser with PySpark"], "titleterms": {"A": 1, "about": 1, "access": 1, "altern": 3, "api": 0, "bug": 1, "content": 1, "contribut": 1, "copyright": 3, "dataset": [1, 3], "document": 0, "download": 3, "exampl": 0, "figur": 3, "from": 0, "implement": 3, "instal": 2, "link": 3, "medlin": [0, 1, 3], "notic": 3, "oa": [0, 3], "open": 1, "paramet": 0, "pars": 0, "parser": [1, 3, 4], "pmc": 3, "pubm": [0, 1, 3, 4], "pyspark": 4, "python": 1, "question": 1, "resourc": 3, "return": 0, "set": 4, "subset": 1, "up": 4, "websit": 0, "xml": [0, 1]}}) \ No newline at end of file +Search.setIndex({"alltitles": {"API Documentation": [[0, null]], "About the dataset": [[1, "about-the-dataset"]], "Alternative implementation of MEDLINE parsers": [[3, "alternative-implementation-of-medline-parsers"]], "Contents": [[1, "contents"]], "Download PubMed OA figures": [[3, "download-pubmed-oa-figures"]], "Examples": [[0, "examples"], [0, "id10"], [0, "id15"], [0, "id18"]], "Installation": [[2, null]], "Links to download PubMed OA and MEDLINE dataset": [[3, "links-to-download-pubmed-oa-and-medline-dataset"]], "PMC Copyright Notice": [[3, "pmc-copyright-notice"]], "Parameters": [[0, "parameters"], [0, "id1"], [0, "id2"], [0, "id4"], [0, "id6"], [0, "id8"], [0, "id11"], [0, "id13"], [0, "id16"], [0, "id19"]], "Parse MEDLINE XML": [[0, "parse-medline-xml"]], "Parse PubMed OA XML": [[0, "parse-pubmed-oa-xml"]], "Parse from Website": [[0, "parse-from-website"]], "Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset": [[1, null]], "Questions / Contributions / Bugs": [[1, "questions-contributions-bugs"]], "Resources": [[3, null]], "Return": [[0, "return"], [0, "id3"], [0, "id5"], [0, "id7"], [0, "id9"], [0, "id12"], [0, "id14"], [0, "id17"], [0, "id20"]], "Returns": [[0, "returns"]], "Setting up Pubmed Parser with PySpark": [[4, null]]}, "docnames": ["api", "index", "install", "resources", "spark"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1}, "filenames": ["api.rst", "index.rst", "install.rst", "resources.rst", "spark.rst"], "indexentries": {"parse_citation_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_citation_web", false]], "parse_grant_id() (in module pubmed_parser)": [[0, "pubmed_parser.parse_grant_id", false]], "parse_medline_xml() (in module pubmed_parser)": [[0, "pubmed_parser.parse_medline_xml", false]], "parse_outgoing_citation_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_outgoing_citation_web", false]], "parse_pubmed_caption() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_caption", false]], "parse_pubmed_paragraph() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_paragraph", false]], "parse_pubmed_references() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_references", false]], "parse_pubmed_table() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_table", false]], "parse_pubmed_xml() (in module pubmed_parser)": [[0, "pubmed_parser.parse_pubmed_xml", false]], "parse_xml_web() (in module pubmed_parser)": [[0, "pubmed_parser.parse_xml_web", false]]}, "objects": {"pubmed_parser": [[0, 0, 1, "", "parse_citation_web"], [0, 0, 1, "", "parse_grant_id"], [0, 0, 1, "", "parse_medline_xml"], [0, 0, 1, "", "parse_outgoing_citation_web"], [0, 0, 1, "", "parse_pubmed_caption"], [0, 0, 1, "", "parse_pubmed_paragraph"], [0, 0, 1, "", "parse_pubmed_references"], [0, 0, 1, "", "parse_pubmed_table"], [0, 0, 1, "", "parse_pubmed_xml"], [0, 0, 1, "", "parse_xml_web"]]}, "objnames": {"0": ["py", "function", "Python function"]}, "objtypes": {"0": "py:function"}, "terms": {"": 0, "0": [0, 4], "0000217": 0, "08": 3, "1": [0, 4], "10": [0, 4], "1014": 0, "1060852": 0, "11": 0, "1126": 0, "11360989": 0, "16g": 4, "17299597": 0, "1790863": 0, "18159221": 0, "2": [0, 4], "2001": 0, "221212": 0, "25": 4, "3": [0, 4], "30705152": 0, "30m": 1, "31624211": 0, "5": 4, "500m": 4, "6933944": 0, "8g": 4, "9": 4, "A": 0, "If": [0, 3], "In": [3, 4], "It": [0, 1], "The": [0, 3], "There": 3, "To": [1, 2, 3], "_build": 2, "aax1562": 0, "about": 0, "abov": 1, "abstract": [0, 1], "access": [0, 3, 4], "acknowledg": 0, "acronym": 0, "ad": 0, "advanc": 0, "affili": 0, "affiliation_list": 0, "after": 0, "agenc": 0, "aim": 1, "all": [0, 1, 3], "alloc": 4, "also": 1, "alwai": 1, "an": [0, 1, 3], "anaconda3": 4, "analyz": 1, "anim": 0, "api": 1, "append": 0, "appmasterenv": 4, "appnam": 4, "ar": 3, "articl": [0, 1], "article_iter": 0, "assign": 0, "attempt": 0, "author": 0, "author_list": 0, "avail": [0, 3], "back": 0, "ban": 3, "base": 0, "been": 0, "being": 0, "belong": 0, "below": [0, 3, 4], "between": 0, "bin": 4, "biolog": 0, "biologi": 0, "biomed": 1, "bool": 0, "browser": 2, "budapest": 0, "build": 2, "builder": 4, "bulk": [0, 3], "c": 0, "can": [0, 1, 2, 3, 4], "caption": [0, 3], "case": 4, "categori": 0, "cdh5": 4, "central": 0, "chang": 2, "check": [0, 3], "choic": 0, "citat": [0, 1], "cite": 0, "clone": 2, "codegen": 4, "colbud": 0, "collegium": 0, "column": 3, "com": [0, 2], "complex": 0, "compress": 1, "concaten": 0, "conf": 4, "config": 4, "contain": [0, 1], "core": [0, 4], "correspond": [0, 3], "countri": 0, "cov": 2, "createdatafram": 4, "csv": 3, "d000818": 0, "d005075": 0, "dai": 0, "dask": 4, "data": [0, 1, 3, 4], "databas": [0, 3], "datafram": 4, "date": [0, 1], "dbfrom": 0, "dd": 0, "default": 0, "definit": 3, "delet": 0, "detail": 4, "develop": 1, "dict": 0, "dict_capt": 0, "dict_out": 0, "dict_par": 0, "dict_ref": 0, "dictionari": [0, 1], "differ": 4, "dimension": 0, "directli": 2, "directori": 2, "divid": 0, "do": 3, "doc": 2, "doc_id": 0, "document": [1, 2, 3, 4], "doi": 0, "download": [0, 1, 4], "driver": 4, "dtd": 3, "dynamicalloc": 4, "e": [0, 1], "e0": 3, "each": 0, "easili": 1, "effort": 1, "either": 0, "element": 0, "elink": 0, "entrez": [0, 1], "entri": 0, "epublication_d": 0, "eutil": 0, "evolut": 0, "executor": 4, "explain": [0, 3], "extract": 0, "f": 0, "fals": 0, "fast": 1, "fcgi": 0, "few": 3, "field": 0, "fig_capt": 0, "fig_id": 0, "fig_label": 0, "figur": 0, "figure_id": 3, "file": [0, 1, 3], "findspark": 4, "first": 3, "fisher": 0, "folder": [2, 3, 4], "follow": [0, 2, 3], "form": 0, "format": [0, 1, 3], "from": [1, 2, 3, 4], "ftp": [0, 3], "fu": 3, "full": [0, 1, 3, 4], "full_titl": 0, "function": [0, 1, 3], "g001": 0, "galleri": 2, "gene": 0, "geometr": 0, "get": [1, 3], "getorcr": 4, "git": 2, "github": 2, "give": [0, 1], "given": [0, 3], "go": 3, "good": 0, "gov": [0, 3], "grant": 0, "grant_list": 0, "graphic_ref": 0, "guidelin": 1, "gz": [0, 3], "h": 0, "ha": [1, 4], "harvest": 0, "have": [0, 1, 3, 4], "here": 3, "high": 1, "how": [0, 1, 3, 4], "html": 2, "http": [0, 2, 3], "hu": 0, "hungari": 0, "i": [0, 1, 3], "id": [0, 3], "id_typ": 0, "imag": 3, "implement": [0, 4], "import": 4, "includ": 0, "include_path": 0, "incorpor": 4, "inform": [0, 1, 3], "init": 4, "input": 0, "instal": 1, "institut": 0, "int": 0, "integ": 0, "interest": 3, "ip": 3, "iter": 0, "its": 0, "javascript": 3, "jord\u00e1n": 0, "journal": 0, "jupyt": 4, "keep": 1, "kei": 0, "keyword": 0, "kung": 3, "label": 0, "languag": 1, "less": 4, "level": 1, "librari": 1, "linknam": 0, "list": 0, "load": 0, "local": 4, "long": 0, "lxml": [0, 1], "made": 0, "main": 0, "major": 0, "make": 2, "manuscript": 3, "map": 4, "maxexecutor": 4, "maxresults": 4, "mb": 4, "medic": 3, "medleas": 3, "medleasebaselin": 3, "medlin": 4, "medlinexmltojson": 3, "memori": 4, "memoryoverhead": 4, "mesh": 0, "mesh_term": 0, "metadata": 0, "might": [1, 3, 4], "million": 4, "mine": 1, "minimum": 4, "minut": 4, "mm": 0, "model": 0, "molecular": 0, "month": 0, "more": [1, 4], "multipl": 4, "n": 0, "n_citat": 0, "name": 0, "namespac": 0, "natur": 1, "ncbi": [0, 3], "need": 2, "new": 0, "nih": [0, 3], "nlm": [0, 3], "nlm_categori": 0, "nlmdata": 3, "node": 0, "none": 0, "normal": 1, "note": [0, 4], "notebook": 4, "now": 1, "number": [0, 1], "nxml": 0, "o": 4, "oa": 1, "oa_bulk": 3, "oa_file_list": 3, "oa_packag": 3, "obtain": 1, "one": 0, "onli": 0, "open": [0, 2, 3], "opt": 4, "origin": 0, "other": [0, 1], "our": 1, "out": 3, "output": 0, "page": [1, 2], "paper": [0, 1], "paragraph": 0, "parallel": 4, "pars": [1, 3, 4], "parse_article_info": 0, "parse_citation_web": 0, "parse_downto_mesh_subterm": 0, "parse_grant_id": 0, "parse_medline_xml": 0, "parse_outgoing_citation_web": 0, "parse_pubmed_capt": [0, 3], "parse_pubmed_paragraph": 0, "parse_pubmed_refer": 0, "parse_pubmed_t": 0, "parse_pubmed_xml": 0, "parse_xml_web": 0, "parser": 0, "part": 0, "path": [0, 3, 4], "path_to_fil": 0, "pdf": 1, "phenotyp": 0, "pip": 2, "pipelin": 1, "pleas": [3, 4], "pmc": 0, "pmc13900": 3, "pmc_cite": 0, "pmc_refs_pubm": 0, "pmcid": 0, "pmid": [0, 3], "pmid_cit": 0, "point": 0, "pone": 0, "prefix": 0, "print": 0, "process": [1, 4], "processor": 4, "program": 1, "provid": [0, 1, 3], "pub": [0, 3], "pubdat": 0, "public": 0, "publication_d": 0, "publication_year": 0, "publish": 1, "publisher_id": 0, "pubmed20n0014": 0, "pubmed_articl": 0, "pubmed_pars": [0, 2, 3], "pum": 0, "put": 4, "pyspark": 1, "pyspark_driver_python": 4, "pyspark_python": 4, "pytest": 2, "python": [0, 4], "p\u00e1l": 0, "queri": 1, "question": 0, "r": 0, "read": 0, "reduc": [1, 4], "refer": 0, "reference_id": 0, "reference_list": 0, "regular": [1, 2], "relat": 0, "remov": 0, "repositori": [1, 2, 4], "research": 1, "resolut": 0, "resourc": 1, "return_xml": 0, "rid": 0, "run": 2, "save": 0, "save_xml": 0, "schedul": 4, "scienc": 0, "scrape": 3, "script": 4, "section": 0, "see": [0, 1, 3, 4], "set": 1, "setappnam": 4, "setmast": 4, "setup": 4, "singl": 0, "sleep": 0, "small": 4, "snippet": 4, "so": 1, "some": 3, "soon": 4, "sourc": 0, "space": 0, "spark": 4, "spark_hom": 4, "sparkconf": 4, "sparkcontext": 4, "sparksess": 4, "specif": 3, "sphinx": 2, "sql": 4, "stackoverflow": 0, "str": 0, "string": 0, "strip": 0, "structur": [0, 1], "studi": 0, "subject": 0, "submit": 1, "subset": 0, "subterm": 0, "szathmari": 0, "szathm\u00e1ri": 0, "szenth\u00e1roms\u00e1g": 0, "t": 0, "tabl": 0, "table_dict": 0, "table_xml": 0, "tag": 3, "take": 1, "tar": 3, "term": 0, "test": [2, 4], "text": [0, 1], "than": [0, 4], "them": 3, "thi": [0, 1, 2, 3, 4], "those": 1, "through": 1, "time": [1, 4], "titipata": 2, "titl": 0, "tool": [0, 3], "tree": 0, "true": [0, 4], "two": 0, "type": [0, 3], "u": 0, "until": 1, "up": 1, "updat": [3, 4], "url": 0, "us": [0, 1, 2, 3, 4], "util": 1, "vari": 0, "verbos": 2, "version_d": 0, "version_id": 0, "via": 2, "wait": 0, "want": [0, 3], "we": [0, 1, 3, 4], "websit": 3, "weekli": 3, "when": [3, 4], "where": 0, "which": [0, 1, 2, 3, 4], "who": 1, "wiki": 1, "won": 0, "work": 1, "workflow": 4, "write": 1, "www": 3, "xml": [3, 4], "y": 0, "yarn": 4, "year": 0, "year_info_onli": 0, "yield": 0, "york": 0, "you": [0, 1, 2, 3, 4], "your": [2, 3], "yyyi": 0}, "titles": ["API Documentation", "Pubmed Parser: A Python Parser for PubMed Open-Access XML Subset and MEDLINE XML Dataset", "Installation", "Resources", "Setting up Pubmed Parser with PySpark"], "titleterms": {"A": 1, "about": 1, "access": 1, "altern": 3, "api": 0, "bug": 1, "content": 1, "contribut": 1, "copyright": 3, "dataset": [1, 3], "document": 0, "download": 3, "exampl": 0, "figur": 3, "from": 0, "implement": 3, "instal": 2, "link": 3, "medlin": [0, 1, 3], "notic": 3, "oa": [0, 3], "open": 1, "paramet": 0, "pars": 0, "parser": [1, 3, 4], "pmc": 3, "pubm": [0, 1, 3, 4], "pyspark": 4, "python": 1, "question": 1, "resourc": 3, "return": 0, "set": 4, "subset": 1, "up": 4, "websit": 0, "xml": [0, 1]}}) \ No newline at end of file diff --git a/spark.html b/spark.html index f6ffd85..83d29b9 100644 --- a/spark.html +++ b/spark.html @@ -4,7 +4,7 @@ - Setting up Pubmed Parser with PySpark — Pubmed Parser 0.5.2.dev2+g245dd09 documentation + Setting up Pubmed Parser with PySpark — Pubmed Parser 0.5.2.dev3+g3ae0bb6 documentation @@ -15,7 +15,7 @@ - + @@ -36,7 +36,7 @@ Pubmed Parser
- 0.5.2.dev2+g245dd09 + 0.5.2.dev3+g3ae0bb6