From 65542c2821ea4bca3968ecc9cda1a6215dd9c72d Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Mon, 17 Jan 2022 14:15:11 -0800 Subject: [PATCH 01/22] remove cite_style --- mkdocs_bibtex/plugin.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/mkdocs_bibtex/plugin.py b/mkdocs_bibtex/plugin.py index 8398cfb..81c181c 100644 --- a/mkdocs_bibtex/plugin.py +++ b/mkdocs_bibtex/plugin.py @@ -18,10 +18,6 @@ class BibTexPlugin(BasePlugin): Options: bib_file (string): path to a single bibtex file for entries bib_dir (string): path to a directory of bibtex files for entries - cite_style (string): either "plain" or "pandoc" to define the cite key style - defaults to "pandoc" - plain - @cite_key - pandoc - [@cite_key] bib_command (string): command to place a bibliography relevant to just that file defaults to \bibliography full_bib_command (string): command to place a full bibliography of all references @@ -31,10 +27,6 @@ class BibTexPlugin(BasePlugin): config_scheme = [ ("bib_file", config_options.File(exists=True, required=False)), ("bib_dir", config_options.Dir(exists=True, required=False)), - ( - "cite_style", - config_options.Choice(choices=["plain", "pandoc"], default="plain"), - ), ("bib_command", config_options.Type(str, default="\\bibliography")), ("full_bib_command", config_options.Type(str, default="\\full_bibliography")), ("csl_file", config_options.File(exists=True, required=False)), @@ -68,17 +60,10 @@ def on_config(self, config): self.bib_data = BibliographyData(entries=refs) - cite_style = self.config.get("cite_style", "pandoc") - # Decide on how citations are entered into the markdown text - if cite_style == "plain": - self.cite_regex = re.compile(r"\@(\w+)") - self.insert_regex = r"\@{}" - elif cite_style == "pandoc": - self.cite_regex = re.compile(r"\[\@((?:(?:\w+)[\-:]?)+)\]") - self.insert_regex = r"\[@{}\]" - else: - raise Exception("Invalid citation style: 
{}".format(cite_style)) - + + self.cite_regex = re.compile(r"\[\@((?:(?:\w+)[\-:]?)+)\]") + self.insert_regex = r"\[@{}\]" + self.csl_file = self.config.get("csl_file", None) self.unescape_for_arithmatex = self.config.get("unescape_for_arithmatex", False) From 2fd52ba85db415fd4d7ad56e2e789b7d437d1047 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Tue, 18 Jan 2022 11:24:17 -0800 Subject: [PATCH 02/22] update documentation on markdown conversion --- mkdocs_bibtex/plugin.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mkdocs_bibtex/plugin.py b/mkdocs_bibtex/plugin.py index 81c181c..2c16e78 100644 --- a/mkdocs_bibtex/plugin.py +++ b/mkdocs_bibtex/plugin.py @@ -75,9 +75,13 @@ def on_page_markdown(self, markdown, page, config, files): Parses the markdown for each page, extracting the bibtex references If a local reference list is requested, this will render that list where requested - 1. Finds all cite keys - 2. Convert all the corresponding bib entries into citations - 3. Insert the ordered citation numbers into the markdown text + 1. Finds all cite keys (may include multiple citation references) + 2. Convert all cite keys to citation quads: + (full cite key, + induvidual cite key, + citation key in corresponding style, + citation for induvidual cite key) + 3. Insert formatted cite keys into text 4. Insert the bibliography into the markdown 5. 
Insert the full bibliograph into the markdown """ From 212ac9c64782386cf9144895822e1c529e4080ab Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Tue, 18 Jan 2022 11:28:41 -0800 Subject: [PATCH 03/22] refactor markdown processing --- mkdocs_bibtex/plugin.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/mkdocs_bibtex/plugin.py b/mkdocs_bibtex/plugin.py index 2c16e78..3716f03 100644 --- a/mkdocs_bibtex/plugin.py +++ b/mkdocs_bibtex/plugin.py @@ -87,27 +87,16 @@ def on_page_markdown(self, markdown, page, config, files): """ # 1. Grab all the cited keys in the markdown - cite_keys = self.cite_regex.findall(markdown) - citations = [ - (cite_key, self.bib_data.entries[cite_key]) - for cite_key in cite_keys - if cite_key in self.bib_data.entries - ] + cite_keys = find_cite_keys(markdown) # 2. Convert all the citations to text references - references = self.format_citations(citations) + citation_quads = self.format_citations(cite_keys) # 3. Insert in numbers into the main markdown and build bibliography - bibliography = [] - for number, key in enumerate(references.keys()): - markdown = re.sub( - self.insert_regex.format(key), "[^{}]".format(number + 1), markdown - ) - bibliography_text = "[^{}]: {}".format(number + 1, references[key]) - bibliography.append(bibliography_text) + markdown = insert_citation_keys(citation_quads,markdown) # 4. 
Insert in the bibliopgrahy text into the markdown - bibliography = "\n\n".join(bibliography) + bibliography = format_bibliography(citation_quads) markdown = re.sub( re.escape(self.config.get("bib_command", "\\bibliography")), bibliography, From 25a94c9966030bfd8564ca3b263a2c1ad14a5671 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 13:22:43 -0800 Subject: [PATCH 04/22] scaffold new functions --- mkdocs_bibtex/plugin.py | 75 +++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/mkdocs_bibtex/plugin.py b/mkdocs_bibtex/plugin.py index 3716f03..d7b3e4f 100644 --- a/mkdocs_bibtex/plugin.py +++ b/mkdocs_bibtex/plugin.py @@ -60,10 +60,6 @@ def on_config(self, config): self.bib_data = BibliographyData(entries=refs) - - self.cite_regex = re.compile(r"\[\@((?:(?:\w+)[\-:]?)+)\]") - self.insert_regex = r"\[@{}\]" - self.csl_file = self.config.get("csl_file", None) self.unescape_for_arithmatex = self.config.get("unescape_for_arithmatex", False) @@ -112,32 +108,17 @@ def on_page_markdown(self, markdown, page, config, files): return markdown - def format_citations(self, citations): + def format_citations(self, cite_keys): """ - Formats references and adds them to the global registry + Formats references into citation quads and adds them to the global registry Args: - citations (dict): mapping of cite_key to entry + cite_keys (list): List of full cite_keys that maybe compound keys - Returns OrderedDict of references + Returns: + citation_quads: quad tupples of the citation inforamtion """ - style = PlainStyle() - backend = MarkdownBackend() - references = OrderedDict() - for key, entry in citations: - if self.csl_file is not None: - entry_text = to_markdown_pandoc(entry, self.csl_file) - else: - formatted_entry = style.format_entry("", entry) - entry_text = formatted_entry.text.render(backend) - entry_text = entry_text.replace("\n", " ") - if self.unescape_for_arithmatex: - 
entry_text = entry_text.replace("\(", "(").replace("\)", ")") - # Local reference list for this file - references[key] = entry_text - # Global reference list for all files - self.all_references[key] = entry_text - return references + pass @property def full_bibliography(self): @@ -207,3 +188,47 @@ def to_markdown_pandoc(entry, csl_path): citation_regex = re.compile(r"(?:1\.)?(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation + + + +def find_cite_keys(markdown): + """ + Finds the cite keys in the markdown text + This function can handle multiple keys in a single reference + + Args: + markdown (str): the markdown text to be extract citation + keys from + """ + + cite_regex = re.compile(r"\[((?:@\w+;{0,1}\s*)+)\]") + cite_keys = cite_regex.findall(markdown) + return list(cite_keys) + + +def insert_citation_keys(citation_quads,markdown): + """ + Insert citations into the markdown text replacing + the old citation keys + + Args: + citation_quads (tuple): a quad tuple of all citation info + markdown (str): the markdown text to modify + + Returns: + markdown (str): the modified Markdown + """ + pass + + +def format_bibliography(citation_quads): + """ + Generates a bibliography from the citation quads + + Args: + citation_quads (tuple): a quad tuple of all citation info + + Returns: + markdown (str): the Markdown string for the bibliography + """ + pass \ No newline at end of file From e6a080d1e9cd076826efbc0ecc9cafbd9ca070ca Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 15:01:02 -0800 Subject: [PATCH 05/22] convert bib files into one --- test_files/parenthesis.bib | 8 -------- test_files/single.bib | 8 -------- test_files/{long_cite.bib => test.bib} | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 16 deletions(-) delete mode 100644 test_files/parenthesis.bib delete mode 100644 test_files/single.bib rename test_files/{long_cite.bib => test.bib} (77%) diff --git 
a/test_files/parenthesis.bib b/test_files/parenthesis.bib deleted file mode 100644 index ac40d95..0000000 --- a/test_files/parenthesis.bib +++ /dev/null @@ -1,8 +0,0 @@ -@article{test, - title={{Test Title (TT)}}, - author={Author, First and Author, Second}, - journal={Testing Journal (TJ)}, - volume={1}, - year={2019}, - publisher={Test_Publisher (TP)} -} diff --git a/test_files/single.bib b/test_files/single.bib deleted file mode 100644 index 7fb4a89..0000000 --- a/test_files/single.bib +++ /dev/null @@ -1,8 +0,0 @@ -@article{test, - title={Test Title}, - author={Author, First and Author, Second}, - journal={Testing Journal}, - volume={1}, - year={2019}, - publisher={Test_Publisher} -} diff --git a/test_files/long_cite.bib b/test_files/test.bib similarity index 77% rename from test_files/long_cite.bib rename to test_files/test.bib index 2a7fa9e..d77845b 100644 --- a/test_files/long_cite.bib +++ b/test_files/test.bib @@ -1,3 +1,21 @@ +@article{test, + title={Test Title}, + author={Author, First and Author, Second}, + journal={Testing Journal}, + volume={1}, + year={2019}, + publisher={Test_Publisher} +} + +@article{test2, + title={{Test Title (TT)}}, + author={Author, First and Author, Second}, + journal={Testing Journal (TJ)}, + volume={1}, + year={2019}, + publisher={Test_Publisher (TP)} +} + @article{Bivort2016, title = {Evidence for Selective Attention in the Insect Brain}, author = {De Bivort, Benjamin L. 
and Van Swinderen, Bruno}, @@ -11,3 +29,4 @@ @article{Bivort2016 keywords = {attention,bees,drosophila,insects}, pmid = {27436727} } + From 1d292ed19ed450f475bbc7bbf1204d488bf9b7bd Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:53:58 -0800 Subject: [PATCH 06/22] refactor utility functions into separate module --- mkdocs_bibtex/utils.py | 126 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 mkdocs_bibtex/utils.py diff --git a/mkdocs_bibtex/utils.py b/mkdocs_bibtex/utils.py new file mode 100644 index 0000000..98ea44c --- /dev/null +++ b/mkdocs_bibtex/utils.py @@ -0,0 +1,126 @@ +import re +import tempfile +from itertools import groupby +from pathlib import Path + +import pypandoc +from pybtex.database import BibliographyData + + +def to_markdown_pandoc(entry, csl_path): + """ + Converts the PyBtex entry into formatted markdown citation text + """ + bibtex_string = BibliographyData(entries={entry.key: entry}).to_string("bibtex") + if tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) >= ( + 2, + 11, + ): + markdown = pypandoc.convert_text( + source=bibtex_string, + to="markdown-citations", + format="bibtex", + extra_args=[ + "--citeproc", + "--csl", + csl_path, + ], + ) + + # This should cut off the pandoc preamble and ending triple colons + markdown = " ".join(markdown.split("\n")[2:-2]) + + citation_regex = re.compile( + r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}" + ) + try: + + citation = citation_regex.findall(markdown)[0] + except IndexError: + citation = markdown + else: + # Older citeproc-filter version of pandoc + with tempfile.TemporaryDirectory() as tmpdir: + bib_path = Path(tmpdir).joinpath("temp.bib") + with open(bib_path, "w") as bibfile: + bibfile.write(bibtex_string) + citation_text = """ +--- +nocite: '@*' +--- +""" + + markdown = pypandoc.convert_text( + source=citation_text, + to="markdown_strict", + format="md", + 
extra_args=["--csl", csl_path, "--bibliography", bib_path], + filters=["pandoc-citeproc"], + ) + + citation_regex = re.compile(r"(1\.)?(.*)") + citation = citation_regex.findall(markdown.replace("\n", " "))[0] + return citation + + +def find_cite_keys(markdown): + """ + Finds the cite keys in the markdown text + This function can handle multiple keys in a single reference + + Args: + markdown (str): the markdown text to be extract citation + keys from + """ + + cite_regex = re.compile(r"(\[(?:@\w+;{0,1}\s*)+\])") + cite_keys = cite_regex.findall(markdown) + return list(cite_keys) + + +def insert_citation_keys(citation_quads, markdown): + """ + Insert citations into the markdown text replacing + the old citation keys + + Args: + citation_quads (tuple): a quad tuple of all citation info + markdown (str): the markdown text to modify + + Returns: + markdown (str): the modified Markdown + """ + + # Renumber quads if using numbers for citation links + if all(quad[2].isnumeric() for quad in citation_quads): + citation_quads = [ + (quad[0], quad[1], str(n + 1), quad[2]) + for n, quad in enumerate(citation_quads) + ] + + grouped_quads = [list(g) for _, g in groupby(citation_quads, key=lambda x: x[0])] + for quad_group in grouped_quads: + full_citation = quad_group[0][0] # the first key in the whole citation + replacement_citaton = "".join(["[^{}]".format(quad[2]) for quad in quad_group]) + markdown = markdown.replace(full_citation, replacement_citaton) + + return markdown + + +def format_bibliography(citation_quads): + """ + Generates a bibliography from the citation quads + + Args: + citation_quads (tuple): a quad tuple of all citation info + + Returns: + markdown (str): the Markdown string for the bibliography + """ + new_bib = {quad[2]: quad[3] for quad in citation_quads} + bibliography = [] + for key, citation in new_bib.items(): + bibliography_text = "[^{}]: {}".format(key, citation) + bibliography.append(bibliography_text) + + return "\n".join(bibliography) From 
4461c8de9a693583ad0856c168aba5f22ed03930 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 16:54:24 -0800 Subject: [PATCH 07/22] move tests out of module --- mkdocs_bibtex/test_plugin.py | 103 -------------------- test_files/test_plugin.py | 183 +++++++++++++++++++++++++++++++++++ 2 files changed, 183 insertions(+), 103 deletions(-) delete mode 100644 mkdocs_bibtex/test_plugin.py create mode 100644 test_files/test_plugin.py diff --git a/mkdocs_bibtex/test_plugin.py b/mkdocs_bibtex/test_plugin.py deleted file mode 100644 index 04abe77..0000000 --- a/mkdocs_bibtex/test_plugin.py +++ /dev/null @@ -1,103 +0,0 @@ -import os -import unittest - -from pybtex.database import parse_file - -from mkdocs_bibtex import BibTexPlugin - -module_dir = os.path.dirname(os.path.abspath(__file__)) -test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) - - -class TestPlugin(unittest.TestCase): - def setUp(self): - self.plugin = BibTexPlugin() - self.plugin.load_config( - options={"bib_file": os.path.join(test_files_dir, "single.bib")}, - config_file_path=test_files_dir, - ) - - def test_unescape_for_arithmatex(self): - test_data = parse_file(os.path.join(test_files_dir, "parenthesis.bib")) - self.plugin.csl_file = None - - self.plugin.unescape_for_arithmatex = True - self.assertIn( - "First Author and Second Author\\. Test Title (TT)\\. *Testing Journal (TJ)*, 2019", - self.plugin.format_citations(test_data.entries.items())["test"], - ) - - self.plugin.unescape_for_arithmatex = False - self.assertIn( - "First Author and Second Author\\. Test Title \\(TT\\)\\. 
*Testing Journal \\(TJ\\)*, 2019", - self.plugin.format_citations(test_data.entries.items())["test"], - ) - - def test_config_one_bibtex_file(self): - self.plugin.on_config(self.plugin.config) - self.assertEqual(len(self.plugin.bib_data.entries), 1) - - def test_config_one_bibtex_dir(self): - plugin = BibTexPlugin() - plugin.load_config( - options={"bib_dir": os.path.join(test_files_dir, "multi_bib")}, - config_file_path=test_files_dir, - ) - - plugin.on_config(plugin.config) - self.assertEqual(len(plugin.bib_data.entries), 2) - - def test_format_citations(self): - test_data = parse_file(os.path.join(test_files_dir, "single.bib")) - self.plugin.csl_file = None - self.assertIn( - "First Author and Second Author", - self.plugin.format_citations(test_data.entries.items())["test"], - ) - - self.plugin.csl_file = os.path.join(test_files_dir, "nature.csl") - self.assertIn( - "Author, F. & Author, S", - self.plugin.format_citations(test_data.entries.items())["test"], - ) - # TODO: Check CSL - - def test_long_citation(self): - test_data = parse_file(os.path.join(test_files_dir, "long_cite.bib")) - self.plugin.csl_file = None - self.assertIn( - "Benjamin L\\. De Bivort and Bruno Van Swinderen", - self.plugin.format_citations(test_data.entries.items())["Bivort2016"], - ) - - self.plugin.csl_file = os.path.join(test_files_dir, "nature.csl") - self.assertIn( - "De Bivort, B. L. & Van Swinderen", - self.plugin.format_citations(test_data.entries.items())["Bivort2016"], - ) - - def test_full_bibliography(self): - test_data = parse_file(os.path.join(test_files_dir, "single.bib")) - self.plugin.csl_file = None - self.plugin.format_citations(test_data.entries.items()) - - self.assertIn("First Author and Second Author", self.plugin.full_bibliography) - - self.plugin.csl_file = os.path.join(test_files_dir, "nature.csl") - self.plugin.format_citations(test_data.entries.items()) - self.assertIn("Author, F. 
& Author, S", self.plugin.full_bibliography) - - self.plugin.csl_file = os.path.join( - test_files_dir, "springer-basic-author-date.csl" - ) - self.plugin.format_citations(test_data.entries.items()) - self.assertIn("Author F, Author S", self.plugin.full_bibliography) - - def test_on_page_markdown(self): - self.plugin.on_config(self.plugin.config) - test_markdown = "This is a citation. [@test]\n\n \\bibliography" - - self.assertIn( - "[^1]: First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", - self.plugin.on_page_markdown(test_markdown, None, None, None), - ) diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py new file mode 100644 index 0000000..9225865 --- /dev/null +++ b/test_files/test_plugin.py @@ -0,0 +1,183 @@ +import os + +import pytest + +from mkdocs_bibtex.plugin import BibTexPlugin +from mkdocs_bibtex.utils import (find_cite_keys, format_bibliography, + insert_citation_keys) + +module_dir = os.path.dirname(os.path.abspath(__file__)) +test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) + + +@pytest.fixture +def plugin(): + plugin = BibTexPlugin() + plugin.load_config( + options={"bib_file": os.path.join(test_files_dir, "test.bib")}, + config_file_path=test_files_dir, + ) + plugin.on_config(plugin.config) + return plugin + + +def test_bibtex_loading_bibfile(plugin): + assert len(plugin.bib_data.entries) == 3 + + +def test_bibtex_loading_bibdir(): + plugin = BibTexPlugin() + plugin.load_config( + options={"bib_dir": os.path.join(test_files_dir, "multi_bib")}, + config_file_path=test_files_dir, + ) + + plugin.on_config(plugin.config) + assert len(plugin.bib_data.entries) == 2 + + +@pytest.mark.xfail() +def test_format_citations(plugin): + plugin.csl_file = None + + assert ( + "[@test]", + "@test", + "1.", + "First Author and Second Author", + ) == plugin.format_citations(["@test"])[0] + + # Test arithmatex compatability formatting + assert ( + "[@test2]", + "@test2", + "1.", + "First Author and 
Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + ) == plugin.format_citations(["@test2"]) + + plugin.unescape_for_arithmatex = True + assert ( + "[@test2]", + "@test2", + "1.", + "First Author and Second Author\\. Test Title (TT)\\. *Testing Journal (TJ)*, 2019", + ) == plugin.format_citations(["@test2"]) + + plugin.unescape_for_arithmatex = False + + # Test compound citation + assert [ + ( + "[@test; @test2]", + "@test", + "1.", + "First Author and Second Author", + ), + ( + "[@test; @test2]", + "@test2", + "2.", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + ), + ] == plugin.format_citations(["@test; @test2"]) + + # test long citation + + plugin.csl_file = None + assert ( + "@Biovort2016", + "@Biovort2016", + "1", + "Benjamin L\\. De Bivort and Bruno Van Swinderen", + ) == plugin.format_citations(["Bivort2016"]) + + # Test formatting using a CSL style + plugin.csl_file = os.path.join(test_files_dir, "nature.csl") + assert ( + "@test", + "@test", + "1.", + "Author, F. & Author, S", + ) == plugin.format_citations(["@test"]) + + plugin.csl_file = os.path.join(test_files_dir, "nature.csl") + assert ( + "@Biovort2016", + "@Biovort2016", + "1", + "De Bivort, B. L. 
& Van Swinderen", + ) == plugin.format_citations(["Bivort2016"]) + + plugin.csl_file = os.path.join(test_files_dir, "springer-basic-author-date.csl") + assert ("@test", "@test", "1", "Author F, Author S") == plugin.format_citations( + ["@test"] + ) + + +def test_find_cite_keys(): + assert find_cite_keys("[@test]") == ["[@test]"] + assert find_cite_keys("[@test; @test2]") == ["[@test; @test2]"] + assert find_cite_keys("[@test]\n [@test; @test2]") == ["[@test]", "[@test; @test2]"] + + +def test_insert_citation_keys(): + assert ( + insert_citation_keys( + [ + ( + "[@test]", + "@test", + "1", + "First Author and Second Author", + ) + ], + "[@test]", + ) + == "[^1]" + ) + + assert ( + insert_citation_keys( + [ + ( + "[@test; @test2]", + "@test", + "1", + "First Author and Second Author", + ), + ( + "[@test; @test2]", + "@test2", + "2", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + ), + ], + "[@test; @test2]", + ) + == "[^1][^2]" + ) + + +def test_format_bibliography(): + quads = [ + ( + "[@test; @test2]", + "@test", + "1", + "First Author and Second Author", + ), + ( + "[@test; @test2]", + "@test2", + "2", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + ), + ] + + bib = format_bibliography(quads) + + assert "[^1]: First Author and Second Author" in bib + assert ( + "[^2]: First Author and Second Author\\. Test Title \\(TT\\)\\. 
*Testing Journal \\(TJ\\)*, 2019" + in bib + ) \ No newline at end of file From f5193b3038f58a205ce5363c2c6723f70688b12d Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:00:02 -0800 Subject: [PATCH 08/22] bring formatting to utils --- mkdocs_bibtex/utils.py | 131 ++++++++++++++++++++++++++------------ test_files/test_plugin.py | 53 +++++++++++++-- 2 files changed, 138 insertions(+), 46 deletions(-) diff --git a/mkdocs_bibtex/utils.py b/mkdocs_bibtex/utils.py index 98ea44c..fe1d310 100644 --- a/mkdocs_bibtex/utils.py +++ b/mkdocs_bibtex/utils.py @@ -1,65 +1,112 @@ import re import tempfile +from collections import OrderedDict from itertools import groupby from pathlib import Path import pypandoc +from pybtex.backends.markdown import Backend as MarkdownBackend from pybtex.database import BibliographyData +from pybtex.style.formatting.plain import Style as PlainStyle -def to_markdown_pandoc(entry, csl_path): +def format_simple(entries): + """ + Format the entries using a simple built in style + + Args: + entries (dict): dictionary of entries + + Returns: + references (dict): dictionary of citation texts + """ + style = PlainStyle() + backend = MarkdownBackend() + citations = OrderedDict() + for key, entry in entries.items(): + formatted_entry = style.format_entry("", entry) + entry_text = formatted_entry.text.render(backend) + entry_text = entry_text.replace("\n", " ") + # Local reference list for this file + citations[key] = entry_text + return citations + + +def format_pandoc(entries, csl_path): + """ + Format the entries using pandoc + + Args: + entries (dict): dictionary of entries + csl_path (str): path to formatting CSL Fle + Returns: + references (dict): dictionary of citation texts + """ + pandoc_version = tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) + citations = OrderedDict() + for key, entry in entries.items(): + bibtex_string = BibliographyData(entries={entry.key: 
entry}).to_string("bibtex") + if pandoc_version >= (2, 11): + citations[key] = _convert_pandoc_new(bibtex_string, csl_path) + else: + citations[key] = _convert_pandoc_legacy(bibtex_string, csl_path) + + return citations + + +def _convert_pandoc_new(bibtex_string, csl_path): """ Converts the PyBtex entry into formatted markdown citation text + using pandoc version 2.11 or newer """ - bibtex_string = BibliographyData(entries={entry.key: entry}).to_string("bibtex") - if tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) >= ( - 2, - 11, - ): - markdown = pypandoc.convert_text( - source=bibtex_string, - to="markdown-citations", - format="bibtex", - extra_args=[ - "--citeproc", - "--csl", - csl_path, - ], - ) + markdown = pypandoc.convert_text( + source=bibtex_string, + to="markdown-citations", + format="bibtex", + extra_args=[ + "--citeproc", + "--csl", + csl_path, + ], + ) + + # This should cut off the pandoc preamble and ending triple colons + markdown = " ".join(markdown.split("\n")[2:-2]) + + citation_regex = re.compile(r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}") + try: + + citation = citation_regex.findall(markdown)[0] + except IndexError: + citation = markdown + return citation - # This should cut off the pandoc preamble and ending triple colons - markdown = " ".join(markdown.split("\n")[2:-2]) - citation_regex = re.compile( - r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}" - ) - try: - - citation = citation_regex.findall(markdown)[0] - except IndexError: - citation = markdown - else: - # Older citeproc-filter version of pandoc - with tempfile.TemporaryDirectory() as tmpdir: - bib_path = Path(tmpdir).joinpath("temp.bib") - with open(bib_path, "w") as bibfile: - bibfile.write(bibtex_string) - citation_text = """ +def _convert_pandoc_legacy(bibtex_string, csl_file): + """ + Converts the PyBtex entry into formatted markdown citation text + using pandoc version older than 2.11 + """ + with tempfile.TemporaryDirectory() as tmpdir: + 
bib_path = Path(tmpdir).joinpath("temp.bib") + with open(bib_path, "w") as bibfile: + bibfile.write(bibtex_string) + citation_text = """ --- nocite: '@*' --- """ - markdown = pypandoc.convert_text( - source=citation_text, - to="markdown_strict", - format="md", - extra_args=["--csl", csl_path, "--bibliography", bib_path], - filters=["pandoc-citeproc"], - ) + markdown = pypandoc.convert_text( + source=citation_text, + to="markdown_strict", + format="md", + extra_args=["--csl", csl_path, "--bibliography", bib_path], + filters=["pandoc-citeproc"], + ) - citation_regex = re.compile(r"(1\.)?(.*)") - citation = citation_regex.findall(markdown.replace("\n", " "))[0] + citation_regex = re.compile(r"(1\.)?(.*)") + citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 9225865..41899ed 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -2,9 +2,14 @@ import pytest -from mkdocs_bibtex.plugin import BibTexPlugin -from mkdocs_bibtex.utils import (find_cite_keys, format_bibliography, - insert_citation_keys) +from mkdocs_bibtex.plugin import BibTexPlugin, parse_file +from mkdocs_bibtex.utils import ( + find_cite_keys, + format_bibliography, + insert_citation_keys, + format_simple, + format_pandoc, +) module_dir = os.path.dirname(os.path.abspath(__file__)) test_files_dir = os.path.abspath(os.path.join(module_dir, "..", "test_files")) @@ -21,6 +26,12 @@ def plugin(): return plugin +@pytest.fixture +def entries(): + bibdata = parse_file(os.path.join(test_files_dir, "test.bib")) + return bibdata.entries + + def test_bibtex_loading_bibfile(plugin): assert len(plugin.bib_data.entries) == 3 @@ -180,4 +191,38 @@ def test_format_bibliography(): assert ( "[^2]: First Author and Second Author\\. Test Title \\(TT\\)\\. 
*Testing Journal \\(TJ\\)*, 2019" in bib - ) \ No newline at end of file + ) + + +def test_format_simple(entries): + citations = format_simple(entries) + + assert all(k in citations for k in entries) + assert all(entry != citations[k] for k, entry in entries.items()) + + print(citations) + assert ( + citations["test"] + == "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\." + ) + assert ( + citations["test2"] + == "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\." + ) + + +def test_format_pandoc(entries): + citations = format_pandoc(entries, os.path.join(test_files_dir, "nature.csl")) + + assert all(k in citations for k in entries) + assert all(entry != citations[k] for k, entry in entries.items()) + + print(citations) + assert ( + citations["test"] + == "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019)." + ) + assert ( + citations["test2"] + == "Author, F. & Author, S. Test Title (TT). *Testing Journal (TJ)* **1**, (2019)." + ) From 01a05cbc17b08ac3c4ae3925d315d979521cb293 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:27:35 -0800 Subject: [PATCH 09/22] update all tests --- test_files/test_plugin.py | 91 +++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 41899ed..eb7634f 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -47,82 +47,71 @@ def test_bibtex_loading_bibdir(): assert len(plugin.bib_data.entries) == 2 -@pytest.mark.xfail() def test_format_citations(plugin): plugin.csl_file = None assert ( "[@test]", - "@test", - "1.", - "First Author and Second Author", - ) == plugin.format_citations(["@test"])[0] - - # Test arithmatex compatability formatting - assert ( - "[@test2]", - "@test2", - "1.", - "First Author and Second Author\\. Test Title \\(TT\\)\\. 
*Testing Journal \\(TJ\\)*, 2019", - ) == plugin.format_citations(["@test2"]) + "test", + "1", + "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + ) == plugin.format_citations(["[@test]"])[0] - plugin.unescape_for_arithmatex = True assert ( "[@test2]", - "@test2", - "1.", - "First Author and Second Author\\. Test Title (TT)\\. *Testing Journal (TJ)*, 2019", - ) == plugin.format_citations(["@test2"]) - - plugin.unescape_for_arithmatex = False + "test2", + "1", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\.", + ) == plugin.format_citations(["[@test2]"])[0] # Test compound citation assert [ ( "[@test; @test2]", - "@test", - "1.", - "First Author and Second Author", + "test", + "1", + "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", ), ( "[@test; @test2]", - "@test2", - "2.", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + "test2", + "1", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\.", ), - ] == plugin.format_citations(["@test; @test2"]) + ] == plugin.format_citations(["[@test; @test2]"]) # test long citation - - plugin.csl_file = None assert ( - "@Biovort2016", - "@Biovort2016", + "[@Bivort2016]", + "Bivort2016", "1", - "Benjamin L\\. De Bivort and Bruno Van Swinderen", - ) == plugin.format_citations(["Bivort2016"]) + "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", + ) == plugin.format_citations(["[@Bivort2016]"])[0] # Test formatting using a CSL style plugin.csl_file = os.path.join(test_files_dir, "nature.csl") assert ( - "@test", - "@test", - "1.", - "Author, F. 
& Author, S", - ) == plugin.format_citations(["@test"]) + "[@test]", + "test", + "1", + "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + ) == plugin.format_citations(["[@test]"])[0] - plugin.csl_file = os.path.join(test_files_dir, "nature.csl") assert ( - "@Biovort2016", - "@Biovort2016", + "[@Bivort2016]", + "Bivort2016", "1", - "De Bivort, B. L. & Van Swinderen", - ) == plugin.format_citations(["Bivort2016"]) + "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", + ) == plugin.format_citations(["[@Bivort2016]"])[0] + # Test a CSL that outputs references in a different style plugin.csl_file = os.path.join(test_files_dir, "springer-basic-author-date.csl") - assert ("@test", "@test", "1", "Author F, Author S") == plugin.format_citations( - ["@test"] - ) + assert ( + "[@test]", + "test", + "1", + "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + ) == plugin.format_citations(["[@test]"])[0] def test_find_cite_keys(): @@ -226,3 +215,13 @@ def test_format_pandoc(entries): citations["test2"] == "Author, F. & Author, S. Test Title (TT). *Testing Journal (TJ)* **1**, (2019)." ) + + +def test_on_page_markdown(plugin): + plugin.on_config(plugin.config) + test_markdown = "This is a citation. [@test]\n\n \\bibliography" + + assert ( + "[^1]: First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\." 
+ in plugin.on_page_markdown(test_markdown, None, None, None) + ) From 3bd57bc90ac38823de76e306523716e69053d552 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:27:46 -0800 Subject: [PATCH 10/22] finish integration --- mkdocs_bibtex/plugin.py | 159 +++++++++++----------------------------- 1 file changed, 43 insertions(+), 116 deletions(-) diff --git a/mkdocs_bibtex/plugin.py b/mkdocs_bibtex/plugin.py index d7b3e4f..88e5396 100644 --- a/mkdocs_bibtex/plugin.py +++ b/mkdocs_bibtex/plugin.py @@ -1,14 +1,18 @@ import re -from pathlib import Path -import tempfile from collections import OrderedDict +from pathlib import Path -import pypandoc from mkdocs.config import config_options from mkdocs.plugins import BasePlugin -from pybtex.backends.markdown import Backend as MarkdownBackend from pybtex.database import BibliographyData, parse_file -from pybtex.style.formatting.plain import Style as PlainStyle + +from mkdocs_bibtex.utils import ( + find_cite_keys, + format_bibliography, + format_pandoc, + format_simple, + insert_citation_keys, +) class BibTexPlugin(BasePlugin): @@ -30,7 +34,6 @@ class BibTexPlugin(BasePlugin): ("bib_command", config_options.Type(str, default="\\bibliography")), ("full_bib_command", config_options.Type(str, default="\\full_bibliography")), ("csl_file", config_options.File(exists=True, required=False)), - ("unescape_for_arithmatex", config_options.Type(bool, required=False)), ] def __init__(self): @@ -62,8 +65,6 @@ def on_config(self, config): self.csl_file = self.config.get("csl_file", None) - self.unescape_for_arithmatex = self.config.get("unescape_for_arithmatex", False) - return config def on_page_markdown(self, markdown, page, config, files): @@ -72,7 +73,7 @@ def on_page_markdown(self, markdown, page, config, files): If a local reference list is requested, this will render that list where requested 1. Finds all cite keys (may include multiple citation references) - 2. 
Convert all cite keys to citation quads: + 2. Convert all cite keys to citation quads: (full cite key, induvidual cite key, citation key in corresponding style, @@ -89,7 +90,7 @@ def on_page_markdown(self, markdown, page, config, files): citation_quads = self.format_citations(cite_keys) # 3. Insert in numbers into the main markdown and build bibliography - markdown = insert_citation_keys(citation_quads,markdown) + markdown = insert_citation_keys(citation_quads, markdown) # 4. Insert in the bibliopgrahy text into the markdown bibliography = format_bibliography(citation_quads) @@ -118,117 +119,43 @@ def format_citations(self, cite_keys): Returns: citation_quads: quad tupples of the citation inforamtion """ - pass - @property - def full_bibliography(self): - """ - Returns the full bibliography text - """ - full_bibliography = [] + # Deal with arithmatex fix at some point + - for number, key in enumerate(self.all_references.keys()): - bibliography_text = "{}: {}".format(number + 1, self.all_references[key]) - full_bibliography.append(bibliography_text) + # 1. 
First collect any unformated references + entries = {} + for key_set in cite_keys: + for key in key_set.strip().strip("]").strip("[").split(";"): + key = key.strip().strip("@") + if key not in self.all_references: + entries[key] = self.bib_data.entries[key] - return "\n".join(full_bibliography) - - -def to_markdown_pandoc(entry, csl_path): - """ - Converts the PyBtex entry into formatted markdown citation text - """ - bibtex_string = BibliographyData(entries={entry.key: entry}).to_string("bibtex") - if tuple(int(ver) for ver in pypandoc.get_pandoc_version().split(".")) >= ( - 2, - 11, - ): - markdown = pypandoc.convert_text( - source=bibtex_string, - to="markdown-citations", - format="bibtex", - extra_args=[ - "--citeproc", - "--csl", - csl_path, - ], - ) - - # This should cut off the pandoc preamble and ending triple colons - markdown = " ".join(markdown.split("\n")[2:-2]) - - citation_regex = re.compile( - r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}" - ) - try: - - citation = citation_regex.findall(markdown)[0] - except IndexError: - citation = markdown - else: - # Older citeproc-filter version of pandoc - with tempfile.TemporaryDirectory() as tmpdir: - bib_path = Path(tmpdir).joinpath("temp.bib") - with open(bib_path, "w") as bibfile: - bibfile.write(bibtex_string) - citation_text = """ ---- -nocite: '@*' ---- -""" - - markdown = pypandoc.convert_text( - source=citation_text, - to="markdown_strict", - format="md", - extra_args=["--csl", csl_path, "--bibliography", bib_path], - filters=["pandoc-citeproc"], - ) - - citation_regex = re.compile(r"(?:1\.)?(.*)") - citation = citation_regex.findall(markdown.replace("\n", " "))[0] - return citation - - - -def find_cite_keys(markdown): - """ - Finds the cite keys in the markdown text - This function can handle multiple keys in a single reference - - Args: - markdown (str): the markdown text to be extract citation - keys from - """ - - cite_regex = re.compile(r"\[((?:@\w+;{0,1}\s*)+)\]") - cite_keys = 
cite_regex.findall(markdown) - return list(cite_keys) - - -def insert_citation_keys(citation_quads,markdown): - """ - Insert citations into the markdown text replacing - the old citation keys + # 2. Format entries + if self.csl_file: + self.all_references.update(format_pandoc(entries, self.csl_file)) + else: + self.all_references.update(format_simple(entries)) - Args: - citation_quads (tuple): a quad tuple of all citation info - markdown (str): the markdown text to modify + # 3. Construct quads + quads = [] + for key_set in cite_keys: + for key in key_set.strip().strip("]").strip("[").split(";"): + key = key.strip().strip("@") + ref = self.all_references[key] + quads.append((key_set, key, "1", self.all_references[key])) - Returns: - markdown (str): the modified Markdown - """ - pass + return quads + @property + def full_bibliography(self): + """ + Returns the full bibliography text + """ -def format_bibliography(citation_quads): - """ - Generates a bibliography from the citation quads + bibliography = [] + for number, (key, citation) in enumerate(self.all_references.items()): + bibliography_text = "[^{}]: {}".format(number, citation) + bibliography.append(bibliography_text) - Args: - citation_quads (tuple): a quad tuple of all citation info - - Returns: - markdown (str): the Markdown string for the bibliography - """ - pass \ No newline at end of file + return "\n".join(bibliography) From 8116f4decd9a7ff3c26dd54115d030367747832d Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:30:24 -0800 Subject: [PATCH 11/22] move package to src --- setup.py | 3 ++- {mkdocs_bibtex => src/mkdocs_bibtex}/__init__.py | 0 {mkdocs_bibtex => src/mkdocs_bibtex}/plugin.py | 0 {mkdocs_bibtex => src/mkdocs_bibtex}/utils.py | 0 4 files changed, 2 insertions(+), 1 deletion(-) rename {mkdocs_bibtex => src/mkdocs_bibtex}/__init__.py (100%) rename {mkdocs_bibtex => src/mkdocs_bibtex}/plugin.py (100%) rename {mkdocs_bibtex => 
src/mkdocs_bibtex}/utils.py (100%) diff --git a/setup.py b/setup.py index 08f1c54..603599e 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ python_requires=">=3.6", install_requires=["mkdocs>=1", "markdown>=3.1.1", "pybtex>=0.22", "pypandoc>=1.5"], tests_require=["pytest"], - packages=find_packages(), + packages=find_packages("src"), + package_dir={"": "src"}, entry_points={"mkdocs.plugins": ["bibtex = mkdocs_bibtex.plugin:BibTexPlugin"]}, ) diff --git a/mkdocs_bibtex/__init__.py b/src/mkdocs_bibtex/__init__.py similarity index 100% rename from mkdocs_bibtex/__init__.py rename to src/mkdocs_bibtex/__init__.py diff --git a/mkdocs_bibtex/plugin.py b/src/mkdocs_bibtex/plugin.py similarity index 100% rename from mkdocs_bibtex/plugin.py rename to src/mkdocs_bibtex/plugin.py diff --git a/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py similarity index 100% rename from mkdocs_bibtex/utils.py rename to src/mkdocs_bibtex/utils.py From e6b99e6768d76124841d4a81e83e21d63094d79b Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:33:44 -0800 Subject: [PATCH 12/22] pre-commit formatting --- .github/workflows/post-process.yml | 2 +- src/mkdocs_bibtex/plugin.py | 2 -- src/mkdocs_bibtex/utils.py | 2 +- test_files/springer-basic-author-date.csl | 2 +- test_files/test.bib | 1 - test_files/test_plugin.py | 6 +++--- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/post-process.yml b/.github/workflows/post-process.yml index fa77f62..d7241f9 100644 --- a/.github/workflows/post-process.yml +++ b/.github/workflows/post-process.yml @@ -20,7 +20,7 @@ jobs: uses: ridedott/merge-me-action@v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + auto-gen-release: runs-on: ubuntu-latest env: diff --git a/src/mkdocs_bibtex/plugin.py b/src/mkdocs_bibtex/plugin.py index 88e5396..c62851e 100644 --- a/src/mkdocs_bibtex/plugin.py +++ b/src/mkdocs_bibtex/plugin.py @@ -121,7 +121,6 @@ def format_citations(self, 
cite_keys): """ # Deal with arithmatex fix at some point - # 1. First collect any unformated references entries = {} @@ -142,7 +141,6 @@ def format_citations(self, cite_keys): for key_set in cite_keys: for key in key_set.strip().strip("]").strip("[").split(";"): key = key.strip().strip("@") - ref = self.all_references[key] quads.append((key_set, key, "1", self.all_references[key])) return quads diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index fe1d310..72b54c9 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -82,7 +82,7 @@ def _convert_pandoc_new(bibtex_string, csl_path): return citation -def _convert_pandoc_legacy(bibtex_string, csl_file): +def _convert_pandoc_legacy(bibtex_string, csl_path): """ Converts the PyBtex entry into formatted markdown citation text using pandoc version older than 2.11 diff --git a/test_files/springer-basic-author-date.csl b/test_files/springer-basic-author-date.csl index db74824..5edf631 100644 --- a/test_files/springer-basic-author-date.csl +++ b/test_files/springer-basic-author-date.csl @@ -236,4 +236,4 @@ - \ No newline at end of file + diff --git a/test_files/test.bib b/test_files/test.bib index d77845b..4fedd56 100644 --- a/test_files/test.bib +++ b/test_files/test.bib @@ -29,4 +29,3 @@ @article{Bivort2016 keywords = {attention,bees,drosophila,insects}, pmid = {27436727} } - diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index eb7634f..5ab3fb0 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -85,7 +85,7 @@ def test_format_citations(plugin): "[@Bivort2016]", "Bivort2016", "1", - "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", + "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. 
*Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", # noqa: E501 ) == plugin.format_citations(["[@Bivort2016]"])[0] # Test formatting using a CSL style @@ -101,7 +101,7 @@ def test_format_citations(plugin): "[@Bivort2016]", "Bivort2016", "1", - "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", + "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", # noqa: E501 ) == plugin.format_citations(["[@Bivort2016]"])[0] # Test a CSL that outputs references in a different style @@ -149,7 +149,7 @@ def test_insert_citation_keys(): "[@test; @test2]", "@test2", "2", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", # noqa: E501 ), ], "[@test; @test2]", From d19babf0ff67b26c5217b78369fa4c9598ca6fa5 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:48:01 -0800 Subject: [PATCH 13/22] remove stale print --- test_files/test_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 5ab3fb0..14b565c 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -189,7 +189,6 @@ def test_format_simple(entries): assert all(k in citations for k in entries) assert all(entry != citations[k] for k, entry in entries.items()) - print(citations) assert ( citations["test"] == "First Author and Second Author\\. Test title\\. 
*Testing Journal*, 2019\\." From 3af3908d21b8911d554b7ed3c195e878fee0054c Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:48:14 -0800 Subject: [PATCH 14/22] parse multiline markdown --- src/mkdocs_bibtex/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index 72b54c9..a23bca8 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -76,7 +76,7 @@ def _convert_pandoc_new(bibtex_string, csl_path): citation_regex = re.compile(r"\{\.csl-left-margin\}\[(.*)\]\{\.csl-right-inline\}") try: - citation = citation_regex.findall(markdown)[0] + citation = citation_regex.findall(markdown.replace("\n", " "))[0] except IndexError: citation = markdown return citation From 7affef938184525bab519e13dba037a0b2bade07 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:48:32 -0800 Subject: [PATCH 15/22] fix linting for new src heirarchy --- .github/workflows/testing.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index d84219d..59d40b4 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -30,20 +30,21 @@ jobs: - name: Lint with pycodestyle run: | pip install pycodestyle - pycodestyle mkdocs_bibtex + pycodestyle src - name: Lint with mypy run: | pip install mypy - mypy mkdocs_bibtex + mypy src - name: Lint with flake8 run: | pip install flake8 # stop the build if there are Python syntax errors or undefined names - flake8 --count --show-source --statistics mkdocs_bibtex + flake8 --count --show-source --statistics src # exit-zero treats all errors as warnings. 
- flake8 --count --exit-zero --max-complexity=20 --statistics mkdocs_bibtex + flake8 --count --exit-zero --max-complexity=20 --statistics src + test: strategy: max-parallel: 6 From 7086f6f336c0b0be2015ee92739493de0f53b807 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 18:59:19 -0800 Subject: [PATCH 16/22] make first capture group optional --- src/mkdocs_bibtex/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index a23bca8..b96ee6e 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -105,7 +105,7 @@ def _convert_pandoc_legacy(bibtex_string, csl_path): filters=["pandoc-citeproc"], ) - citation_regex = re.compile(r"(1\.)?(.*)") + citation_regex = re.compile(r"(?:1\.)?(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation From 2264b423417b0179aa4ec6fcb195bda9fd94faa4 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 20:43:55 -0800 Subject: [PATCH 17/22] better regex for legacy parsing --- src/mkdocs_bibtex/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index b96ee6e..a8bdbe8 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -105,7 +105,7 @@ def _convert_pandoc_legacy(bibtex_string, csl_path): filters=["pandoc-citeproc"], ) - citation_regex = re.compile(r"(?:1\.)?(.*)") + citation_regex = re.compile(r"([\d\.\\]+)\s+(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation From 7b9d0f87c593841b49655f295c4d50a193c61ddb Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 20:49:58 -0800 Subject: [PATCH 18/22] ensure only one capture group --- src/mkdocs_bibtex/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index a8bdbe8..35c123d 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -105,7 +105,7 @@ def _convert_pandoc_legacy(bibtex_string, csl_path): filters=["pandoc-citeproc"], ) - citation_regex = re.compile(r"([\d\.\\]+)\s+(.*)") + citation_regex = re.compile(r"(?:[\d\.\\]+)\s+(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation From 2cec1f05f2142ccca8edda4f3fb5c1c2c8a5cbe7 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 20:50:11 -0800 Subject: [PATCH 19/22] sometimes i hate regex --- src/mkdocs_bibtex/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index 35c123d..26f4e6f 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -105,7 +105,7 @@ def _convert_pandoc_legacy(bibtex_string, csl_path): filters=["pandoc-citeproc"], ) - citation_regex = re.compile(r"(?:[\d\.\\]+)\s+(.*)") + citation_regex = re.compile(r"[\d\.\\\s]*(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] return citation From 3fb00db3ddca4e1d7cc7ed6ce7980247eec10879 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 20:53:12 -0800 Subject: [PATCH 20/22] remove padded whitespaces --- src/mkdocs_bibtex/utils.py | 4 ++-- test_files/test_plugin.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index 26f4e6f..985e2f5 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -79,7 +79,7 @@ def _convert_pandoc_new(bibtex_string, csl_path): citation = citation_regex.findall(markdown.replace("\n", " "))[0] except IndexError: citation = markdown - return citation + return citation.strip() def _convert_pandoc_legacy(bibtex_string, csl_path): @@ -107,7 +107,7 @@ def 
_convert_pandoc_legacy(bibtex_string, csl_path): citation_regex = re.compile(r"[\d\.\\\s]*(.*)") citation = citation_regex.findall(markdown.replace("\n", " "))[0] - return citation + return citation.strip() def find_cite_keys(markdown): diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 14b565c..78bc76e 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -205,7 +205,6 @@ def test_format_pandoc(entries): assert all(k in citations for k in entries) assert all(entry != citations[k] for k, entry in entries.items()) - print(citations) assert ( citations["test"] == "Author, F. & Author, S. Test title. *Testing Journal* **1**, (2019)." From 1f198ea1f4c6cd4414b2a8ce8375fc1fe91ed887 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 21:07:26 -0800 Subject: [PATCH 21/22] fix backslashing in pybtex parsiong --- src/mkdocs_bibtex/utils.py | 4 +++- test_files/test_plugin.py | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/mkdocs_bibtex/utils.py b/src/mkdocs_bibtex/utils.py index 985e2f5..044e005 100644 --- a/src/mkdocs_bibtex/utils.py +++ b/src/mkdocs_bibtex/utils.py @@ -28,7 +28,9 @@ def format_simple(entries): entry_text = formatted_entry.text.render(backend) entry_text = entry_text.replace("\n", " ") # Local reference list for this file - citations[key] = entry_text + citations[key] = ( + entry_text.replace("\\(", "(").replace("\\)", ")").replace("\\.", ".") + ) return citations diff --git a/test_files/test_plugin.py b/test_files/test_plugin.py index 78bc76e..2d554ed 100644 --- a/test_files/test_plugin.py +++ b/test_files/test_plugin.py @@ -54,14 +54,14 @@ def test_format_citations(plugin): "[@test]", "test", "1", - "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + "First Author and Second Author. Test title. 
*Testing Journal*, 2019.", ) == plugin.format_citations(["[@test]"])[0] assert ( "[@test2]", "test2", "1", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\.", + "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019.", ) == plugin.format_citations(["[@test2]"])[0] # Test compound citation @@ -70,13 +70,13 @@ def test_format_citations(plugin): "[@test; @test2]", "test", "1", - "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + "First Author and Second Author. Test title. *Testing Journal*, 2019.", ), ( "[@test; @test2]", "test2", "1", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\.", + "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019.", ), ] == plugin.format_citations(["[@test; @test2]"]) @@ -85,7 +85,7 @@ def test_format_citations(plugin): "[@Bivort2016]", "Bivort2016", "1", - "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. *Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", # noqa: E501 + "Benjamin L. De Bivort and Bruno Van Swinderen. Evidence for selective attention in the insect brain. *Current Opinion in Insect Science*, 15:1–7, 2016. [doi:10.1016/j.cois.2016.02.007](https://doi.org/10.1016/j.cois.2016.02.007).", # noqa: E501 ) == plugin.format_citations(["[@Bivort2016]"])[0] # Test formatting using a CSL style @@ -94,14 +94,14 @@ def test_format_citations(plugin): "[@test]", "test", "1", - "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + "First Author and Second Author. Test title. *Testing Journal*, 2019.", ) == plugin.format_citations(["[@test]"])[0] assert ( "[@Bivort2016]", "Bivort2016", "1", - "Benjamin L\\. De Bivort and Bruno Van Swinderen\\. Evidence for selective attention in the insect brain\\. 
*Current Opinion in Insect Science*, 15:1–7, 2016\\. [doi:10\\.1016/j\\.cois\\.2016\\.02\\.007](https://doi.org/10.1016/j.cois.2016.02.007)\\.", # noqa: E501 + "Benjamin L. De Bivort and Bruno Van Swinderen. Evidence for selective attention in the insect brain. *Current Opinion in Insect Science*, 15:1–7, 2016. [doi:10.1016/j.cois.2016.02.007](https://doi.org/10.1016/j.cois.2016.02.007).", # noqa: E501 ) == plugin.format_citations(["[@Bivort2016]"])[0] # Test a CSL that outputs references in a different style @@ -110,7 +110,7 @@ def test_format_citations(plugin): "[@test]", "test", "1", - "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\.", + "First Author and Second Author. Test title. *Testing Journal*, 2019.", ) == plugin.format_citations(["[@test]"])[0] @@ -149,7 +149,7 @@ def test_insert_citation_keys(): "[@test; @test2]", "@test2", "2", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", # noqa: E501 + "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019", # noqa: E501 ), ], "[@test; @test2]", @@ -170,7 +170,7 @@ def test_format_bibliography(): "[@test; @test2]", "@test2", "2", - "First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019", + "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019", ), ] @@ -178,7 +178,7 @@ def test_format_bibliography(): assert "[^1]: First Author and Second Author" in bib assert ( - "[^2]: First Author and Second Author\\. Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019" + "[^2]: First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019" in bib ) @@ -191,11 +191,11 @@ def test_format_simple(entries): assert ( citations["test"] - == "First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\." + == "First Author and Second Author. Test title. *Testing Journal*, 2019." ) assert ( citations["test2"] - == "First Author and Second Author\\. 
Test Title \\(TT\\)\\. *Testing Journal \\(TJ\\)*, 2019\\." + == "First Author and Second Author. Test Title (TT). *Testing Journal (TJ)*, 2019." ) @@ -220,6 +220,6 @@ def test_on_page_markdown(plugin): test_markdown = "This is a citation. [@test]\n\n \\bibliography" assert ( - "[^1]: First Author and Second Author\\. Test title\\. *Testing Journal*, 2019\\." + "[^1]: First Author and Second Author. Test title. *Testing Journal*, 2019." in plugin.on_page_markdown(test_markdown, None, None, None) ) From 9943d36097a413ab1ece468fe6854a6d17448165 Mon Sep 17 00:00:00 2001 From: shyamd <16827130+shyamd@users.noreply.github.com> Date: Wed, 19 Jan 2022 22:21:26 -0800 Subject: [PATCH 22/22] Update README --- README.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5a4c86e..3afed16 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ plugins: - search - bibtex: bib_file: "refs.bib" - cite_style: "pandoc" markdown_extensions: - footnotes ``` @@ -36,17 +35,15 @@ The footnotes extension is how citations are linked for now. - `bib_file` - Name of your bibtex file. 
Either the absolute path or the path relative to `mkdocs.yml` - `bib_dir` - Directory for bibtex files to load, same as above for path resolution -- `cite_style` - The way you place citations into text: "pandoc" for `[@myRef]` and "plain" for `@myRef` - `bib_command` - The command for your bibliography, defaults to `\bibliography` -- `full_bib_command` - The command for your bibliography, defaults to `\full_bibliography` +- `full_bib_command` - The command for your full bibliography, defaults to `\full_bibliography` - `csl_file` - Bibtex CSL file to format the citation with, defaults to None, using a built in plain format instead -- `unescape_for_arithmatex` - Optional; set to `True` to avoid the `\(` `\)` [issue](https://github.com/shyamd/mkdocs-bibtex/issues/3) with [pymdownx.arithmatex](https://facelessuser.github.io/pymdown-extensions/extensions/arithmatex/) ## Usage In your markdown files: -1. Add your citations as you would normally using either "plain" or "pandoc" style +1. Add your citations as you would if you used pandoc, IE: `[@first_cite;@second_cite]` 2. Add in `\bibliography` or whatever you set your `bib_command` to where you want your references. -3. Add in `\full_bibliography` or whatever you set your `full_bib_command` to where you want the full set of references. *Note*: This is not guaranteed to work yet since one issue is the order in which markdown files are processed. Might need to do something using the `on_files()` event first. +3. Add in `\full_bibliography` or whatever you set your `full_bib_command` to where you want the full set of references. *Note*: This does not work quite right yet since this plugin can't dictate the order in which files are processed. The best way to ensure the file with the full bibliography gets processed last is to use numbers in front of file/folder names to enforce an order of processing, IE: `01_my_first_file.md` 4. (Optional) Setup `csl_file` to control the citation text formatting.