diff --git a/news/changelog-1.4.md b/news/changelog-1.4.md index 749573e78b..4da357c4e7 100644 --- a/news/changelog-1.4.md +++ b/news/changelog-1.4.md @@ -223,6 +223,10 @@ - ([#6620](https://github.com/quarto-dev/quarto-cli/issues/6620)): Introduce `FloatRefTarget` AST nodes that generalize crossref targets to include figures, tables, and custom floating elements. - ([#7200](https://github.com/quarto-dev/quarto-cli/issues/7200)): Support Unicode in subref labels. +## Input format + +- ([#7905](https://github.com/quarto-dev/quarto-cli/issues/7905)): Use `html+raw_html` as input format when processing HTML rawblocks for tables to avoid Pandoc converting SVG elements to images. + ## Other Fixes and Improvements - Exit if project pre or post render script fails diff --git a/src/resources/filters/normalize/parsehtml.lua b/src/resources/filters/normalize/parsehtml.lua index cb2a4b2d0a..127f34f169 100644 --- a/src/resources/filters/normalize/parsehtml.lua +++ b/src/resources/filters/normalize/parsehtml.lua @@ -53,6 +53,13 @@ function parse_html_tables() return nil end + -- we're already at a state of sin here, cf https://stackoverflow.com/a/1732454 + -- but this is important enough to do a little more work anyway + -- + -- specifically, we should do our best not to break good HTML when it's there + + + local tableBegin,tableBody,tableEnd = el.text:match(pat) if tableBegin then local before_table = string.sub(el.text, 1, i - 1) @@ -75,7 +82,9 @@ function parse_html_tables() -- annotated td elements with th elements. tableHtml = preprocess_table_text(tableHtml) - local tableDoc = pandoc.read(tableHtml, "html") + -- process html with raw_html so that contents that are not parseable + -- by Pandoc end up as rawblock elements + local tableDoc = pandoc.read(tableHtml, "html+raw_html") local skip = false local found = false _quarto.ast.walk(tableDoc, { diff --git a/tests/docs/smoke-all/2023/11/10/7905.qmd b/tests/docs/smoke-all/2023/11/10/7905.qmd new file mode 100644 index 0000000000..5ecb26956e --- /dev/null +++ b/tests/docs/smoke-all/2023/11/10/7905.qmd @@ -0,0 +1,17 @@ +--- +title: "7905" +_quarto: + tests: + html: + ensureFileRegexMatches: + - [] + - + - "svg\\+xml" # can't use ensureHtmlElements because svg is not part of the HTML DOM + # so we instead ensure that pandoc didn't transform svg into data-uri-encoded image +--- + +```{=html} + + +
+```