From abca1cdb37cdf91429ea172779ca31c145aa7aab Mon Sep 17 00:00:00 2001 From: Nicola Ricciardi Date: Wed, 10 Jul 2024 01:16:24 +0200 Subject: [PATCH] Fixed svg and html tags --- Cargo.toml | 1 + src/cli.rs | 2 - src/compiler/assembler/html_assembler.rs | 12 +- src/compiler/codex.rs | 60 +++---- .../codex/modifier/standard_text_modifier.rs | 3 +- .../dossier/document/chapter/paragraph.rs | 2 + src/compiler/loader.rs | 5 + src/compiler/parsing/parsing_error.rs | 5 +- .../parsing/parsing_rule/constants.rs | 8 +- .../html_extended_block_quote_rule.rs | 6 +- .../parsing/parsing_rule/html_image_rule.rs | 120 +++++++------- .../parsing/parsing_rule/html_list_rule.rs | 8 +- .../parsing/parsing_rule/html_table_rule.rs | 10 +- .../parsing/parsing_rule/replacement_rule.rs | 39 ++++- src/resource/image_resource.rs | 149 ++++++++++++++---- src/utility.rs | 3 +- src/utility/text_utility.rs | 13 ++ 17 files changed, 312 insertions(+), 134 deletions(-) create mode 100644 src/utility/text_utility.rs diff --git a/Cargo.toml b/Cargo.toml index bd575d6..f9fd253 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ clap = "4.4.18" env_logger = "0.10.1" getset = "0.1.2" image = "0.24.8" +infer = "0.16.0" log = "0.4.20" notify = "6.1.1" once_cell = "1.19.0" diff --git a/src/cli.rs b/src/cli.rs index fdc90ca..139d20b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -127,9 +127,7 @@ impl NmdCli { .help("compile only a documents subset") .action(ArgAction::Append) ) - ) - ) .subcommand( Command::new("generate") diff --git a/src/compiler/assembler/html_assembler.rs b/src/compiler/assembler/html_assembler.rs index 168e52e..efe0d72 100644 --- a/src/compiler/assembler/html_assembler.rs +++ b/src/compiler/assembler/html_assembler.rs @@ -257,7 +257,9 @@ impl Assembler for HtmlAssembler { div_chapter = div_chapter.with_attributes(vec![("class", tag.value().as_ref().unwrap().as_str())]) }, - _ => () + _ => { + log::warn!("chapter tag key not supported yet") + } } } @@ -275,7 +277,13 @@ impl Assembler for HtmlAssembler { for paragraph in chapter.paragraphs() { if let Some(parsed_content) = paragraph.parsed_content().as_ref() { - div_chapter_content.push_str(&parsed_content.parsed_content()); + let parsed_content = parsed_content.parsed_content(); + + if parsed_content.is_empty() { + continue; + } + + div_chapter_content.push_str(&parsed_content); } else { return Err(AssemblerError::ParsedContentNotFound) diff --git a/src/compiler/codex.rs b/src/compiler/codex.rs index 63d7ab2..159abf5 100644 --- a/src/compiler/codex.rs +++ b/src/compiler/codex.rs @@ -3,6 +3,7 @@ pub mod modifier; use std::collections::HashMap; use getset::{Getters, Setters}; +use regex::Regex; use self::modifier::standard_paragraph_modifier::StandardParagraphModifier; use self::modifier::standard_text_modifier::StandardTextModifier; @@ -10,6 +11,7 @@ use self::modifier::ModifierIdentifier; use crate::compiler::output_format::OutputFormat; use self::codex_configuration::CodexConfiguration; +use super::parsing::parsing_rule::constants::ESCAPE_HTML; use super::parsing::parsing_rule::html_cite_rule::HtmlCiteRule; use super::parsing::parsing_rule::html_extended_block_quote_rule::HtmlExtendedBlockQuoteRule; use super::parsing::parsing_rule::html_greek_letter_rule::HtmlGreekLettersRule; @@ -74,7 +76,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])) as Box, + ]).with_pre_replacing(ESCAPE_HTML.clone())) as Box, ), ( StandardTextModifier::BookmarkWithId.identifier().clone(), @@ -84,7 +86,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"$3"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Bookmark.identifier().clone(), @@ -94,7 +96,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$2"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::GreekLetter.identifier().clone(), @@ -107,7 +109,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#">
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::AbridgedBookmark.identifier().clone(), @@ -115,7 +117,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::EmbeddedStyleWithId.identifier().clone(), @@ -123,7 +125,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)).with_references_at(vec![2]), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::EmbeddedStyle.identifier().clone(), @@ -131,7 +133,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::AbridgedEmbeddedStyleWithId.identifier().clone(), @@ -139,7 +141,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)).with_references_at(vec![2]), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::AbridgedEmbeddedStyle.identifier().clone(), @@ -147,7 +149,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Identifier.identifier().clone(), @@ -155,7 +157,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)).with_references_at(vec![2]), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Highlight.identifier().clone(), @@ -163,7 +165,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::InlineMath.identifier().clone(), @@ -171,7 +173,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"$$"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"$$"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::InlineCode.identifier().clone(), @@ -179,7 +181,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::BoldStarVersion.identifier().clone(), @@ -187,7 +189,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::BoldUnderscoreVersion.identifier().clone(), @@ -195,7 +197,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::ItalicStarVersion.identifier().clone(), @@ -203,7 +205,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::ItalicUnderscoreVersion.identifier().clone(), @@ -211,7 +213,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Strikethrough.identifier().clone(), @@ -219,7 +221,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Underlined.identifier().clone(), @@ -227,7 +229,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Superscript.identifier().clone(), @@ -235,7 +237,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Subscript.identifier().clone(), @@ -243,7 +245,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Link.identifier().clone(), @@ -251,7 +253,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)).with_references_at(vec![2]), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Comment.identifier().clone(), @@ -259,7 +261,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Checkbox.identifier().clone(), @@ -271,13 +273,13 @@ impl Codex { StandardTextModifier::CheckboxChecked.identifier().clone(), Box::new(ReplacementRule::new(StandardTextModifier::CheckboxChecked.modifier_pattern().clone(), vec![ ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Emoji.identifier().clone(), Box::new(ReplacementRule::new(StandardTextModifier::Emoji.modifier_pattern().clone(), vec![ ReplacementRuleReplacerPart::new_fixed(String::from(r#""#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardTextModifier::Escape.identifier().clone(), @@ -332,7 +334,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardParagraphModifier::MultilineTodo.identifier().clone(), @@ -340,7 +342,7 @@ impl Codex { ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), ReplacementRuleReplacerPart::new_mutable(String::from(r#"$1"#)), ReplacementRuleReplacerPart::new_fixed(String::from(r#"
"#)), - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardParagraphModifier::AbridgedEmbeddedParagraphStyle.identifier().clone(), @@ -366,7 +368,7 @@ impl Codex { StandardParagraphModifier::MathBlock.identifier().clone(), Box::new(ReplacementRule::new(StandardParagraphModifier::MathBlock.modifier_pattern().clone(), vec![ ReplacementRuleReplacerPart::new_fixed(String::from(r#"

$$$$${1}$$$$

"#)) - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardParagraphModifier::Image.identifier().clone(), @@ -384,7 +386,7 @@ impl Codex { StandardParagraphModifier::CodeBlock.identifier().clone(), Box::new(ReplacementRule::new(StandardParagraphModifier::CodeBlock.modifier_pattern().clone(), vec![ ReplacementRuleReplacerPart::new_fixed(String::from(r#"
$2
"#)) - ])), + ]).with_pre_replacing(ESCAPE_HTML.clone())) ), ( StandardParagraphModifier::List.identifier().clone(), diff --git a/src/compiler/codex/modifier/standard_text_modifier.rs b/src/compiler/codex/modifier/standard_text_modifier.rs index ed0e038..2eeac15 100644 --- a/src/compiler/codex/modifier/standard_text_modifier.rs +++ b/src/compiler/codex/modifier/standard_text_modifier.rs @@ -2,8 +2,7 @@ use super::{base_modifier::BaseModifier, constants::NEW_LINE, modifiers_bucket:: #[derive(Debug, PartialEq, Clone)] pub enum StandardTextModifier { - - // CONTENT MODIFIERs + BoldStarVersion, BoldUnderscoreVersion, ItalicStarVersion, diff --git a/src/compiler/dossier/document/chapter/paragraph.rs b/src/compiler/dossier/document/chapter/paragraph.rs index 402893e..2281637 100644 --- a/src/compiler/dossier/document/chapter/paragraph.rs +++ b/src/compiler/dossier/document/chapter/paragraph.rs @@ -51,6 +51,8 @@ impl Parsable for Paragraph { let parsing_outcome = Parser::parse_paragraph(&codex, self, Arc::clone(&parsing_configuration), parsing_configuration_overlay)?; + log::debug!("end to parse paragraph:\n{:#?}", parsing_outcome); + self.parsed_content = Some(parsing_outcome); Ok(()) diff --git a/src/compiler/loader.rs b/src/compiler/loader.rs index 3b7f39e..0f99e1d 100644 --- a/src/compiler/loader.rs +++ b/src/compiler/loader.rs @@ -268,6 +268,11 @@ impl Loader { return } + if matched_str.is_empty() { + log::debug!("paragraph discarded because empty"); + return; + } + let paragraph = Paragraph::new(matched_str, paragraph_modifier.identifier().clone()); if !paragraph.contains_only_newlines() { diff --git a/src/compiler/parsing/parsing_error.rs b/src/compiler/parsing/parsing_error.rs index 9f55f18..4a14dbc 100644 --- a/src/compiler/parsing/parsing_error.rs +++ b/src/compiler/parsing/parsing_error.rs @@ -1,6 +1,6 @@ use thiserror::Error; -use crate::resource::resource_reference::ResourceReferenceError; +use crate::resource::{resource_reference::ResourceReferenceError, ResourceError}; #[derive(Error, Debug)] pub enum ParsingError { @@ -19,6 +19,9 @@ pub enum ParsingError { #[error(transparent)] ReferenceError(#[from] ResourceReferenceError), + #[error(transparent)] + ResourceError(#[from] ResourceError), + #[error("unknown error occurs")] Unknown, } \ No newline at end of file diff --git a/src/compiler/parsing/parsing_rule/constants.rs b/src/compiler/parsing/parsing_rule/constants.rs index 92df2e3..06d5208 100644 --- a/src/compiler/parsing/parsing_rule/constants.rs +++ b/src/compiler/parsing/parsing_rule/constants.rs @@ -8,4 +8,10 @@ use crate::compiler::codex::modifier::constants::NEW_LINE; pub static DOUBLE_NEW_LINE_REGEX: Lazy = Lazy::new(|| Regex::new(&format!("{}{{2}}", NEW_LINE)).unwrap()); -pub const SPACE_TAB_EQUIVALENCE: &str = r" "; \ No newline at end of file +pub const SPACE_TAB_EQUIVALENCE: &str = r" "; + +pub static ESCAPE_HTML: Lazy> = Lazy::new(|| vec![ + (Regex::new(r"&").unwrap(), "&".to_string()), + (Regex::new(r"<").unwrap(), "<".to_string()), + (Regex::new(r">").unwrap(), ">".to_string()), +]); \ No newline at end of file diff --git a/src/compiler/parsing/parsing_rule/html_extended_block_quote_rule.rs b/src/compiler/parsing/parsing_rule/html_extended_block_quote_rule.rs index f89fa76..2cac16c 100644 --- a/src/compiler/parsing/parsing_rule/html_extended_block_quote_rule.rs +++ b/src/compiler/parsing/parsing_rule/html_extended_block_quote_rule.rs @@ -3,9 +3,9 @@ use std::sync::{Arc, RwLock}; use once_cell::sync::Lazy; use regex::Regex; -use crate::compiler::{codex::{modifier::{constants::NEW_LINE, modifiers_bucket::ModifiersBucket, standard_paragraph_modifier::StandardParagraphModifier, Modifier}, Codex}, parsing::{parsing_configuration::ParsingConfiguration, parsing_error::ParsingError, parsing_outcome::{ParsingOutcome, ParsingOutcomePart}}}; +use crate::{compiler::{codex::{modifier::{constants::NEW_LINE, modifiers_bucket::ModifiersBucket, standard_paragraph_modifier::StandardParagraphModifier, Modifier}, Codex}, parsing::{parsing_configuration::ParsingConfiguration, parsing_error::ParsingError, parsing_outcome::{ParsingOutcome, ParsingOutcomePart}}}, utility::text_utility}; -use super::{constants::DOUBLE_NEW_LINE_REGEX, ParsingRule}; +use super::{constants::{DOUBLE_NEW_LINE_REGEX, ESCAPE_HTML}, ParsingRule}; static CHECK_EXTENDED_BLOCK_QUOTE_REGEX: Lazy = Lazy::new(|| Regex::new(r"(?:^(?m:^> \[!(.*)\]))").unwrap()); @@ -74,7 +74,7 @@ impl ParsingRule for HtmlExtendedBlockQuoteRule {
"#, quote_type, quote_type, quote_type) }, - ParsingOutcomePart::Mutable { content: tag_body }, + ParsingOutcomePart::Mutable { content: text_utility::replace(&tag_body, &ESCAPE_HTML) }, ParsingOutcomePart::Fixed { content: String::from("
") } ]); diff --git a/src/compiler/parsing/parsing_rule/html_image_rule.rs b/src/compiler/parsing/parsing_rule/html_image_rule.rs index 5ae3ae2..70f9fce 100644 --- a/src/compiler/parsing/parsing_rule/html_image_rule.rs +++ b/src/compiler/parsing/parsing_rule/html_image_rule.rs @@ -17,8 +17,10 @@ use crate::compiler::parser::Parser; use crate::compiler::parsing::parsing_configuration::ParsingConfiguration; use crate::compiler::parsing::parsing_error::ParsingError; use crate::compiler::parsing::parsing_outcome::ParsingOutcome; +use crate::compiler::parsing::parsing_rule::constants::ESCAPE_HTML; use crate::resource::resource_reference::ResourceReference; use crate::resource::{image_resource::ImageResource, remote_resource::RemoteResource}; +use crate::utility::text_utility; use super::ParsingRule; @@ -74,7 +76,7 @@ impl HtmlImageRule { panic!("unsupported image modifier identifier"); } - fn create_figure_img(src: &str, alt: Option<&str>, caption: Option<&str>, id: Option, img_classes: Vec<&str>, style: Option) -> String { + fn build_html_img(src: &str, alt: Option<&String>, caption: Option<&String>, id: Option, img_classes: Vec<&str>, style: Option) -> String { let id_attr: String; @@ -94,7 +96,7 @@ impl HtmlImageRule { } if let Some(c) = caption { - html_caption = format!(r#"
{}
"#, c); + html_caption = format!(r#"
{}
"#, text_utility::replace(c, &ESCAPE_HTML)); } else { html_caption = String::new(); } @@ -113,86 +115,80 @@ impl HtmlImageRule { "#, id_attr, src, html_alt, img_classes.join(" "), style_attr, html_caption) } - fn build_img(src: &str, alt: Option<&str>, caption: Option<&str>, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> String { + fn build_not_embed_remote_img(image: &mut ImageResource, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> Result { + let src = Url::parse(image.src().to_str().unwrap()).unwrap(); - if RemoteResource::is_valid_remote_resource(src) { // remote image (e.g. URL) - - if parsing_configuration.embed_remote_image() { - - todo!() + return Ok(Self::build_html_img(&image.src().to_string_lossy().to_string(), image.label().as_ref(), image.caption().as_ref(), id, img_classes, figure_style)) + } - } else { - - let src = Url::parse(src).unwrap(); + fn build_embed_remote_img(image: &mut ImageResource, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> Result { + todo!() + } - return Self::create_figure_img(src.as_str(), alt, caption, id, img_classes, figure_style) - } + fn build_embed_local_img(image: &mut ImageResource, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> Result { + let base64_image = image.to_base64(parsing_configuration.compress_embed_image()); - } else { // local image + if let Some(mt) = image.mime_type().as_ref() { - let create_local_not_embed_figure_img = |src: PathBuf| { - let local_not_embed_src = fs::canonicalize(src).unwrap(); + return Ok(Self::build_html_img(&format!("data:{};base64,{}", mt, base64_image.unwrap()), image.label().as_ref(), image.caption().as_ref(), id, img_classes, figure_style)); - return Self::create_figure_img(local_not_embed_src.to_str().unwrap(), alt, caption, id.clone(), img_classes.clone(), figure_style.clone()); - }; + } else { + if parsing_configuration.strict_image_src_check() { - let mut src_path_buf = PathBuf::from(src); + return Err(ParsingError::ResourceError(crate::resource::ResourceError::InvalidResourceVerbose(format!("image {:?} mime type not found", image.src())))); - if src_path_buf.is_relative() { + } else { - let image_file_name = src; + log::warn!("{:?} will be parse as local NOT embed image due to an error", image.src()); - src_path_buf = parsing_configuration.input_location().clone().join(image_file_name); + return Ok(Self::build_not_embed_local_img(image, id, img_classes, figure_style, parsing_configuration).unwrap()); + } + } + } - if !src_path_buf.exists() { + fn build_not_embed_local_img(image: &mut ImageResource, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> Result { + let local_not_embed_src = fs::canonicalize(image.src()).unwrap(); - log::debug!("'{}' not found, try adding images directory path", src_path_buf.to_string_lossy()); + return Ok(Self::build_html_img(&local_not_embed_src.to_string_lossy().to_string(), image.label().as_ref(), image.caption().as_ref(), id.clone(), img_classes.clone(), figure_style.clone())); + } - src_path_buf = parsing_configuration.input_location().clone().join(dossier::ASSETS_DIR).join(dossier::IMAGES_DIR).join(image_file_name); - } - } - if src_path_buf.exists() { + fn build_img_from_parsing_configuration(image: &mut ImageResource, id: Option, img_classes: Vec<&str>, figure_style: Option, parsing_configuration: &RwLockReadGuard) -> Result { - if parsing_configuration.embed_local_image() { + if RemoteResource::is_valid_remote_resource(image.src().to_str().unwrap()) { // remote image (e.g. URL) - let image_res = ImageResource::try_from(src_path_buf.clone()); + if parsing_configuration.embed_remote_image() { - if let Ok(image) = image_res { - - let base64_image = image.to_base64(parsing_configuration.compress_embed_image()); - - return Self::create_figure_img(format!("data:image/png;base64,{}", base64_image.unwrap()).as_str(), alt, caption, id, img_classes, figure_style); - - - } else { - if parsing_configuration.strict_image_src_check() { + return Self::build_embed_remote_img(image, id, img_classes, figure_style, parsing_configuration); - image_res.err().unwrap(); - unreachable!() + } else { + + return Self::build_not_embed_remote_img(image, id, img_classes, figure_style, parsing_configuration); + } - } else { + } else { // local image - log::warn!("{:?} will be parse as local not embed image due to an error", src_path_buf); + if image.src().exists() { - return create_local_not_embed_figure_img(src_path_buf); - } - } + if parsing_configuration.embed_local_image() { + return Self::build_embed_local_img(image, id, img_classes, figure_style, parsing_configuration); } else { // local not embed - return create_local_not_embed_figure_img(src_path_buf); + + return Ok(Self::build_not_embed_local_img(image, id, img_classes, figure_style, parsing_configuration).unwrap()); } } else if parsing_configuration.strict_image_src_check() { - log::error!("{}", ParsingError::InvalidSource(String::from(src))); + log::error!("{}", ParsingError::InvalidSource(String::from(image.src().to_string_lossy().to_string()))); panic!("invalid src") } else { - return Self::create_figure_img(src, alt, caption, id, img_classes, figure_style) // create image tag of invalid image instead of panic + + return Ok(Self::build_html_img(&image.src().to_string_lossy().to_string(), image.label().as_ref(), image.caption().as_ref(), id, img_classes, figure_style)) // create image tag of invalid image instead of panic } } @@ -227,12 +223,23 @@ impl HtmlImageRule { if let Some(id) = captures.get(2) { let id = ResourceReference::of_internal_from_without_sharp(id.as_str(), Some(document_name)).unwrap(); - return Self::build_img(src.as_str(), Some(label.as_str()), Some(&parsed_label.parsed_content()), Some(id), vec!["image"], style, &parsing_configuration); + let mut image: ImageResource = ImageResource::new(PathBuf::from(src.as_str()), Some(parsed_label.parsed_content()), Some(label.as_str().to_string())) + .elaborating_relative_path(parsing_configuration.input_location()) + .inferring_mime_type() + .unwrap(); + + return Self::build_img_from_parsing_configuration(&mut image, Some(id), vec!["image"], style, &parsing_configuration).unwrap(); } else { + let id = ResourceReference::of(label.as_str(), Some(document_name)).unwrap(); - return Self::build_img(src.as_str(), Some(label.as_str()), Some(&parsed_label.parsed_content()), Some(id), vec!["image"], style, &parsing_configuration); + let mut image: ImageResource = ImageResource::new(PathBuf::from(src.as_str()), Some(parsed_label.parsed_content()), Some(label.as_str().to_string())) + .elaborating_relative_path(parsing_configuration.input_location()) + .inferring_mime_type() + .unwrap(); + + return Self::build_img_from_parsing_configuration(&mut image, Some(id), vec!["image"], style, &parsing_configuration).unwrap(); } } @@ -245,7 +252,7 @@ impl HtmlImageRule { Ok(ParsingOutcome::new_fixed(parsed_content)) } - fn parse_abridged_image(search_pattern_regex: &Regex, content: &str, codex: &Codex, parsing_configuration: Arc>) -> Result { + fn parse_abridged_image(search_pattern_regex: &Regex, content: &str, _codex: &Codex, parsing_configuration: Arc>) -> Result { let parsing_configuration = parsing_configuration.read().unwrap(); @@ -275,7 +282,12 @@ impl HtmlImageRule { style = None; } - return Self::build_img(src.as_str(), None, None, id, vec!["image", "abridged-image"], style, &parsing_configuration); + let mut image = ImageResource::new(PathBuf::from(src.as_str()), None, None) + .elaborating_relative_path(parsing_configuration.input_location()) + .inferring_mime_type() + .unwrap(); + + return Self::build_img_from_parsing_configuration(&mut image, id, vec!["image", "abridged-image"], style, &parsing_configuration).unwrap(); }).to_string(); @@ -376,7 +388,7 @@ impl ParsingRule for HtmlImageRule { Self::parse_image_from_identifier(&self.image_modifier_identifier, &self.search_pattern_regex, content, codex, Arc::clone(&parsing_configuration)) } - fn fast_parse(&self, content: &str, codex: &Codex, parsing_configuration: Arc>) -> Result { + fn fast_parse(&self, content: &str, _codex: &Codex, _parsing_configuration: Arc>) -> Result { Ok(ParsingOutcome::new_fixed(format!(r#"{}"#, content))) } diff --git a/src/compiler/parsing/parsing_rule/html_list_rule.rs b/src/compiler/parsing/parsing_rule/html_list_rule.rs index afea7e9..732bbc8 100644 --- a/src/compiler/parsing/parsing_rule/html_list_rule.rs +++ b/src/compiler/parsing/parsing_rule/html_list_rule.rs @@ -3,9 +3,9 @@ use std::sync::{Arc, RwLock}; use once_cell::sync::Lazy; use regex::Regex; -use crate::compiler::{codex::{modifier::standard_paragraph_modifier::StandardParagraphModifier, Codex}, parsing::{parsing_configuration::{list_bullet_configuration_record::{self, ListBulletConfigurationRecord}, ParsingConfiguration}, parsing_error::ParsingError, parsing_metadata::ParsingMetadata, parsing_outcome::ParsingOutcome}}; +use crate::{compiler::{codex::{modifier::standard_paragraph_modifier::StandardParagraphModifier, Codex}, parsing::{parsing_configuration::{list_bullet_configuration_record::{self, ListBulletConfigurationRecord}, ParsingConfiguration}, parsing_error::ParsingError, parsing_metadata::ParsingMetadata, parsing_outcome::ParsingOutcome}}, utility::text_utility}; -use super::{constants::SPACE_TAB_EQUIVALENCE, ParsingRule}; +use super::{constants::{ESCAPE_HTML, SPACE_TAB_EQUIVALENCE}, ParsingRule}; static SEARCH_LIST_ITEM_REGEX: Lazy = Lazy::new(|| Regex::new(&StandardParagraphModifier::ListItem.modifier_pattern()).unwrap()); @@ -100,12 +100,14 @@ impl ParsingRule for HtmlListRule { let bullet = Self::bullet_transform(bullet, indentation_level, parsing_configuration.read().unwrap().list_bullets_configuration()); + let content = text_utility::replace(&content, &ESCAPE_HTML); + parsing_outcome.add_fixed_part(r#"
  • "#.to_string()); parsing_outcome.add_fixed_part(LIST_ITEM_INDENTATION.repeat(indentation_level)); parsing_outcome.add_fixed_part(r#""#.to_string()); parsing_outcome.add_fixed_part(bullet); parsing_outcome.add_fixed_part(r#""#.to_string()); - parsing_outcome.add_mutable_part(content.to_string()); + parsing_outcome.add_mutable_part(content); parsing_outcome.add_fixed_part(r#"
  • "#.to_string()); } diff --git a/src/compiler/parsing/parsing_rule/html_table_rule.rs b/src/compiler/parsing/parsing_rule/html_table_rule.rs index 6244234..d9a5cd6 100644 --- a/src/compiler/parsing/parsing_rule/html_table_rule.rs +++ b/src/compiler/parsing/parsing_rule/html_table_rule.rs @@ -6,9 +6,9 @@ use build_html::TableRow as HtmlTableRow; use once_cell::sync::Lazy; use regex::Regex; -use crate::{compiler::{codex::{modifier::{constants::IDENTIFIER_PATTERN, standard_paragraph_modifier::StandardParagraphModifier}, Codex}, parser::Parser, parsing::{parsing_configuration::ParsingConfiguration, parsing_error::ParsingError, parsing_outcome::ParsingOutcome}}, resource::{resource_reference::ResourceReference, table::{Table, TableCell, TableCellAlignment}}}; +use crate::{compiler::{codex::{modifier::{constants::IDENTIFIER_PATTERN, standard_paragraph_modifier::StandardParagraphModifier}, Codex}, parser::Parser, parsing::{parsing_configuration::ParsingConfiguration, parsing_error::ParsingError, parsing_outcome::ParsingOutcome}}, resource::{resource_reference::ResourceReference, table::{Table, TableCell, TableCellAlignment}}, utility::text_utility}; -use super::ParsingRule; +use super::{constants::ESCAPE_HTML, ParsingRule}; /// (caption, id, style) @@ -158,12 +158,16 @@ impl HtmlTableRule { TableCellAlignment::Right => String::from("table-right-cell"), }; + let content = Parser::parse_text(codex, content, Arc::clone(&parsing_configuration), Arc::new(None))?.parsed_content(); + + let content = text_utility::replace(&content, &ESCAPE_HTML); + html_row.add_cell( HtmlTableCell::new(build_html::TableCellType::Data) .with_attributes(vec![ ("class", format!("table-cell {}", align_class).as_str()) ]) - .with_raw(Parser::parse_text(codex, content, Arc::clone(&parsing_configuration), Arc::new(None))?.parsed_content()) + .with_raw(content) ); }, } diff --git a/src/compiler/parsing/parsing_rule/replacement_rule.rs b/src/compiler/parsing/parsing_rule/replacement_rule.rs index d89b71c..5325c6d 100644 --- a/src/compiler/parsing/parsing_rule/replacement_rule.rs +++ b/src/compiler/parsing/parsing_rule/replacement_rule.rs @@ -1,6 +1,8 @@ +use std::collections::HashMap; use std::fmt::Debug; use std::sync::{Arc, RwLock}; +use getset::{Getters, Setters}; use log; use regex::{Captures, Regex, Replacer}; @@ -10,6 +12,7 @@ use crate::compiler::parsing::parsing_error::ParsingError; use crate::compiler::parsing::parsing_outcome::{ParsingOutcome, ParsingOutcomePart}; use crate::compiler::parsing::parsing_rule::constants::DOUBLE_NEW_LINE_REGEX; use crate::resource::resource_reference::ResourceReference; +use crate::utility::text_utility; use super::ParsingRule; @@ -65,12 +68,26 @@ impl ReplacementRuleReplacerPart { /// Rule to replace a NMD text based on a specific pattern matching rule +#[derive(Getters, Setters)] pub struct ReplacementRule { + + #[getset(set)] search_pattern: String, + + #[getset(set)] search_pattern_regex: Regex, + + #[getset(get = "pub", set)] replacer_parts: Vec>, + + #[getset(get = "pub", set)] newline_fix_pattern: Option, + + #[getset(get = "pub", set)] reference_at: Option, + + #[getset(get = "pub", set)] + pre_replacing: Option>, } impl ReplacementRule { @@ -85,7 +102,8 @@ impl ReplacementRule { search_pattern: searching_pattern, replacer_parts: replacers, newline_fix_pattern: None, - reference_at: None + reference_at: None, + pre_replacing: None, } } @@ -94,6 +112,13 @@ impl ReplacementRule { self } + + pub fn with_pre_replacing(mut self, pre_replacing: Vec<(Regex, String)>) -> Self { + + self.set_pre_replacing(Some(pre_replacing)); + + self + } } impl Debug for ReplacementRule { @@ -153,8 +178,18 @@ impl ParsingRule for ReplacementRule { outcome.add_fixed_part(parsed_content.to_string()); } else { + + let pc: String; + + if let Some(r) = &self.pre_replacing { + + pc = text_utility::replace(&parsed_content.to_string(), r); + + } else { + pc = parsed_content.to_string(); + } - outcome.add_mutable_part(parsed_content.to_string()); + outcome.add_mutable_part(pc); } } diff --git a/src/resource/image_resource.rs b/src/resource/image_resource.rs index 2cb1ff7..4685a39 100644 --- a/src/resource/image_resource.rs +++ b/src/resource/image_resource.rs @@ -1,10 +1,12 @@ -use std::{io::Cursor, path::PathBuf, str::FromStr}; +use std::{fs::File, io::{Cursor, Read}, path::PathBuf, str::FromStr}; use base64::{engine::general_purpose::STANDARD, Engine}; use getset::{Getters, Setters}; -use image::{DynamicImage, ImageOutputFormat}; +use image::{error::UnsupportedError, DynamicImage, ImageOutputFormat}; use oxipng::Options; +use crate::compiler::dossier; + use super::ResourceError; use image::io::Reader as ImageReader; @@ -17,11 +19,86 @@ pub struct ImageResource { src: PathBuf, #[getset(get = "pub", set = "pub")] - image: DynamicImage, + mime_type: Option, + + #[getset(get = "pub", set = "pub")] + caption: Option, + + #[getset(get = "pub", set = "pub")] + label: Option, + + // #[getset(get = "pub", set = "pub")] + // image: DynamicImage, } impl ImageResource { + pub fn new(src: PathBuf, caption: Option, label: Option) -> Self { + + Self { + src, + mime_type: None, + caption, + label + } + } + + pub fn inferring_mime_type(mut self) -> Result { + + let mime_type = infer::get_from_path(&self.src)?; + + if let Some(t) = mime_type { + + let mut mime_type = t.mime_type().to_string(); + + // work-around svg+xml + if mime_type.contains("text/xml") { + mime_type = String::from("image/svg+xml"); + } + + self.set_mime_type(Some(mime_type)); + + return Ok(self); + + } else { + return Err(ResourceError::InvalidResourceVerbose(format!("image {:?} mime type not found", self.src))); + } + } + + pub fn elaborating_relative_path(mut self, base_location: &PathBuf) -> Self { + if self.src().is_relative() { + + self.set_src(base_location.join(self.src())); + + if !self.src().exists() { + + log::debug!("{:?} not found, try adding images directory path", self.src()); + + let image_file_name = self.src().file_name().unwrap(); + + self.set_src(base_location.join(dossier::ASSETS_DIR).join(dossier::IMAGES_DIR).join(image_file_name)); + } + } + + self + } + + // pub fn try_guess_format(&mut self) -> Result<(), ResourceError> { + + // let image_res = ImageReader::open(self.src.clone()); + + // if let Ok(img) = image_res { + // if let Ok(img) = img.with_guessed_format() { + + // self.mime_type = Some(img.format().unwrap().to_mime_type().to_string()); + + // return Ok(()); + // } + // } + + // Err(ResourceError::InvalidResourceVerbose(format!("unrecognized format image {:?}", self.src))) + // } + /// Encode image in base64 pub fn to_base64(&self, compression: bool) -> Result { @@ -51,11 +128,17 @@ impl ImageResource { pub fn to_vec_u8(&self) -> Result, ResourceError> { - let mut buffer: Vec = Vec::new(); + // let mut buffer: Vec = Vec::new(); + + // self.image.write_to(&mut Cursor::new(&mut buffer), ImageOutputFormat::Png)?; + + // Ok(buffer) - self.image.write_to(&mut Cursor::new(&mut buffer), ImageOutputFormat::Png)?; + let mut image_file = File::open(self.src.clone())?; + let mut raw_image: Vec = Vec::new(); + image_file.read_to_end(&mut raw_image)?; - Ok(buffer) + Ok(raw_image) } /// Check if a PathBuf is an image @@ -71,47 +154,51 @@ impl ImageResource { } } -impl TryFrom for ImageResource { - type Error = ResourceError; +// impl TryFrom for ImageResource { +// type Error = ResourceError; - fn try_from(path: PathBuf) -> Result { +// fn try_from(path: PathBuf) -> Result { - // TODO: open is a time consuming operation - let image = ImageReader::open(path.clone())?.decode(); +// // TODO: open is a time consuming operation +// // let image = ImageReader::open(path.clone())?.decode(); - if image.is_err() { +// // if image.is_err() { - let e = image.err().unwrap(); +// // let e = image.err().unwrap(); - log::error!("error occurs during image opening ({:?}): {}", path, e); +// // log::error!("error occurs during image opening ({:?}): {}", path, e); - return Err(ResourceError::ImageError(e)) - } +// // return Err(ResourceError::ImageError(e)) +// // } - let image = image?; +// // let image = image?; - Ok(Self { - src: path, - image - }) - } -} +// Ok(Self { +// format: path.extension().unwrap().to_str().unwrap().to_string(), +// src: path, +// built_src: None, +// caption: None, +// label: None +// // image +// }) +// } +// } -impl TryFrom for ImageResource { - type Error = ResourceError; +// impl TryFrom for ImageResource { +// type Error = ResourceError; - fn try_from(path: String) -> Result { - let path = PathBuf::from(path); +// fn try_from(path: String) -> Result { +// let path = PathBuf::from(path); - Self::try_from(path) - } -} +// Self::try_from(path) +// } +// } impl FromStr for ImageResource { type Err = ResourceError; fn from_str(s: &str) -> Result { - Self::try_from(String::from(s)) + Ok(Self::new(PathBuf::from(s), None, None)) } } diff --git a/src/utility.rs b/src/utility.rs index 6f2644c..655f4fa 100644 --- a/src/utility.rs +++ b/src/utility.rs @@ -1 +1,2 @@ -pub mod file_utility; \ No newline at end of file +pub mod file_utility; +pub mod text_utility; \ No newline at end of file diff --git a/src/utility/text_utility.rs b/src/utility/text_utility.rs new file mode 100644 index 0000000..039d570 --- /dev/null +++ b/src/utility/text_utility.rs @@ -0,0 +1,13 @@ +use regex::Regex; + + + +pub fn replace(content: &str, replacements: &Vec<(Regex, String)>) -> String { + let mut result = String::from(content); + + for (regex, rep) in replacements { + result = regex.replace_all(&result, rep).to_string(); + } + + result +} \ No newline at end of file