From a15b0e156f7826a3665692cd5920ea2b2a8b7e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Tue, 23 Jul 2024 14:01:45 +0200 Subject: [PATCH] derive: less string escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ```text $ cd rinja_derive_standalone $ cargo bench hello_world time: [50.420 µs 50.529 µs 50.637 µs] change: [-0.6238% -0.3448% -0.0695%] (p = 0.02 < 0.05) Change within noise threshold. item_info.html time: [58.807 µs 59.108 µs 59.408 µs] change: [-2.4731% -2.0541% -1.6532%] (p = 0.00 < 0.05) Performance has improved. item_union.html time: [182.90 µs 183.97 µs 185.73 µs] change: [-2.6740% -1.8652% -0.7734%] (p = 0.00 < 0.05) Change within noise threshold. page.html time: [767.05 µs 768.05 µs 769.20 µs] change: [-4.5916% -4.2764% -3.9026%] (p = 0.00 < 0.05) Performance has improved. print_item.html time: [154.97 µs 155.26 µs 155.59 µs] change: [-2.0385% -1.7364% -1.4351%] (p = 0.00 < 0.05) Performance has improved. short_item_info.html time: [126.35 µs 126.49 µs 126.67 µs] change: [-6.5708% -6.1935% -5.8314%] (p = 0.00 < 0.05) Performance has improved. sidebar.html time: [196.25 µs 196.68 µs 197.14 µs] change: [-4.9096% -4.4107% -3.9467%] (p = 0.00 < 0.05) Performance has improved. source.html time: [112.69 µs 113.14 µs 113.76 µs] change: [-3.4552% -3.1557% -2.8072%] (p = 0.00 < 0.05) Performance has improved. type_layout.html time: [160.05 µs 160.30 µs 160.59 µs] change: [-11.126% -10.419% -9.2877%] (p = 0.00 < 0.05) Performance has improved. type_layout_size.html time: [66.006 µs 66.340 µs 66.664 µs] change: [-2.3099% -1.3973% +0.0223%] (p = 0.01 < 0.05) Change within noise threshold. 
``` --- rinja_derive/Cargo.toml | 1 + rinja_derive/src/generator.rs | 49 ++++++++++++++++++++++-------- rinja_derive_standalone/Cargo.toml | 1 + 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/rinja_derive/Cargo.toml b/rinja_derive/Cargo.toml index 3614cb71..ffac3f1e 100644 --- a/rinja_derive/Cargo.toml +++ b/rinja_derive/Cargo.toml @@ -27,6 +27,7 @@ with-warp = [] [dependencies] parser = { package = "rinja_parser", version = "0.2.0", path = "../rinja_parser" } basic-toml = { version = "0.1.1", optional = true } +memchr = "2" mime = "0.3" mime_guess = "2" once_map = "0.4.18" diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 3ad8547e..386fb84a 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -1950,19 +1950,19 @@ impl Buffer { } fn write_writer(&mut self, s: &str) -> usize { - if self.discard { - // nothing to do - } else if !self.last_was_write_str { - write!(self.buf, "writer.write_str({s:#?})?;").unwrap(); - self.last_was_write_str = true; - } else { - // strip trailing `")?;`, leaving an unterminated string - let len = self.buf.strip_suffix("\")?;").unwrap().len(); - self.buf.truncate(len); - // append the new string, adding a stray `"` in the mid of the string - write!(self.buf, "{s:#?})?;").unwrap(); - // left shift new string by one to overwrite the stray `"` - self.buf.replace_range(len..=len, ""); + const OPEN: &str = r#"writer.write_str(""#; + const CLOSE: &str = r#"")?;"#; + + if !self.discard { + if !self.last_was_write_str { + self.last_was_write_str = true; + self.buf.push_str(OPEN); + } else { + // strip trailing `")?;`, leaving an unterminated string + self.buf.truncate(self.buf.len() - CLOSE.len()) + } + string_escape(&mut self.buf, s); + self.buf.push_str(CLOSE); } s.len() } @@ -2278,3 +2278,26 @@ fn normalize_identifier(ident: &str) -> &str { // SAFETY: We know that the input byte slice is pure-ASCII. 
unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) } } + +/// Similar to `write!(dest, "{src:?}")`, but only escapes the strictly needed characters, +/// and without the surrounding `"…"` quotation marks. +pub(crate) fn string_escape(dest: &mut String, src: &str) { + // SAFETY: we will only push valid str slices + let dest = unsafe { dest.as_mut_vec() }; + let src = src.as_bytes(); + let mut last = 0; + + // According to <https://doc.rust-lang.org/reference/tokens.html#string-literals>, every + // character is valid except `" \ IsolatedCR`. We don't test if the `\r` is isolated or not, + // but always escape it. + for x in memchr::memchr3_iter(b'\\', b'"', b'\r', src) { + dest.extend(&src[last..x]); + dest.extend(match src[x] { + b'\\' => br#"\\"#, + b'\"' => br#"\""#, + _ => br#"\r"#, + }); + last = x + 1; + } + dest.extend(&src[last..]); +} diff --git a/rinja_derive_standalone/Cargo.toml b/rinja_derive_standalone/Cargo.toml index cf9ab034..657daa61 100644 --- a/rinja_derive_standalone/Cargo.toml +++ b/rinja_derive_standalone/Cargo.toml @@ -26,6 +26,7 @@ with-warp = [] [dependencies] parser = { package = "rinja_parser", version = "0.2.0", path = "../rinja_parser" } basic-toml = { version = "0.1.1", optional = true } +memchr = "2" mime = "0.3" mime_guess = "2" once_map = "0.4.18"