From f99c06ab6a769751b2b4713026215d6b8e13ac27 Mon Sep 17 00:00:00 2001 From: clubby789 Date: Tue, 20 Feb 2024 15:53:46 +0000 Subject: [PATCH] Use `memchr` to optimize `get_text_slice` --- fluent-syntax/Cargo.toml | 1 + fluent-syntax/src/parser/macros.rs | 6 ++ fluent-syntax/src/parser/pattern.rs | 101 ++++++++++++++++------------ 3 files changed, 64 insertions(+), 44 deletions(-) diff --git a/fluent-syntax/Cargo.toml b/fluent-syntax/Cargo.toml index 9c306381..872d230e 100644 --- a/fluent-syntax/Cargo.toml +++ b/fluent-syntax/Cargo.toml @@ -25,6 +25,7 @@ include = [ ] [dependencies] +memchr = "2.0" serde = { workspace = true, optional = true, features = ["derive"] } serde_json = { workspace = true, optional = true } thiserror.workspace = true diff --git a/fluent-syntax/src/parser/macros.rs b/fluent-syntax/src/parser/macros.rs index 671d5432..f641c11e 100644 --- a/fluent-syntax/src/parser/macros.rs +++ b/fluent-syntax/src/parser/macros.rs @@ -9,3 +9,9 @@ macro_rules! get_current_byte { $s.source.as_ref().as_bytes().get($s.ptr) }; } + +macro_rules! get_remaining_bytes { + ($s:expr) => { + $s.source.as_ref().as_bytes().get($s.ptr..) + }; +} diff --git a/fluent-syntax/src/parser/pattern.rs b/fluent-syntax/src/parser/pattern.rs index 85c8925e..9ca1229a 100644 --- a/fluent-syntax/src/parser/pattern.rs +++ b/fluent-syntax/src/parser/pattern.rs @@ -157,51 +157,64 @@ where &mut self, ) -> Result<(usize, usize, TextElementType, TextElementTermination)> { let start_pos = self.ptr; - let mut text_element_type = TextElementType::Blank; - - while let Some(b) = get_current_byte!(self) { - match b { - b' ' => self.ptr += 1, - b'\n' => { - self.ptr += 1; - return Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::LineFeed, - )); - } - b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => { - self.ptr += 1; - return Ok(( - start_pos, - self.ptr - 1, - text_element_type, - TextElementTermination::Crlf, - )); - } - b'{' => { - return Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::PlaceableStart, - )); - } - b'}' => { - return error!(ErrorKind::UnbalancedClosingBrace, self.ptr); - } - _ => { - text_element_type = TextElementType::NonBlank; - self.ptr += 1 - } + let Some(rest) = get_remaining_bytes!(self) else { + return Ok(( + start_pos, + self.ptr, + TextElementType::Blank, + TextElementTermination::Eof, + )); + }; + let end = memchr::memchr3(b'\n', b'{', b'}', rest); + let element_type = |text: &[u8]| { + if text.iter().any(|&c| c != b' ') { + TextElementType::NonBlank + } else { + TextElementType::Blank + } + }; + match end.map(|p| &rest[..=p]) { + Some([text @ .., b'}']) => { + self.ptr += text.len(); + error!(ErrorKind::UnbalancedClosingBrace, self.ptr) + } + Some([text @ .., b'\r', b'\n']) => { + self.ptr += text.len() + 1; + Ok(( + start_pos, + self.ptr - 1, + element_type(text), + TextElementTermination::Crlf, + )) + } + Some([text @ .., b'\n']) => { + self.ptr += text.len() + 1; + Ok(( + start_pos, + self.ptr, + element_type(text), + TextElementTermination::LineFeed, + )) + } + Some([text @ .., b'{']) => { + self.ptr += text.len(); + Ok(( + start_pos, + self.ptr, + element_type(text), + TextElementTermination::PlaceableStart, + )) + } + None => { + self.ptr += rest.len(); + Ok(( + start_pos, + self.ptr, + element_type(rest), + TextElementTermination::Eof, + )) } + _ => unreachable!(), } - Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::Eof, - )) } }