From 74020c39ea1c4abe523b5222307b029757441c3c Mon Sep 17 00:00:00 2001 From: Caleb Maclennan Date: Sun, 5 May 2024 22:23:53 +0300 Subject: [PATCH 1/9] chore: Release 'safe harbor' versions of Fluent workspace crates (#349) These releases serve as markers in the transition from the Project Fluent related crates being a wholly Mozilla managed project to bringing on non-Mozilla community maintainers. The safe harbor release series publishes the current Git HEAD of all crates as a convenience to downstream consumers that either audit the code and contributors, don't want to adopt potential changes for whatever reason, or just want an easy reference point against which to compare future releases. --- fluent-bundle/CHANGELOG.md | 15 +++++++++++++++ fluent-bundle/Cargo.toml | 2 +- fluent-fallback/CHANGELOG.md | 8 ++++++++ fluent-fallback/Cargo.toml | 2 +- fluent-pseudo/CHANGELOG.md | 8 ++++++++ fluent-pseudo/Cargo.toml | 2 +- fluent-resmgr/CHANGELOG.md | 8 ++++++++ fluent-resmgr/Cargo.toml | 2 +- fluent-syntax/CHANGELOG.md | 10 ++++++++++ fluent-syntax/Cargo.toml | 2 +- fluent-testing/CHANGELOG.md | 7 +++++++ fluent-testing/Cargo.toml | 2 +- fluent/CHANGELOG.md | 9 +++++++++ fluent/Cargo.toml | 2 +- intl-memoizer/CHANGELOG.md | 7 +++++++ intl-memoizer/Cargo.toml | 2 +- 16 files changed, 80 insertions(+), 8 deletions(-) diff --git a/fluent-bundle/CHANGELOG.md b/fluent-bundle/CHANGELOG.md index 223ed009..af5bbf46 100644 --- a/fluent-bundle/CHANGELOG.md +++ b/fluent-bundle/CHANGELOG.md @@ -4,6 +4,21 @@ - … +## fluent-bundle 0.15.3 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Implement `From<&String>` for `FluentValue` + - Add `FluentValue.into_string` to prevent String clone + - Fix `FluentValue::try_number` accepting numbers + - Allow optional arguments on `FluentValue` + - Fix behavior of `FluentArgs::set` + - Resolve function instead in `impl ResolveValue` + - Add type alias for concurrent `FluentBundle` 
+ - Fix `FluentBundle::format_pattern` lifetimes + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-bundle 0.15.2 (October 25, 2021) - Bump `self_cell` to 0.10. diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 872ffe13..570b7e44 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -4,7 +4,7 @@ description = """ A localization system designed to unleash the entire expressive power of natural language translations. """ -version = "0.15.2" +version = "0.15.3" edition = "2021" authors = [ "Zibi Braniecki ", diff --git a/fluent-fallback/CHANGELOG.md b/fluent-fallback/CHANGELOG.md index 1c6598c4..739765a4 100644 --- a/fluent-fallback/CHANGELOG.md +++ b/fluent-fallback/CHANGELOG.md @@ -4,6 +4,14 @@ - … +## fluent-fallback 0.7.1 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Migrate to `pin_cell` crate + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-fallback 0.7.0 (Nov 9, 2022) - The `ResourceId`s are now stored as a `HashSet` rather than as a Vec. Adding a duplicate `ResourceId` is now a noop. diff --git a/fluent-fallback/Cargo.toml b/fluent-fallback/Cargo.toml index 0d949d43..113336aa 100644 --- a/fluent-fallback/Cargo.toml +++ b/fluent-fallback/Cargo.toml @@ -4,7 +4,7 @@ description = """ High-level abstraction model for managing localization resources and runtime localization lifecycle. 
""" -version = "0.7.0" +version = "0.7.1" edition = "2021" authors = [ "Zibi Braniecki ", diff --git a/fluent-pseudo/CHANGELOG.md b/fluent-pseudo/CHANGELOG.md index 9b48172f..75ed61eb 100644 --- a/fluent-pseudo/CHANGELOG.md +++ b/fluent-pseudo/CHANGELOG.md @@ -4,6 +4,14 @@ - … +## fluent-pseudo 0.3.2 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Allow optional resources, adds `ResourceId` struct + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-pseudo 0.3.1 (July 21, 2021) - Update README to document the API changes in 0.3.0. diff --git a/fluent-pseudo/Cargo.toml b/fluent-pseudo/Cargo.toml index fb70c1b8..79df5dc8 100644 --- a/fluent-pseudo/Cargo.toml +++ b/fluent-pseudo/Cargo.toml @@ -3,7 +3,7 @@ name = "fluent-pseudo" description = """ Pseudolocalization transformation API for use with Project Fluent API. """ -version = "0.3.1" +version = "0.3.2" edition = "2021" authors = [ "Zibi Braniecki ", diff --git a/fluent-resmgr/CHANGELOG.md b/fluent-resmgr/CHANGELOG.md index 95fbc64c..671ba65a 100644 --- a/fluent-resmgr/CHANGELOG.md +++ b/fluent-resmgr/CHANGELOG.md @@ -4,6 +4,14 @@ - … +## fluent-resmgr 0.0.7 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Return a result for `ResourceManager::get_resource` + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-resmgr 0.0.6 (Nov 9, 2022) - Update `fluent-fallback` to 0.7.0. 
diff --git a/fluent-resmgr/Cargo.toml b/fluent-resmgr/Cargo.toml index c73b843b..b3391ff6 100644 --- a/fluent-resmgr/Cargo.toml +++ b/fluent-resmgr/Cargo.toml @@ -3,7 +3,7 @@ name = "fluent-resmgr" description = """ Resource manager for Fluent localization resources. """ -version = "0.0.6" +version = "0.0.7" authors = [ "Zibi Braniecki ", "Staś Małolepszy " diff --git a/fluent-syntax/CHANGELOG.md b/fluent-syntax/CHANGELOG.md index 38f6e9b3..592d10ea 100644 --- a/fluent-syntax/CHANGELOG.md +++ b/fluent-syntax/CHANGELOG.md @@ -4,6 +4,16 @@ - Add module `serializer`. - … +## fluent-syntax 0.11.1 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Create generic ftl serializer `fluent_syntax::serializer` + - Fix crash when parsing multiline CRLF comment + - Treat tab as text, not whitespace, adds `parser::matches_fluent_ws` function + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-syntax 0.11.0 (February 9, 2021) - Document the crate. - Switch to use `thiserror` for Error annotations. diff --git a/fluent-syntax/Cargo.toml b/fluent-syntax/Cargo.toml index 9c306381..0e027bc2 100644 --- a/fluent-syntax/Cargo.toml +++ b/fluent-syntax/Cargo.toml @@ -3,7 +3,7 @@ name = "fluent-syntax" description = """ Parser/Serializer tools for Fluent Syntax. 
""" -version = "0.11.0" +version = "0.11.1" edition = "2021" authors = [ "Zibi Braniecki ", diff --git a/fluent-testing/CHANGELOG.md b/fluent-testing/CHANGELOG.md index 629bffd8..40aae598 100644 --- a/fluent-testing/CHANGELOG.md +++ b/fluent-testing/CHANGELOG.md @@ -4,6 +4,13 @@ - … +## fluent-testing 0.0.4 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent-resmgr 0.0.3 (Nov 9, 2022) - Update `fluent-fallback` to 0.7.0. diff --git a/fluent-testing/Cargo.toml b/fluent-testing/Cargo.toml index 77d30bb3..3af6a353 100644 --- a/fluent-testing/Cargo.toml +++ b/fluent-testing/Cargo.toml @@ -3,7 +3,7 @@ name = "fluent-testing" description = """ A collection of mock scenarios for testing fluent-rs components. """ -version = "0.0.3" +version = "0.0.4" authors = [ "Zibi Braniecki ", "Erik Nordin " diff --git a/fluent/CHANGELOG.md b/fluent/CHANGELOG.md index bf9057b8..8f1e2b28 100644 --- a/fluent/CHANGELOG.md +++ b/fluent/CHANGELOG.md @@ -4,6 +4,15 @@ - … +## fluent 0.16.1 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Make `fluent_args` macro work with trailing comma + - Fix `FluentValue::try_number` accepting numbers + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## fluent 0.16.0 (July 29, 2021) - Update `fluent-pseudo` to 0.3. 
diff --git a/fluent/Cargo.toml b/fluent/Cargo.toml index 16ae4cc2..3a773375 100644 --- a/fluent/Cargo.toml +++ b/fluent/Cargo.toml @@ -4,7 +4,7 @@ description = """ A localization system designed to unleash the entire expressive power of natural language translations. """ -version = "0.16.0" +version = "0.16.1" edition = "2021" authors = [ "Zibi Braniecki ", diff --git a/intl-memoizer/CHANGELOG.md b/intl-memoizer/CHANGELOG.md index c749b0e5..b5adfd32 100644 --- a/intl-memoizer/CHANGELOG.md +++ b/intl-memoizer/CHANGELOG.md @@ -4,6 +4,13 @@ - … +## intl-memoizer 0.5.2 (March 16, 2024) + - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers + - Workspace: Update to Rust 2021 + - Workspace: Add various missing documentation and fix typos and links + - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. + - Workspace: Apply rustfmt and clippy lint fixes + ## intl-memoizer 0.5.1 (January 22, 2021) - Update `type-map` to 0.4. diff --git a/intl-memoizer/Cargo.toml b/intl-memoizer/Cargo.toml index c1506146..cae12bf2 100644 --- a/intl-memoizer/Cargo.toml +++ b/intl-memoizer/Cargo.toml @@ -4,7 +4,7 @@ description = """ A memoizer specifically tailored for storing lazy-initialized intl formatters. 
""" -version = "0.5.1" +version = "0.5.2" edition = "2021" authors = [ "Zibi Braniecki ", From 17fa67bc7eaa8b590ce07e043e63068ce7346d66 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Sun, 5 May 2024 12:42:59 -0700 Subject: [PATCH 2/9] Add versions to cargo --- Cargo.toml | 28 +++++++++++++--------------- fluent-bundle/Cargo.toml | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c70b9b3..4bc232e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,19 +1,17 @@ [workspace] resolver = "2" members = [ - "fluent-syntax", - "fluent-bundle", - "fluent-fallback", - "fluent-resmgr", - "fluent-pseudo", - "fluent-testing", - "fluent", - "intl-memoizer" + "fluent-syntax", + "fluent-bundle", + "fluent-fallback", + "fluent-resmgr", + "fluent-pseudo", + "fluent-testing", + "fluent", + "intl-memoizer", ] -exclude = [ - "fluent-cli", -] +exclude = ["fluent-cli"] [workspace.dependencies] criterion = "0.3" @@ -28,7 +26,7 @@ thiserror = "1.0" tokio = "1.0" unic-langid = "0.9" -fluent-bundle = { path = "fluent-bundle" } -fluent-fallback = { path = "fluent-fallback" } -fluent-pseudo = { path = "fluent-pseudo" } -fluent-syntax = { path = "fluent-syntax" } +fluent-bundle = { version = "0.15.3", path = "fluent-bundle" } +fluent-fallback = { version = "0.7.1", path = "fluent-fallback" } +fluent-pseudo = { version = "0.3.2", path = "fluent-pseudo" } +fluent-syntax = { version = "0.11.1", path = "fluent-syntax" } diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 570b7e44..cb149f08 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -7,8 +7,8 @@ natural language translations. 
version = "0.15.3" edition = "2021" authors = [ - "Zibi Braniecki ", - "Staś Małolepszy " + "Zibi Braniecki ", + "Staś Małolepszy ", ] homepage = "http://www.projectfluent.org" license = "Apache-2.0 OR MIT" @@ -17,12 +17,12 @@ readme = "README.md" keywords = ["localization", "l10n", "i18n", "intl", "internationalization"] categories = ["localization", "internationalization"] include = [ - "src/**/*", - "benches/*.rs", - "Cargo.toml", - "README.md", - "LICENSE-APACHE", - "LICENSE-MIT" + "src/**/*", + "benches/*.rs", + "Cargo.toml", + "README.md", + "LICENSE-APACHE", + "LICENSE-MIT", ] [dependencies] @@ -31,14 +31,14 @@ fluent-syntax.workspace = true intl_pluralrules.workspace = true rustc-hash.workspace = true unic-langid.workspace = true -intl-memoizer = { path = "../intl-memoizer" } +intl-memoizer = { version = "0.5.2", path = "../intl-memoizer" } self_cell = "0.10" smallvec = "1" [dev-dependencies] criterion.workspace = true iai.workspace = true -serde = { workspace = true, features = ["derive"]} +serde = { workspace = true, features = ["derive"] } unic-langid = { workspace = true, features = ["macros"] } rand = "0.8" serde_yaml = "0.8" From 319dd19e276eda891ed783f825d89dd3b8c6effd Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Mon, 6 May 2024 03:48:21 +0700 Subject: [PATCH 3/9] docs: Fix doc links (#354) --- fluent-fallback/src/lib.rs | 2 +- intl-memoizer/src/concurrent.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fluent-fallback/src/lib.rs b/fluent-fallback/src/lib.rs index 4bd48f66..dee5906f 100644 --- a/fluent-fallback/src/lib.rs +++ b/fluent-fallback/src/lib.rs @@ -62,7 +62,7 @@ //! Resource identifiers can refer to resources that are either required or optional. //! In the above example, `"test.ftl"` is a required resource (the default using `.into()`), //! and `"test2.ftl"` is an optional resource, which you can create via the -//! [`ToResourceId`](fluent_fallback::types::ToResourceId) trait. +//! 
[`ToResourceId`](types::ToResourceId) trait. //! //! A required resource must be present in order for the a bundle to be considered valid. //! If a required resource is missing for a given locale, a bundle will not be generated for that locale. diff --git a/intl-memoizer/src/concurrent.rs b/intl-memoizer/src/concurrent.rs index 74dc528e..0809f733 100644 --- a/intl-memoizer/src/concurrent.rs +++ b/intl-memoizer/src/concurrent.rs @@ -20,7 +20,7 @@ impl IntlLangMemoizer { } /// Lazily initialize and run a formatter. See - /// [`intl_memoizer::IntlLangMemoizer::with_try_get`](../struct.IntlLangMemoizer.html#method.with_try_get) + /// [`intl_memoizer::IntlLangMemoizer::with_try_get`](crate::IntlLangMemoizer::with_try_get) /// for documentation. pub fn with_try_get(&self, args: I::Args, cb: U) -> Result where From 22a4808517f5fea6c9aa411009389ff553ddd272 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Mon, 6 May 2024 03:58:58 +0700 Subject: [PATCH 4/9] deps(fluent-bundle): Bump serde_yaml to 0.9 from 0.8 (#316) --- fluent-bundle/CHANGELOG.md | 2 +- fluent-bundle/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fluent-bundle/CHANGELOG.md b/fluent-bundle/CHANGELOG.md index af5bbf46..63aa2c2e 100644 --- a/fluent-bundle/CHANGELOG.md +++ b/fluent-bundle/CHANGELOG.md @@ -2,7 +2,7 @@ ## Unreleased - - … + - Bump `serde_yaml` to 0.9. 
## fluent-bundle 0.15.3 (March 16, 2024) - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index cb149f08..056c6a88 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -41,7 +41,7 @@ iai.workspace = true serde = { workspace = true, features = ["derive"] } unic-langid = { workspace = true, features = ["macros"] } rand = "0.8" -serde_yaml = "0.8" +serde_yaml = "0.9" [features] default = [] From 8b36e4fe494aef058ea08db0021b26872f996df5 Mon Sep 17 00:00:00 2001 From: Pi-Cla Date: Sun, 5 May 2024 15:07:59 -0600 Subject: [PATCH 5/9] deps(workspace): Bump criterion to 0.5 to avoid dependency on atty (#351) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4bc232e3..fd37988c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ members = [ exclude = ["fluent-cli"] [workspace.dependencies] -criterion = "0.3" +criterion = "0.5" fluent-langneg = "0.13" futures = "0.3" iai = "0.1" From f1c2e66cf42cd00c27c7bc5e5c7ae3f0e83d7308 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Mon, 6 May 2024 04:16:38 +0700 Subject: [PATCH 6/9] deps(fluent-bundle): Bump self_cell to 1.0 from 0.10 (#324) Co-authored-by: Caleb Maclennan --- fluent-bundle/CHANGELOG.md | 5 ++--- fluent-bundle/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fluent-bundle/CHANGELOG.md b/fluent-bundle/CHANGELOG.md index 63aa2c2e..cf626d7d 100644 --- a/fluent-bundle/CHANGELOG.md +++ b/fluent-bundle/CHANGELOG.md @@ -1,8 +1,8 @@ # Changelog ## Unreleased - - - Bump `serde_yaml` to 0.9. 
+ - Bump `self_cell` to 1.x + - Bump `serde_yaml` to 0.9 ## fluent-bundle 0.15.3 (March 16, 2024) - This is a 'safe harbor' release prior to bringing on non-Mozilla community maintainers @@ -18,7 +18,6 @@ - Workspace: Add various missing documentation and fix typos and links - Workspace: Cleanup meta-data using workspaces, use SPDX licenses, etc. - Workspace: Apply rustfmt and clippy lint fixes - ## fluent-bundle 0.15.2 (October 25, 2021) - Bump `self_cell` to 0.10. diff --git a/fluent-bundle/Cargo.toml b/fluent-bundle/Cargo.toml index 056c6a88..00e41ce2 100644 --- a/fluent-bundle/Cargo.toml +++ b/fluent-bundle/Cargo.toml @@ -32,7 +32,7 @@ intl_pluralrules.workspace = true rustc-hash.workspace = true unic-langid.workspace = true intl-memoizer = { version = "0.5.2", path = "../intl-memoizer" } -self_cell = "0.10" +self_cell = "1.0" smallvec = "1" [dev-dependencies] From 2aa38ae307a15952176672c81f5dffa47e2832db Mon Sep 17 00:00:00 2001 From: clubby789 Date: Sun, 5 May 2024 22:41:58 +0100 Subject: [PATCH 7/9] perf: Use `memchr` to optimize `get_text_slice` (#344) --- fluent-syntax/Cargo.toml | 1 + fluent-syntax/src/parser/macros.rs | 6 ++ fluent-syntax/src/parser/pattern.rs | 101 ++++++++++++++++------------ 3 files changed, 64 insertions(+), 44 deletions(-) diff --git a/fluent-syntax/Cargo.toml b/fluent-syntax/Cargo.toml index 0e027bc2..057f3bbc 100644 --- a/fluent-syntax/Cargo.toml +++ b/fluent-syntax/Cargo.toml @@ -25,6 +25,7 @@ include = [ ] [dependencies] +memchr = "2.0" serde = { workspace = true, optional = true, features = ["derive"] } serde_json = { workspace = true, optional = true } thiserror.workspace = true diff --git a/fluent-syntax/src/parser/macros.rs b/fluent-syntax/src/parser/macros.rs index 671d5432..f641c11e 100644 --- a/fluent-syntax/src/parser/macros.rs +++ b/fluent-syntax/src/parser/macros.rs @@ -9,3 +9,9 @@ macro_rules! get_current_byte { $s.source.as_ref().as_bytes().get($s.ptr) }; } + +macro_rules! 
get_remaining_bytes { + ($s:expr) => { + $s.source.as_ref().as_bytes().get($s.ptr..) + }; +} diff --git a/fluent-syntax/src/parser/pattern.rs b/fluent-syntax/src/parser/pattern.rs index 85c8925e..9ca1229a 100644 --- a/fluent-syntax/src/parser/pattern.rs +++ b/fluent-syntax/src/parser/pattern.rs @@ -157,51 +157,64 @@ where &mut self, ) -> Result<(usize, usize, TextElementType, TextElementTermination)> { let start_pos = self.ptr; - let mut text_element_type = TextElementType::Blank; - - while let Some(b) = get_current_byte!(self) { - match b { - b' ' => self.ptr += 1, - b'\n' => { - self.ptr += 1; - return Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::LineFeed, - )); - } - b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => { - self.ptr += 1; - return Ok(( - start_pos, - self.ptr - 1, - text_element_type, - TextElementTermination::Crlf, - )); - } - b'{' => { - return Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::PlaceableStart, - )); - } - b'}' => { - return error!(ErrorKind::UnbalancedClosingBrace, self.ptr); - } - _ => { - text_element_type = TextElementType::NonBlank; - self.ptr += 1 - } + let Some(rest) = get_remaining_bytes!(self) else { + return Ok(( + start_pos, + self.ptr, + TextElementType::Blank, + TextElementTermination::Eof, + )); + }; + let end = memchr::memchr3(b'\n', b'{', b'}', rest); + let element_type = |text: &[u8]| { + if text.iter().any(|&c| c != b' ') { + TextElementType::NonBlank + } else { + TextElementType::Blank + } + }; + match end.map(|p| &rest[..=p]) { + Some([text @ .., b'}']) => { + self.ptr += text.len(); + error!(ErrorKind::UnbalancedClosingBrace, self.ptr) + } + Some([text @ .., b'\r', b'\n']) => { + self.ptr += text.len() + 1; + Ok(( + start_pos, + self.ptr - 1, + element_type(text), + TextElementTermination::Crlf, + )) + } + Some([text @ .., b'\n']) => { + self.ptr += text.len() + 1; + Ok(( + start_pos, + self.ptr, + element_type(text), + TextElementTermination::LineFeed, + 
)) + } + Some([text @ .., b'{']) => { + self.ptr += text.len(); + Ok(( + start_pos, + self.ptr, + element_type(text), + TextElementTermination::PlaceableStart, + )) + } + None => { + self.ptr += rest.len(); + Ok(( + start_pos, + self.ptr, + element_type(rest), + TextElementTermination::Eof, + )) } + _ => unreachable!(), } - Ok(( - start_pos, - self.ptr, - text_element_type, - TextElementTermination::Eof, - )) } } From 0b13310a6a25b4cf2b0357f9d5467532b6e743d5 Mon Sep 17 00:00:00 2001 From: JasperDeSutter Date: Wed, 21 Dec 2022 14:54:49 +0100 Subject: [PATCH 8/9] test: Test ownership in unescape_unicode_test --- fluent-syntax/tests/unicode.rs | 36 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/fluent-syntax/tests/unicode.rs b/fluent-syntax/tests/unicode.rs index 0030e70b..138cf91e 100644 --- a/fluent-syntax/tests/unicode.rs +++ b/fluent-syntax/tests/unicode.rs @@ -1,23 +1,33 @@ +use std::borrow::Cow; + use fluent_syntax::unicode::{unescape_unicode, unescape_unicode_to_string}; -fn test_unescape_unicode(input: &str, output: &str) { +/// Asserts that decoding unicode escape sequences in `input` matches `output`. +/// When `borrowed` = true, asserts that the escaped value is passed back by reference. 
+fn test_unescape_unicode(input: &str, output: &str, borrowed: bool) { let mut s = String::new(); unescape_unicode(&mut s, input).expect("Failed to write."); - assert_eq!(&s, output); + assert_eq!(s, output); let result = unescape_unicode_to_string(input); - assert_eq!(&result, output); + assert_eq!(result, output); + + assert_eq!(matches!(result, Cow::Borrowed(_)), borrowed); } #[test] fn unescape_unicode_test() { - test_unescape_unicode("foo", "foo"); - test_unescape_unicode("foo \\\\", "foo \\"); - test_unescape_unicode("foo \\\"", "foo \""); - test_unescape_unicode("foo \\\\ faa", "foo \\ faa"); - test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii"); - test_unescape_unicode("foo \\\\\\\" faa \\\"\\\\ fii", "foo \\\" faa \"\\ fii"); - test_unescape_unicode("\\u0041\\u004F", "AO"); - test_unescape_unicode("\\uA", "�"); - test_unescape_unicode("\\uA0Pl", "�"); - test_unescape_unicode("\\d Foo", "� Foo"); + test_unescape_unicode("foo", "foo", true); + test_unescape_unicode("foo \\\\", "foo \\", false); + test_unescape_unicode("foo \\\"", "foo \"", false); + test_unescape_unicode("foo \\\\ faa", "foo \\ faa", false); + test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii", false); + test_unescape_unicode( + "foo \\\\\\\" faa \\\"\\\\ fii", + "foo \\\" faa \"\\ fii", + false, + ); + test_unescape_unicode("\\u0041\\u004F", "AO", false); + test_unescape_unicode("\\uA", "�", false); + test_unescape_unicode("\\uA0Pl", "�", false); + test_unescape_unicode("\\d Foo", "� Foo", false); } From b17e47f45210f5b5350e3353e8183e6ec6bbdbd4 Mon Sep 17 00:00:00 2001 From: JasperDeSutter Date: Wed, 21 Dec 2022 14:55:46 +0100 Subject: [PATCH 9/9] refactor: Deduplicate implementation of unescape_unicode --- fluent-syntax/src/unicode.rs | 59 ++++++++++++++---------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/fluent-syntax/src/unicode.rs b/fluent-syntax/src/unicode.rs index ab95a868..14a96aff 100644 --- 
a/fluent-syntax/src/unicode.rs +++ b/fluent-syntax/src/unicode.rs @@ -66,6 +66,16 @@ fn encode_unicode(s: Option<&str>) -> char { /// assert_eq!(s, "Foo 😊 Bar"); /// ``` pub fn unescape_unicode(w: &mut W, input: &str) -> fmt::Result +where + W: fmt::Write, +{ + if unescape(w, input)? { + return Ok(()); + } + w.write_str(input) +} + +fn unescape(w: &mut W, input: &str) -> Result where W: fmt::Write, { @@ -100,10 +110,15 @@ where w.write_char(new_char)?; start = ptr; } + + if start == 0 { + return Ok(false); + } + if start != ptr { w.write_str(&input[start..ptr])?; } - Ok(()) + Ok(true) } /// Unescapes to a `Cow` optionally allocating. @@ -119,41 +134,11 @@ where /// ); /// ``` pub fn unescape_unicode_to_string(input: &str) -> Cow { - let bytes = input.as_bytes(); - let mut result = Cow::from(input); - - let mut ptr = 0; - - while let Some(b) = bytes.get(ptr) { - if b != &b'\\' { - if let Cow::Owned(ref mut s) = result { - s.push(*b as char); - } - ptr += 1; - continue; - } - - if let Cow::Borrowed(_) = result { - result = Cow::from(&input[0..ptr]); - } - - ptr += 1; - - let new_char = match bytes.get(ptr) { - Some(b'\\') => '\\', - Some(b'"') => '"', - Some(u @ b'u') | Some(u @ b'U') => { - let start = ptr + 1; - let len = if u == &b'u' { 4 } else { 6 }; - ptr += len; - input - .get(start..(start + len)) - .map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice))) - } - _ => UNKNOWN_CHAR, - }; - result.to_mut().push(new_char); - ptr += 1; + let mut result = String::new(); + let owned = unescape(&mut result, input).expect("String write methods don't Err"); + if owned { + Cow::Owned(result) + } else { + Cow::Borrowed(input) } - result }