From 9a199b4a1c501cc72e76e04f6f40941629e0d5ad Mon Sep 17 00:00:00 2001
From: Ivan Medina
Date: Tue, 15 Oct 2024 11:07:51 +0200
Subject: [PATCH] feat(string): add normalize method (#441)

---
 Cargo.toml                                    |  1 +
 nova_vm/Cargo.toml                            |  1 +
 .../string_objects/string_prototype.rs        | 87 ++++++++++++++++++-
 tests/expectations.json                       | 10 ---
 tests/metrics.json                            |  4 +-
 5 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 64b1a367..f08ab537 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,4 +23,5 @@ oxc_syntax = "0.30.3"
 rand = "0.8.5"
 ryu-js = "1.0.1"
 sonic-rs = "0.3.13"
+unicode-normalization = "0.1.24"
 wtf8 = "0.1"
diff --git a/nova_vm/Cargo.toml b/nova_vm/Cargo.toml
index 103c7489..d2433b3d 100644
--- a/nova_vm/Cargo.toml
+++ b/nova_vm/Cargo.toml
@@ -20,6 +20,7 @@ rand = { workspace = true }
 ryu-js = { workspace = true }
 small_string = { path = "../small_string" }
 sonic-rs = { workspace = true, optional = true }
+unicode-normalization = { workspace = true }
 wtf8 = { workspace = true }
 
 [features]
diff --git a/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs b/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs
index 10cc8393..6ee4d6b9 100644
--- a/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs
+++ b/nova_vm/src/ecmascript/builtins/text_processing/string_objects/string_prototype.rs
@@ -2,9 +2,12 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
-use std::{cmp::max, collections::VecDeque, iter::repeat};
+use std::{cmp::max, collections::VecDeque, iter::repeat, str::FromStr};
 
 use small_string::SmallString;
+use unicode_normalization::{
+    is_nfc_quick, is_nfd_quick, is_nfkc_quick, is_nfkd_quick, IsNormalized, UnicodeNormalization,
+};
 
 use crate::{
     ecmascript::{
@@ -567,8 +570,44 @@ impl StringPrototype {
         todo!()
     }
 
-    fn normalize(_agent: &mut Agent, _this_value: Value, _: ArgumentsList) -> JsResult<Value> {
-        todo!()
+    /// ### [22.1.3.15 String.prototype.normalize ( \[ form \] )](https://tc39.es/ecma262/#sec-string.prototype.normalize)
+    fn normalize(
+        agent: &mut Agent,
+        this_value: Value,
+        arguments: ArgumentsList,
+    ) -> JsResult<Value> {
+        // 1. Let O be ? RequireObjectCoercible(this value).
+        let o = require_object_coercible(agent, this_value)?;
+
+        // 2. Let S be ? ToString(O).
+        let s = to_string(agent, o)?;
+
+        // 3. If form is undefined, let f be "NFC".
+        let form = arguments.get(0);
+        let f = if form.is_undefined() {
+            NormalizeForm::Nfc
+        } else {
+            // 4. Else, let f be ? ToString(form).
+            let f = to_string(agent, form)?;
+            let form_result = NormalizeForm::from_str(f.as_str(agent));
+            match form_result {
+                Ok(form) => form,
+                // 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
+                Err(()) => {
+                    return Err(agent.throw_exception_with_static_message(
+                        ExceptionType::RangeError,
+                        "The normalization form should be one of NFC, NFD, NFKC, NFKD.",
+                    ))
+                }
+            }
+        };
+
+        // 6. Let ns be the String value that is the result of normalizing S into the normalization form named by f as specified in the latest Unicode Standard, Normalization Forms.
+        match unicode_normalize(s.as_str(agent), f) {
+            // 7. Return ns.
+            None => Ok(s.into_value()),
+            Some(ns) => Ok(Value::from_string(agent, ns).into_value()),
+        }
     }
 
     /// ### [22.1.3.16 String.prototype.padEnd ( maxLength \[ , fillString \] )](https://tc39.es/ecma262/#sec-string.prototype.padend)
@@ -1478,3 +1517,45 @@ enum TrimWhere {
     End,
     StartAndEnd,
 }
+
+enum NormalizeForm {
+    Nfc,
+    Nfd,
+    Nfkc,
+    Nfkd,
+}
+
+impl FromStr for NormalizeForm {
+    type Err = ();
+
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        match input {
+            "NFC" => Ok(NormalizeForm::Nfc),
+            "NFD" => Ok(NormalizeForm::Nfd),
+            "NFKC" => Ok(NormalizeForm::Nfkc),
+            "NFKD" => Ok(NormalizeForm::Nfkd),
+            _ => Err(()),
+        }
+    }
+}
+
+fn unicode_normalize(s: &str, f: NormalizeForm) -> Option<String> {
+    match f {
+        NormalizeForm::Nfc => match is_nfc_quick(s.chars()) {
+            IsNormalized::Yes => None,
+            _ => Some(s.nfc().collect::<String>()),
+        },
+        NormalizeForm::Nfd => match is_nfd_quick(s.chars()) {
+            IsNormalized::Yes => None,
+            _ => Some(s.nfd().collect::<String>()),
+        },
+        NormalizeForm::Nfkc => match is_nfkc_quick(s.chars()) {
+            IsNormalized::Yes => None,
+            _ => Some(s.nfkc().collect::<String>()),
+        },
+        NormalizeForm::Nfkd => match is_nfkd_quick(s.chars()) {
+            IsNormalized::Yes => None,
+            _ => Some(s.nfkd().collect::<String>()),
+        },
+    }
+}
diff --git a/tests/expectations.json b/tests/expectations.json
index 736549b1..7eb1f549 100644
--- a/tests/expectations.json
+++ b/tests/expectations.json
@@ -6207,16 +6207,6 @@
   "built-ins/String/prototype/matchAll/regexp-prototype-matchAll-v-u-flag.js": "CRASH",
   "built-ins/String/prototype/matchAll/this-val-non-obj-coercible.js": "CRASH",
   "built-ins/String/prototype/matchAll/toString-this-val.js": "CRASH",
-  "built-ins/String/prototype/normalize/form-is-not-valid-throws.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-abrupt-from-form-as-symbol.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-abrupt-from-form.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-abrupt-from-this-as-symbol.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-abrupt-from-this.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js": "CRASH",
-  "built-ins/String/prototype/normalize/return-normalized-string.js": "CRASH",
-  "built-ins/String/prototype/normalize/this-is-null-throws.js": "CRASH",
-  "built-ins/String/prototype/normalize/this-is-undefined-throws.js": "CRASH",
   "built-ins/String/prototype/padEnd/normal-operation.js": "CRASH",
   "built-ins/String/prototype/padStart/normal-operation.js": "CRASH",
   "built-ins/String/prototype/repeat/repeat-string-n-times.js": "TIMEOUT",
diff --git a/tests/metrics.json b/tests/metrics.json
index 6bbd81a3..19f5dd95 100644
--- a/tests/metrics.json
+++ b/tests/metrics.json
@@ -1,8 +1,8 @@
 {
   "results": {
-    "crash": 16307,
+    "crash": 16297,
     "fail": 8261,
-    "pass": 20680,
+    "pass": 20690,
     "skip": 40,
     "timeout": 3,
     "unresolved": 0