Skip to content

Commit

Permalink
feat(string): add normalize method (#441)
Browse files Browse the repository at this point in the history
  • Loading branch information
ivandevp authored Oct 15, 2024
1 parent 50e4856 commit 9a199b4
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 15 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ oxc_syntax = "0.30.3"
rand = "0.8.5"
ryu-js = "1.0.1"
sonic-rs = "0.3.13"
unicode-normalization = "0.1.24"
wtf8 = "0.1"
1 change: 1 addition & 0 deletions nova_vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ rand = { workspace = true }
ryu-js = { workspace = true }
small_string = { path = "../small_string" }
sonic-rs = { workspace = true, optional = true }
unicode-normalization = { workspace = true }
wtf8 = { workspace = true }

[features]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use std::{cmp::max, collections::VecDeque, iter::repeat};
use std::{cmp::max, collections::VecDeque, iter::repeat, str::FromStr};

use small_string::SmallString;
use unicode_normalization::{
is_nfc_quick, is_nfd_quick, is_nfkc_quick, is_nfkd_quick, IsNormalized, UnicodeNormalization,
};

use crate::{
ecmascript::{
Expand Down Expand Up @@ -567,8 +570,44 @@ impl StringPrototype {
todo!()
}

fn normalize(_agent: &mut Agent, _this_value: Value, _: ArgumentsList) -> JsResult<Value> {
todo!()
/// ### [22.1.3.15 String.prototype.normalize ( \[ form \] )](https://tc39.es/ecma262/#sec-string.prototype.normalize)
///
/// Converts the `this` value to a string and returns it normalized into the
/// requested Unicode normalization form. The form defaults to NFC when the
/// first argument is undefined; any form name other than "NFC", "NFD",
/// "NFKC" or "NFKD" results in a RangeError.
fn normalize(
    agent: &mut Agent,
    this_value: Value,
    arguments: ArgumentsList,
) -> JsResult<Value> {
    // 1. Let O be ? RequireObjectCoercible(this value).
    let o = require_object_coercible(agent, this_value)?;
    // 2. Let S be ? ToString(O).
    let s = to_string(agent, o)?;

    let form = arguments.get(0);
    let f = if !form.is_undefined() {
        // 4. Else, let f be ? ToString(form).
        let requested = to_string(agent, form)?;
        match NormalizeForm::from_str(requested.as_str(agent)) {
            Ok(parsed) => parsed,
            // 5. If f is not one of "NFC", "NFD", "NFKC", or "NFKD", throw a RangeError exception.
            Err(()) => {
                return Err(agent.throw_exception_with_static_message(
                    ExceptionType::RangeError,
                    "The normalization form should be one of NFC, NFD, NFKC, NFKD.",
                ));
            }
        }
    } else {
        // 3. If form is undefined, let f be "NFC".
        NormalizeForm::Nfc
    };

    // 6. Let ns be the String value that is the result of normalizing S into the normalization form named by f as specified in the latest Unicode Standard, Normalization Forms.
    let result = match unicode_normalize(s.as_str(agent), f) {
        Some(ns) => Value::from_string(agent, ns).into_value(),
        // The quick check reported S as already normalized, so S itself is returned.
        None => s.into_value(),
    };

    // 7. Return ns.
    Ok(result)
}

/// ### [22.1.3.16 String.prototype.padEnd ( maxLength \[ , fillString \] )](https://tc39.es/ecma262/#sec-string.prototype.padend)
Expand Down Expand Up @@ -1478,3 +1517,45 @@ enum TrimWhere {
End,
StartAndEnd,
}

/// Unicode normalization forms accepted by `String.prototype.normalize`.
///
/// Each variant corresponds to one of the form names "NFC", "NFD", "NFKC"
/// and "NFKD"; parsing from those names is provided by the `FromStr`
/// implementation for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NormalizeForm {
    /// The "NFC" form.
    Nfc,
    /// The "NFD" form.
    Nfd,
    /// The "NFKC" form.
    Nfkc,
    /// The "NFKD" form.
    Nfkd,
}

impl FromStr for NormalizeForm {
type Err = ();

fn from_str(input: &str) -> Result<NormalizeForm, Self::Err> {
match input {
"NFC" => Ok(NormalizeForm::Nfc),
"NFD" => Ok(NormalizeForm::Nfd),
"NFKC" => Ok(NormalizeForm::Nfkc),
"NFKD" => Ok(NormalizeForm::Nfkd),
_ => Err(()),
}
}
}

/// Normalizes `s` into the Unicode normalization form selected by `f`.
///
/// Returns `None` when the quick check for the requested form answers
/// `IsNormalized::Yes`, so the caller can reuse the input without allocating
/// a new string. For any other quick-check answer the input is run through
/// the normalizer for that form and the collected result is returned in
/// `Some`.
fn unicode_normalize(s: &str, f: NormalizeForm) -> Option<std::string::String> {
    // Run the quick check that corresponds to the requested form.
    let quick_check = match f {
        NormalizeForm::Nfc => is_nfc_quick(s.chars()),
        NormalizeForm::Nfd => is_nfd_quick(s.chars()),
        NormalizeForm::Nfkc => is_nfkc_quick(s.chars()),
        NormalizeForm::Nfkd => is_nfkd_quick(s.chars()),
    };

    if let IsNormalized::Yes = quick_check {
        return None;
    }

    // Any answer other than `Yes` falls through to an actual normalization pass.
    let normalized: std::string::String = match f {
        NormalizeForm::Nfc => s.nfc().collect(),
        NormalizeForm::Nfd => s.nfd().collect(),
        NormalizeForm::Nfkc => s.nfkc().collect(),
        NormalizeForm::Nfkd => s.nfkd().collect(),
    };
    Some(normalized)
}
10 changes: 0 additions & 10 deletions tests/expectations.json
Original file line number Diff line number Diff line change
Expand Up @@ -6207,16 +6207,6 @@
"built-ins/String/prototype/matchAll/regexp-prototype-matchAll-v-u-flag.js": "CRASH",
"built-ins/String/prototype/matchAll/this-val-non-obj-coercible.js": "CRASH",
"built-ins/String/prototype/matchAll/toString-this-val.js": "CRASH",
"built-ins/String/prototype/normalize/form-is-not-valid-throws.js": "CRASH",
"built-ins/String/prototype/normalize/return-abrupt-from-form-as-symbol.js": "CRASH",
"built-ins/String/prototype/normalize/return-abrupt-from-form.js": "CRASH",
"built-ins/String/prototype/normalize/return-abrupt-from-this-as-symbol.js": "CRASH",
"built-ins/String/prototype/normalize/return-abrupt-from-this.js": "CRASH",
"built-ins/String/prototype/normalize/return-normalized-string-from-coerced-form.js": "CRASH",
"built-ins/String/prototype/normalize/return-normalized-string-using-default-parameter.js": "CRASH",
"built-ins/String/prototype/normalize/return-normalized-string.js": "CRASH",
"built-ins/String/prototype/normalize/this-is-null-throws.js": "CRASH",
"built-ins/String/prototype/normalize/this-is-undefined-throws.js": "CRASH",
"built-ins/String/prototype/padEnd/normal-operation.js": "CRASH",
"built-ins/String/prototype/padStart/normal-operation.js": "CRASH",
"built-ins/String/prototype/repeat/repeat-string-n-times.js": "TIMEOUT",
Expand Down
4 changes: 2 additions & 2 deletions tests/metrics.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"results": {
"crash": 16307,
"crash": 16297,
"fail": 8261,
"pass": 20680,
"pass": 20690,
"skip": 40,
"timeout": 3,
"unresolved": 0
Expand Down

0 comments on commit 9a199b4

Please sign in to comment.