From 316249457ba8a246e96cbf56727f4628bfa44f48 Mon Sep 17 00:00:00 2001 From: Tim Zakian <2895723+tzakian@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:55:21 -0700 Subject: [PATCH] [move] Use u256 instead of bigint for parsing (#19998) ## Description Switches to using u256 instead of bigint for parsing. ## Test plan CI + additional tests to make sure parity was preserved. --- crates/sui-types/src/lib.rs | 2 +- external-crates/move/Cargo.toml | 1 - .../move-core-types/src/parsing/address.rs | 9 +- .../move-core-types/src/parsing/parser.rs | 28 ++-- .../move-core-types/src/parsing/types.rs | 8 +- .../move/crates/move-core-types/src/u256.rs | 12 ++ .../src/unit_tests/parsing_test.rs | 142 ++++++++++++++---- 7 files changed, 152 insertions(+), 50 deletions(-) diff --git a/crates/sui-types/src/lib.rs b/crates/sui-types/src/lib.rs index b956b6a38de6c..0a3c4dc45fe90 100644 --- a/crates/sui-types/src/lib.rs +++ b/crates/sui-types/src/lib.rs @@ -384,7 +384,7 @@ mod tests { #[test] fn test_parse_sui_struct_tag_long_account_addr() { let result = parse_sui_struct_tag( - "0x00000000000000000000000000000000000000000000000000000000000000002::sui::SUI", + "0x0000000000000000000000000000000000000000000000000000000000000002::sui::SUI", ) .expect("should not error"); diff --git a/external-crates/move/Cargo.toml b/external-crates/move/Cargo.toml index 3231118dec233..d7f9e31040a67 100644 --- a/external-crates/move/Cargo.toml +++ b/external-crates/move/Cargo.toml @@ -76,7 +76,6 @@ memory-stats = "1.0.0" mirai-annotations = "1.10.1" named-lock = "0.2.0" num = "0.4.0" -num-bigint = "0.4.0" num_cpus = "1.13.0" once_cell = "1.7.2" ouroboros = "0.17.2" diff --git a/external-crates/move/crates/move-core-types/src/parsing/address.rs b/external-crates/move/crates/move-core-types/src/parsing/address.rs index b479e76dcf23c..44ff9810fc613 100644 --- a/external-crates/move/crates/move-core-types/src/parsing/address.rs +++ b/external-crates/move/crates/move-core-types/src/parsing/address.rs @@ -3,8 +3,8 @@ use crate::account_address::AccountAddress; use crate::parsing::parser::{parse_address_number, NumberFormat}; +use crate::u256::U256; use anyhow::anyhow; -use num::BigUint; use std::{fmt, hash::Hash}; // Parsed Address, either a name or a numerical address @@ -62,10 +62,7 @@ impl NumericalAddress { pub fn parse_str(s: &str) -> Result { match parse_address_number(s) { - Some((n, format)) => Ok(NumericalAddress { - bytes: AccountAddress::new(n), - format, - }), + Some((n, format)) => Ok(NumericalAddress { bytes: n, format }), None => // TODO the kind of error is in an unstable nightly API // But currently the only way this should fail is if the number is too long @@ -90,7 +87,7 @@ impl fmt::Display for NumericalAddress { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.format { NumberFormat::Decimal => { - let n = BigUint::from_bytes_be(self.bytes.as_ref()); + let n = U256::from_be_bytes(&self.bytes); write!(f, "{}", n) } NumberFormat::Hex => write!(f, "{:#X}", self), diff --git a/external-crates/move/crates/move-core-types/src/parsing/parser.rs b/external-crates/move/crates/move-core-types/src/parsing/parser.rs index 1245c34239d54..eba50aef13801 100644 --- a/external-crates/move/crates/move-core-types/src/parsing/parser.rs +++ b/external-crates/move/crates/move-core-types/src/parsing/parser.rs @@ -11,11 +11,12 @@ use crate::{ u256::{U256FromStrError, U256}, }; use anyhow::{anyhow, bail, Result}; -use num::BigUint; use std::{fmt::Display, iter::Peekable, num::ParseIntError}; const MAX_TYPE_DEPTH: u64 = 128; const MAX_TYPE_NODE_COUNT: u64 = 256; +// See: https://stackoverflow.com/questions/43787672/the-max-number-of-digits-in-an-int-based-on-number-of-bits +const U256_MAX_DECIMAL_DIGITS: usize = 241 * AccountAddress::LENGTH / 100 + 1; pub trait Token: Display + Copy + Eq { fn is_whitespace(&self) -> bool; @@ -447,20 +448,23 @@ pub fn parse_u256(s: &str) -> Result<(U256, NumberFormat), U256FromStrError> { } // Parse an address from a decimal or hex encoding -pub fn parse_address_number(s: &str) -> Option<([u8; AccountAddress::LENGTH], NumberFormat)> { +pub fn parse_address_number(s: &str) -> Option<(AccountAddress, NumberFormat)> { let (txt, base) = determine_num_text_and_base(s); - let parsed = BigUint::parse_bytes( - txt.as_bytes(), + let txt = txt.replace('_', ""); + let max_len = match base { + NumberFormat::Hex => AccountAddress::LENGTH * 2, + NumberFormat::Decimal => U256_MAX_DECIMAL_DIGITS, + }; + if txt.len() > max_len { + return None; + } + let parsed = U256::from_str_radix( + &txt, match base { NumberFormat::Hex => 16, NumberFormat::Decimal => 10, }, - )?; - let bytes = parsed.to_bytes_be(); - if bytes.len() > AccountAddress::LENGTH { - return None; - } - let mut result = [0u8; AccountAddress::LENGTH]; - result[(AccountAddress::LENGTH - bytes.len())..].clone_from_slice(&bytes); - Some((result, base)) + ) + .ok()?; + Some((AccountAddress::new(parsed.to_be_bytes()), base)) } diff --git a/external-crates/move/crates/move-core-types/src/parsing/types.rs b/external-crates/move/crates/move-core-types/src/parsing/types.rs index f011d4fc62cb9..acdb789d93156 100644 --- a/external-crates/move/crates/move-core-types/src/parsing/types.rs +++ b/external-crates/move/crates/move-core-types/src/parsing/types.rs @@ -91,10 +91,10 @@ impl Token for TypeToken { Some(':') => (Self::ColonColon, 2), _ => bail!("unrecognized token: {}", s), }, - '0' if matches!(chars.peek(), Some('x') | Some('X')) => { + '0' if matches!(chars.peek(), Some('x')) => { chars.next().unwrap(); match chars.next() { - Some(c) if c.is_ascii_hexdigit() || c == '_' => { + Some(c) if c.is_ascii_hexdigit() => { // 0x + c + remaining let len = 3 + chars .take_while(|q| char::is_ascii_hexdigit(q) || *q == '_') @@ -106,7 +106,9 @@ impl Token for TypeToken { } c if c.is_ascii_digit() => { // c + remaining - let len = 1 + chars.take_while(char::is_ascii_digit).count(); + let len = 1 + chars + .take_while(|c| c.is_ascii_digit() || *c == '_') + .count(); (Self::AddressIdent, len) } c if c.is_ascii_whitespace() => { diff --git a/external-crates/move/crates/move-core-types/src/u256.rs b/external-crates/move/crates/move-core-types/src/u256.rs index d47245857df5c..10657683d6eaf 100644 --- a/external-crates/move/crates/move-core-types/src/u256.rs +++ b/external-crates/move/crates/move-core-types/src/u256.rs @@ -308,6 +308,11 @@ impl U256 { Self(PrimitiveU256::from_little_endian(slice)) } + /// U256 from 32 big endian bytes + pub fn from_be_bytes(slice: &[u8; U256_NUM_BYTES]) -> Self { + Self(PrimitiveU256::from_big_endian(slice)) + } + /// U256 to 32 little endian bytes pub fn to_le_bytes(self) -> [u8; U256_NUM_BYTES] { let mut bytes = [0u8; U256_NUM_BYTES]; @@ -315,6 +320,13 @@ impl U256 { bytes } + /// U256 to 32 big endian bytes + pub fn to_be_bytes(self) -> [u8; U256_NUM_BYTES] { + let mut bytes = [0u8; U256_NUM_BYTES]; + self.0.to_big_endian(&mut bytes); + bytes + } + /// Leading zeros of the number pub fn leading_zeros(&self) -> u32 { self.0.leading_zeros() diff --git a/external-crates/move/crates/move-core-types/src/unit_tests/parsing_test.rs b/external-crates/move/crates/move-core-types/src/unit_tests/parsing_test.rs index 426d03abc92a4..2d69de0be29c6 100644 --- a/external-crates/move/crates/move-core-types/src/unit_tests/parsing_test.rs +++ b/external-crates/move/crates/move-core-types/src/unit_tests/parsing_test.rs @@ -11,9 +11,51 @@ use crate::{ u256::U256, }; use anyhow::bail; +use num::BigUint; use proptest::{prelude::*, proptest}; use std::str::FromStr; +const VALID_ADDRS: &[&str] = &[ + "0x0", + "0x1", + "1", + "123", + "0x123", + "0x1234567890abcdef", + "100_00_00", + "0x0_0_0_0", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x0_00000_0000000000000000000000000000000000000000000000000_000000000", + "000000000000000000000000000000000000000000000000000000000000000000000000000000", + "00_0000000000000000000000000000000000000000000000000000000_00000000000000000_0000", +]; + +const INVALID_ADDRS: &[&str] = &[ + "_x", + "0x", + "_0x0", + "_0", + "0x_", + "0x_00", + "+0x0", + "+0", + "0xg", + "0x0g", + "0X0", + "_0x0", + "_0x0_", + "_0", + "_0_", + "_00_", + "_0_0_", + "0x_00", + "0x00000000000000000000000000000000000000000000000000000000000000000", + "0x0000000000000000000000000000000000000000000000000000000000_0000000", + "0x_0_00000_0000000000000000000000000000000000000000000000000_000000000", + "0000000000000000000000000000000000000000000000000000000000000000000000000000000", + "000_0000000000000000000000000000000000000000000000000000000_00000000000000000_0000", +]; + #[allow(clippy::unreadable_literal)] #[test] fn tests_parse_value_positive() { @@ -158,27 +200,6 @@ fn tests_parse_value_negative() { } } -#[test] -fn test_parse_type_negative() { - for s in &[ - "_", - "_::_::_", - "0x1::_", - "0x1::__::_", - "0x1::_::__", - "0x1::_::foo", - "0x1::foo::_", - "0x1::_::_", - "0x1::bar::foo<0x1::_::foo>", - "0X1::bar::bar", - ] { - assert!( - TypeTag::from_str(s).is_err(), - "Parsed type {s} but should have failed" - ); - } -} - #[test] fn test_parse_struct_negative() { for s in &[ @@ -202,6 +223,9 @@ fn test_parse_struct_negative() { "0x1::Foo::Foo,>", "0x1::Foo::Foo>", "0x1::Foo::Foo,", + "_0x0_0::a::a", + "_0x_00::a::a", + "_0_0::a::a", ] { assert!( TypeTag::from_str(s).is_err(), @@ -213,7 +237,12 @@ fn test_parse_struct_negative() { #[test] fn test_type_type() { for s in &[ + "u8", + "u16", + "u32", "u64", + "u128", + "u256", "bool", "vector", "vector>", @@ -234,9 +263,27 @@ fn test_type_type() { "0x1::__::__", "0x1::_bar::_BAR<0x2::_____::______fooo______>", "0x1::__::__<0x2::_____::______fooo______, 0xff::Bar____::_______foo>", + "0x0_0::a::a", + "0_0::a::a", ] { assert!(TypeTag::from_str(s).is_ok(), "Failed to parse type {}", s); } + + for valid_addr in VALID_ADDRS { + assert!( + TypeTag::from_str(&format!("{valid_addr}::a::a")).is_ok(), + "Failed to parse type {}::a::a", + valid_addr + ); + } + + for invalid_addr in INVALID_ADDRS { + assert!( + TypeTag::from_str(&format!("{invalid_addr}::a::a")).is_err(), + "Parse type {}::a::a but should have failed", + invalid_addr + ); + } } #[test] @@ -281,17 +328,17 @@ fn test_parse_valid_struct_type() { #[test] fn test_parse_type_list() { - let valid_with_trails = vec![ + let valid_with_trails = &[ "", "", ",>", ]; - let valid_no_trails = vec![ + let valid_no_trails = &[ "", "", ">", ]; - let invalid = vec![ + let invalid = &[ "<>", "<,>", "", @@ -327,15 +374,15 @@ fn test_parse_type_list() { assert!(parse_type_tags(t, true).is_ok()); } - for t in &valid_no_trails { + for t in valid_no_trails { assert!(parse_type_tags(t, false).is_ok()); } - for t in &valid_with_trails { + for t in valid_with_trails { assert!(parse_type_tags(t, false).is_err()); } - for t in &invalid { + for t in invalid { assert!(parse_type_tags(t, true).is_err(), "parsed type {}", t); assert!(parse_type_tags(t, false).is_err(), "parsed type {}", t); } @@ -396,6 +443,21 @@ fn parse_type_tags(s: &str, allow_trailing_delim: bool) -> anyhow::Result