Skip to content

Commit

Permalink
[move] Use u256 instead of bigint for parsing (#19998)
Browse files Browse the repository at this point in the history
## Description 

Switches to using u256 instead of bigint for parsing. 

## Test plan 

CI + additional tests to make sure parity was preserved.
  • Loading branch information
tzakian authored Oct 25, 2024
1 parent c4826d0 commit 3162494
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 50 deletions.
2 changes: 1 addition & 1 deletion crates/sui-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ mod tests {
#[test]
fn test_parse_sui_struct_tag_long_account_addr() {
let result = parse_sui_struct_tag(
"0x00000000000000000000000000000000000000000000000000000000000000002::sui::SUI",
"0x0000000000000000000000000000000000000000000000000000000000000002::sui::SUI",
)
.expect("should not error");

Expand Down
1 change: 0 additions & 1 deletion external-crates/move/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ memory-stats = "1.0.0"
mirai-annotations = "1.10.1"
named-lock = "0.2.0"
num = "0.4.0"
num-bigint = "0.4.0"
num_cpus = "1.13.0"
once_cell = "1.7.2"
ouroboros = "0.17.2"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

use crate::account_address::AccountAddress;
use crate::parsing::parser::{parse_address_number, NumberFormat};
use crate::u256::U256;
use anyhow::anyhow;
use num::BigUint;
use std::{fmt, hash::Hash};

// Parsed Address, either a name or a numerical address
Expand Down Expand Up @@ -62,10 +62,7 @@ impl NumericalAddress {

pub fn parse_str(s: &str) -> Result<NumericalAddress, String> {
match parse_address_number(s) {
Some((n, format)) => Ok(NumericalAddress {
bytes: AccountAddress::new(n),
format,
}),
Some((n, format)) => Ok(NumericalAddress { bytes: n, format }),
None =>
// TODO the kind of error is in an unstable nightly API
// But currently the only way this should fail is if the number is too long
Expand All @@ -90,7 +87,7 @@ impl fmt::Display for NumericalAddress {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.format {
NumberFormat::Decimal => {
let n = BigUint::from_bytes_be(self.bytes.as_ref());
let n = U256::from_be_bytes(&self.bytes);
write!(f, "{}", n)
}
NumberFormat::Hex => write!(f, "{:#X}", self),
Expand Down
28 changes: 16 additions & 12 deletions external-crates/move/crates/move-core-types/src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ use crate::{
u256::{U256FromStrError, U256},
};
use anyhow::{anyhow, bail, Result};
use num::BigUint;
use std::{fmt::Display, iter::Peekable, num::ParseIntError};

const MAX_TYPE_DEPTH: u64 = 128;
const MAX_TYPE_NODE_COUNT: u64 = 256;
// See: https://stackoverflow.com/questions/43787672/the-max-number-of-digits-in-an-int-based-on-number-of-bits
const U256_MAX_DECIMAL_DIGITS: usize = 241 * AccountAddress::LENGTH / 100 + 1;

pub trait Token: Display + Copy + Eq {
fn is_whitespace(&self) -> bool;
Expand Down Expand Up @@ -447,20 +448,23 @@ pub fn parse_u256(s: &str) -> Result<(U256, NumberFormat), U256FromStrError> {
}

// Parse an address from a decimal or hex encoding
pub fn parse_address_number(s: &str) -> Option<([u8; AccountAddress::LENGTH], NumberFormat)> {
pub fn parse_address_number(s: &str) -> Option<(AccountAddress, NumberFormat)> {
let (txt, base) = determine_num_text_and_base(s);
let parsed = BigUint::parse_bytes(
txt.as_bytes(),
let txt = txt.replace('_', "");
let max_len = match base {
NumberFormat::Hex => AccountAddress::LENGTH * 2,
NumberFormat::Decimal => U256_MAX_DECIMAL_DIGITS,
};
if txt.len() > max_len {
return None;
}
let parsed = U256::from_str_radix(
&txt,
match base {
NumberFormat::Hex => 16,
NumberFormat::Decimal => 10,
},
)?;
let bytes = parsed.to_bytes_be();
if bytes.len() > AccountAddress::LENGTH {
return None;
}
let mut result = [0u8; AccountAddress::LENGTH];
result[(AccountAddress::LENGTH - bytes.len())..].clone_from_slice(&bytes);
Some((result, base))
)
.ok()?;
Some((AccountAddress::new(parsed.to_be_bytes()), base))
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ impl Token for TypeToken {
Some(':') => (Self::ColonColon, 2),
_ => bail!("unrecognized token: {}", s),
},
'0' if matches!(chars.peek(), Some('x') | Some('X')) => {
'0' if matches!(chars.peek(), Some('x')) => {
chars.next().unwrap();
match chars.next() {
Some(c) if c.is_ascii_hexdigit() || c == '_' => {
Some(c) if c.is_ascii_hexdigit() => {
// 0x + c + remaining
let len = 3 + chars
.take_while(|q| char::is_ascii_hexdigit(q) || *q == '_')
Expand All @@ -106,7 +106,9 @@ impl Token for TypeToken {
}
c if c.is_ascii_digit() => {
// c + remaining
let len = 1 + chars.take_while(char::is_ascii_digit).count();
let len = 1 + chars
.take_while(|c| c.is_ascii_digit() || *c == '_')
.count();
(Self::AddressIdent, len)
}
c if c.is_ascii_whitespace() => {
Expand Down
12 changes: 12 additions & 0 deletions external-crates/move/crates/move-core-types/src/u256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,13 +308,25 @@ impl U256 {
Self(PrimitiveU256::from_little_endian(slice))
}

/// U256 from 32 big endian bytes
pub fn from_be_bytes(slice: &[u8; U256_NUM_BYTES]) -> Self {
Self(PrimitiveU256::from_big_endian(slice))
}

/// U256 to 32 little endian bytes
pub fn to_le_bytes(self) -> [u8; U256_NUM_BYTES] {
let mut bytes = [0u8; U256_NUM_BYTES];
self.0.to_little_endian(&mut bytes);
bytes
}

/// U256 to 32 big endian bytes
pub fn to_be_bytes(self) -> [u8; U256_NUM_BYTES] {
let mut bytes = [0u8; U256_NUM_BYTES];
self.0.to_big_endian(&mut bytes);
bytes
}

/// Leading zeros of the number
pub fn leading_zeros(&self) -> u32 {
self.0.leading_zeros()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,51 @@ use crate::{
u256::U256,
};
use anyhow::bail;
use num::BigUint;
use proptest::{prelude::*, proptest};
use std::str::FromStr;

const VALID_ADDRS: &[&str] = &[
"0x0",
"0x1",
"1",
"123",
"0x123",
"0x1234567890abcdef",
"100_00_00",
"0x0_0_0_0",
"0x0000000000000000000000000000000000000000000000000000000000000000",
"0x0_00000_0000000000000000000000000000000000000000000000000_000000000",
"000000000000000000000000000000000000000000000000000000000000000000000000000000",
"00_0000000000000000000000000000000000000000000000000000000_00000000000000000_0000",
];

const INVALID_ADDRS: &[&str] = &[
"_x",
"0x",
"_0x0",
"_0",
"0x_",
"0x_00",
"+0x0",
"+0",
"0xg",
"0x0g",
"0X0",
"_0x0",
"_0x0_",
"_0",
"_0_",
"_00_",
"_0_0_",
"0x_00",
"0x00000000000000000000000000000000000000000000000000000000000000000",
"0x0000000000000000000000000000000000000000000000000000000000_0000000",
"0x_0_00000_0000000000000000000000000000000000000000000000000_000000000",
"0000000000000000000000000000000000000000000000000000000000000000000000000000000",
"000_0000000000000000000000000000000000000000000000000000000_00000000000000000_0000",
];

#[allow(clippy::unreadable_literal)]
#[test]
fn tests_parse_value_positive() {
Expand Down Expand Up @@ -158,27 +200,6 @@ fn tests_parse_value_negative() {
}
}

#[test]
fn test_parse_type_negative() {
for s in &[
"_",
"_::_::_",
"0x1::_",
"0x1::__::_",
"0x1::_::__",
"0x1::_::foo",
"0x1::foo::_",
"0x1::_::_",
"0x1::bar::foo<0x1::_::foo>",
"0X1::bar::bar",
] {
assert!(
TypeTag::from_str(s).is_err(),
"Parsed type {s} but should have failed"
);
}
}

#[test]
fn test_parse_struct_negative() {
for s in &[
Expand All @@ -202,6 +223,9 @@ fn test_parse_struct_negative() {
"0x1::Foo::Foo,>",
"0x1::Foo::Foo>",
"0x1::Foo::Foo,",
"_0x0_0::a::a",
"_0x_00::a::a",
"_0_0::a::a",
] {
assert!(
TypeTag::from_str(s).is_err(),
Expand All @@ -213,7 +237,12 @@ fn test_parse_struct_negative() {
#[test]
fn test_type_type() {
for s in &[
"u8",
"u16",
"u32",
"u64",
"u128",
"u256",
"bool",
"vector<u8>",
"vector<vector<u64>>",
Expand All @@ -234,9 +263,27 @@ fn test_type_type() {
"0x1::__::__",
"0x1::_bar::_BAR<0x2::_____::______fooo______>",
"0x1::__::__<0x2::_____::______fooo______, 0xff::Bar____::_______foo>",
"0x0_0::a::a",
"0_0::a::a",
] {
assert!(TypeTag::from_str(s).is_ok(), "Failed to parse type {}", s);
}

for valid_addr in VALID_ADDRS {
assert!(
TypeTag::from_str(&format!("{valid_addr}::a::a")).is_ok(),
"Failed to parse type {}::a::a",
valid_addr
);
}

for invalid_addr in INVALID_ADDRS {
assert!(
TypeTag::from_str(&format!("{invalid_addr}::a::a")).is_err(),
"Parse type {}::a::a but should have failed",
invalid_addr
);
}
}

#[test]
Expand Down Expand Up @@ -281,17 +328,17 @@ fn test_parse_valid_struct_type() {

#[test]
fn test_parse_type_list() {
let valid_with_trails = vec![
let valid_with_trails = &[
"<u64,>",
"<u64, 0x0::a::a,>",
"<u64, 0x0::a::a, 0x0::a::a<0x0::a::a>,>",
];
let valid_no_trails = vec![
let valid_no_trails = &[
"<u64>",
"<u64, 0x0::a::a>",
"<u64, 0x0::a::a, 0x0::a::a<0x0::a::a>>",
];
let invalid = vec![
let invalid = &[
"<>",
"<,>",
"<u64,,>",
Expand Down Expand Up @@ -327,15 +374,15 @@ fn test_parse_type_list() {
assert!(parse_type_tags(t, true).is_ok());
}

for t in &valid_no_trails {
for t in valid_no_trails {
assert!(parse_type_tags(t, false).is_ok());
}

for t in &valid_with_trails {
for t in valid_with_trails {
assert!(parse_type_tags(t, false).is_err());
}

for t in &invalid {
for t in invalid {
assert!(parse_type_tags(t, true).is_err(), "parsed type {}", t);
assert!(parse_type_tags(t, false).is_err(), "parsed type {}", t);
}
Expand Down Expand Up @@ -396,6 +443,21 @@ fn parse_type_tags(s: &str, allow_trailing_delim: bool) -> anyhow::Result<Vec<Pa
})
}

#[test]
fn address_parsing() {
for valid_addr in VALID_ADDRS {
assert!(
ParsedAddress::parse(valid_addr).is_ok(),
"parsed address {}",
valid_addr
);
}

for invalid_addr in INVALID_ADDRS {
assert!(ParsedAddress::parse(invalid_addr).is_err());
}
}

proptest! {
#[test]
fn parse_type_tag_list(t in struct_type_gen0(), args in proptest::collection::vec(struct_type_gen0(), 1..=100)) {
Expand Down Expand Up @@ -534,6 +596,32 @@ proptest! {
prop_assert!(TypeTag::from_str(&s).is_err());
}

#[test]
fn decimal_parse_parity(s in "[0-9]{64}") {
let bigint_parsed = {
let bytes = BigUint::parse_bytes(s.as_bytes(), 10).unwrap().to_bytes_be();
let mut result = [0u8; AccountAddress::LENGTH];
result[(AccountAddress::LENGTH - bytes.len())..].clone_from_slice(&bytes);
result
};
let u256_parsed = U256::from_str(&s).unwrap();
prop_assert_eq!(bigint_parsed, u256_parsed.to_be_bytes(), "Parsed addresses do not match: {}", s);
}

#[test]
fn hex_parse_parity(s in "0x[0-9a-fA-F]{1,64}") {
let bigint_parsed = {
let bytes = BigUint::parse_bytes(s[2..].as_bytes(), 16).unwrap().to_bytes_be();
let mut result = [0u8; AccountAddress::LENGTH];
result[(AccountAddress::LENGTH - bytes.len())..].clone_from_slice(&bytes);
result
};
let addr_parsed = AccountAddress::from_hex_literal(&s).unwrap().into_bytes();
let u256_parsed = AccountAddress::new(U256::from_str_radix(&s[2..], 16).unwrap().to_be_bytes()).into_bytes();
prop_assert_eq!(bigint_parsed, addr_parsed, "Parsed addresses do not match: {}", s);
prop_assert_eq!(addr_parsed, u256_parsed, "Parsed addresses do not match: {}", s);
}

#[test]
fn test_parse_different_length_numeric_addresses(s in "[0-9]{1,63}", x in r#"[^a-zA-Z0-9_\s]+"#) {
prop_assert!(AccountAddress::from_str(&s).is_err());
Expand Down

0 comments on commit 3162494

Please sign in to comment.