Skip to content

Commit

Permalink
fix small string
Browse files Browse the repository at this point in the history
Co-authored-by: Aapo Alasuutari <[email protected]>
  • Loading branch information
sno2 and aapoalas committed Jul 24, 2023
1 parent d1051f6 commit b60617c
Showing 1 changed file with 63 additions and 12 deletions.
75 changes: 63 additions & 12 deletions nova_vm/src/small_string.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/// Inline string stored in a fixed 7-byte array.
// NOTE(review): this diff hunk lists two field lines. The replaced lines
// elsewhere in the hunk read `self.data` and the new lines read `self.bytes`,
// so `data` appears to be the old field name and `bytes` its replacement.
#[derive(Clone, Copy)]
pub struct SmallString {
data: [u8; 7],
bytes: [u8; 7],
}

impl std::fmt::Debug for SmallString {
Expand All @@ -11,33 +11,84 @@ impl std::fmt::Debug for SmallString {

impl SmallString {
/// Returns the number of meaningful bytes held in the backing array.
// NOTE(review): the first body line appears to be the replaced expression
// (index of the first zero byte, or 7 when there is none). The iterator chain
// after it scans from the back instead, so zero bytes in the middle of the
// string are counted as part of its length.
pub fn len(&self) -> usize {
self.data.iter().position(|byte| *byte == 0).unwrap_or(7)
// Find the last non-null character and add one to its index to get length.
// `position` on the reversed iterator yields the number of trailing zero
// bytes `i`, so `7 - i` is the index of the last non-zero byte plus one.
// An all-zero array has no match and yields 0.
self.bytes
.as_slice()
.iter()
.rev()
.position(|&x| x != 0)
.map_or(0, |i| 7 - i)
}

/// Returns the stored text as a string slice, without the trailing zero
/// padding.
#[inline]
pub fn as_str(&self) -> &str {
    // SAFETY: the visible constructors copy the bytes verbatim from a `&str`
    // and refuse input ending in a zero byte, and `as_bytes` cuts off only the
    // trailing zero padding. The slice is therefore the original string, which
    // is valid UTF-8 (not necessarily ASCII: multi-byte characters are
    // accepted).
    unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
}

/// Returns the first `self.len()` bytes of the backing array, i.e. the
/// content without the trailing zero padding.
#[inline]
pub fn as_bytes(&self) -> &[u8] {
// NOTE(review): the diff shows two alternative bodies. Both take the prefix
// of length `self.len()`; they differ in the field name and in using range
// indexing versus `split_at`.
&self.data[0..self.len()]
&self.bytes.as_slice().split_at(self.len()).0
}

pub fn as_str(&self) -> &str {
// SAFETY: Guaranteed to be valid UTF-8.
unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
// NOTE(review): the three lines above look like the removed side of this
// hunk (an `as_str` with the same signature is defined earlier in the impl).
// The lines below define the `data` accessor.
/// Returns a reference to the whole 7-byte backing array, including any
/// zero bytes after the content.
#[inline]
pub fn data(&self) -> &[u8; 7] {
return &self.bytes;
}

/// Builds a `SmallString` from a string slice without reporting unsupported
/// input through a `Result`; the length and zero-byte requirements are only
/// checked with `debug_assert!`.
// NOTE(review): the hunk appears to show the replaced implementation first
// (parameter `value`, field `data`) followed by the new one (parameter
// `string`, field `bytes`).
pub(crate) fn from_str_unchecked(value: &str) -> Self {
debug_assert!(value.len() < 8 && !value.as_bytes().contains(&0));
let mut data: [u8; 7] = [0, 0, 0, 0, 0, 0, 0];
// `copy_from_slice` panics unless source and destination have the same
// length, so this call only succeeds for inputs of exactly 7 bytes.
data.copy_from_slice(value.as_bytes());
Self { data }
pub(crate) fn from_str_unchecked(string: &str) -> Self {
let string_bytes = string.as_bytes();

// We have only 7 bytes to work with, and we cannot tell apart
// UTF-8 strings that end with a null byte from our null
// terminator so we must fail to convert on those.
debug_assert!(string_bytes.len() < 8 && string_bytes.last() != Some(&0));

let mut bytes = [0, 0, 0, 0, 0, 0, 0];
// Copy into only the leading `string_bytes.len()` bytes; the remaining
// bytes stay zero. `split_at_mut` panics if that length exceeds 7.
bytes
.as_mut_slice()
.split_at_mut(string_bytes.len())
.0
.copy_from_slice(string_bytes);

Self { bytes }
}
}

/// Fallible conversion from `&str`: yields `Ok` when the condition in
/// `try_from` holds and `Err(())` otherwise.
impl TryFrom<&str> for SmallString {
type Error = ();
fn try_from(value: &str) -> Result<Self, Self::Error> {
// NOTE(review): the next line appears to be the replaced condition, which
// rejects a zero byte anywhere in the input. The condition after the comment
// below rejects a zero byte only in the final position, so interior zero
// bytes are accepted. Both require fewer than 8 bytes.
if value.len() < 8 && !value.as_bytes().contains(&0) {
// We have only 7 bytes to work with, and we cannot tell apart
// UTF-8 strings that end with a null byte from our null
// terminator so we must fail to convert on those.
if value.len() < 8 && value.as_bytes().last() != Some(&0) {
Ok(Self::from_str_unchecked(value))
} else {
Err(())
}
}
}

/// Inputs of at most 7 bytes that do not end in a zero byte must convert, and
/// the resulting `len()` must equal the input's byte length.
#[test]
fn valid_stack_strings() {
    // Inputs that must be accepted by the conversion.
    let accepted = ["", "asd", "asdasd", "asdasda", "asd76fd", "💩", "asd\0foo"];
    for input in accepted.iter() {
        assert!(SmallString::try_from(*input).is_ok());
    }

    // Inputs paired with the byte length the converted value must report.
    let expected_lengths = [
        ("", 0),
        ("asd", 3),
        ("asdasd", 6),
        ("asdasda", 7),
        ("asd76fd", 7),
        ("💩 ", 5),
        ("asd\0foo", 7),
    ];
    for &(input, byte_len) in expected_lengths.iter() {
        assert_eq!(SmallString::try_from(input).unwrap().len(), byte_len);
    }
}

/// Inputs longer than 7 bytes, or ending in a zero byte, must be rejected.
#[test]
fn not_valid_stack_strings() {
    let rejected = ["asd asd r 547 gdfg", "asdfoo\0"];
    for input in rejected.iter() {
        assert!(SmallString::try_from(*input).is_err());
    }
}

0 comments on commit b60617c

Please sign in to comment.