feat: parallelize final assembly step in encode using rayon
- gives nice improvements on huge BOCs
```
encode                  time:   [35.299 ms 35.359 ms 35.422 ms]                    
                        change: [-25.523% -25.318% -25.115%] (p = 0.00 < 0.05)
                        Performance has improved.
```
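
Numbers like the ones above typically come from a criterion benchmark along the following lines. This is only a sketch: `build_large_cell_tree` is a hypothetical helper and the bench name is a placeholder, so the repository's actual bench harness may differ.

```rust
use criterion::{criterion_group, criterion_main, Criterion};
use everscale_types::boc::Boc;
use everscale_types::cell::Cell;

// Hypothetical helper, not part of the repository: build a large cell tree
// so that the final assembly step dominates the measurement.
fn build_large_cell_tree() -> Cell {
    unimplemented!("construct a big cell tree, e.g. with CellBuilder")
}

fn bench_encode(c: &mut Criterion) {
    let cell = build_large_cell_tree();
    c.bench_function("encode", |b| b.iter(|| Boc::encode(&cell)));
}

criterion_group!(benches, bench_encode);
criterion_main!(benches);
```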
0xdeafbeef committed Jul 22, 2024
1 parent d182e97 commit 7070289
Showing 3 changed files with 177 additions and 24 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
@@ -41,6 +41,7 @@ hex = "0.4"
num-bigint = { version = "0.4", optional = true }
num-traits = { version = "0.2", optional = true }
rand = { version = "0.8", optional = true }
rayon = { version = "1.10.0", optional = false }
scc = { version = "2.1", optional = true }
serde = { version = "1", features = ["derive"], optional = true }
sha2 = "0.10"
@@ -50,11 +51,11 @@ tl-proto = { version = "0.4", optional = true }

everscale-types-proc = { version = "=0.1.4", path = "proc" }


[dev-dependencies]
anyhow = "1.0"
base64 = "0.21"
criterion = "0.5"
libc = "0.2"
rand = "0.8"
rand_xorshift = "0.3"
serde = { version = "1", features = ["derive"] }
14 changes: 14 additions & 0 deletions src/boc/mod.rs
@@ -114,6 +114,20 @@ impl Boc {
encode_impl(cell.as_ref())
}

/// Encodes the specified cell tree as BOC.
/// Uses rayon under the hood to parallelize encoding.
pub fn encode_par<T>(cell: T) -> Vec<u8>
where
T: AsRef<DynCell>,
{
fn encode_impl(cell: &DynCell) -> Vec<u8> {
let mut result = Vec::new();
ser::BocHeader::<ahash::RandomState>::new(cell).encode_par(&mut result);
result
}
encode_impl(cell.as_ref())
}

/// Encodes a pair of cell trees as BOC.
pub fn encode_pair<T1, T2>((cell1, cell2): (T1, T2)) -> Vec<u8>
where
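A quick usage sketch for the new method, assuming it lands as shown above. Since the parallel path only changes how per-chunk buffers are assembled, `encode_par` should produce the same bytes as `encode`; the `cell` argument is assumed to be built elsewhere.

```rust
use everscale_types::boc::Boc;
use everscale_types::cell::Cell;

fn encode_both_ways(cell: &Cell) -> Vec<u8> {
    // Both paths visit the same cells in the same order, so the parallel
    // encoder is expected to produce byte-identical output.
    let sequential = Boc::encode(cell);
    let parallel = Boc::encode_par(cell);
    debug_assert_eq!(sequential, parallel);
    parallel
}
```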
184 changes: 161 additions & 23 deletions src/boc/ser.rs
@@ -1,9 +1,10 @@
use std::collections::HashMap;
use std::hash::BuildHasher;

use super::BocTag;
use crate::cell::{CellDescriptor, DynCell, HashBytes};

use super::BocTag;

/// Intermediate BOC serializer state.
pub struct BocHeader<'a, S = ahash::RandomState> {
root_rev_indices: Vec<u32>,
@@ -18,7 +19,7 @@ pub struct BocHeader<'a, S = ahash::RandomState> {

impl<'a, S> BocHeader<'a, S>
where
S: BuildHasher + Default,
S: BuildHasher + Default + Send + Sync,
{
/// Creates an intermediate BOC serializer state with a single root.
pub fn new(root: &'a DynCell) -> Self {
@@ -39,7 +40,7 @@

impl<'a, S> BocHeader<'a, S>
where
S: BuildHasher,
S: BuildHasher + Send + Sync,
{
/// Adds an additional root to the state.
pub fn add_root(&mut self, root: &'a DynCell) {
@@ -115,30 +116,132 @@ where
target.extend_from_slice(&root_index.to_be_bytes()[4 - ref_size..]);
}

        for cell in self.rev_cells.into_iter().rev() {
            let mut descriptor = cell.descriptor();
            descriptor.d1 &= !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);
            target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
            if descriptor.store_hashes() {
                let level_mask = descriptor.level_mask();
                for level in level_mask {
                    target.extend_from_slice(cell.hash(level).as_ref());
                }
                for level in level_mask {
                    target.extend_from_slice(&cell.depth(level).to_be_bytes());
                }
            }
            target.extend_from_slice(cell.data());
            for child in cell.references() {
                if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
                    let rev_index = self.cell_count - *rev_index - 1;
                    target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
                } else {
                    debug_assert!(false, "child not found");
                }
            }
        }
process_chunk(
&self.rev_cells,
self.without_hashes,
self.cell_count,
&self.rev_indices,
ref_size,
target,
);

if self.include_crc {
let target_len_after = target.len();
debug_assert!(target_len_before < target_len_after);

let crc = crc32c::crc32c(&target[target_len_before..target_len_after]);
target.extend_from_slice(&crc.to_le_bytes());
}

debug_assert_eq!(target.len() as u64, target_len_before as u64 + total_size);
}

/// Encodes cell trees into bytes.
/// Uses rayon under the hood.
pub fn encode_par(self, target: &mut Vec<u8>) {
let root_count = self.root_rev_indices.len();

let ref_size = number_of_bytes_to_fit(self.cell_count as u64);
// NOTE: `ref_size` will be in range 1..=4 because `self.cell_count`
// is `u32`, and there is at least one cell (see Self::new)
debug_assert!((1..=4).contains(&ref_size));

let total_cells_size: u64 = self.total_data_size
+ (self.cell_count as u64 * 2) // all descriptor bytes
+ (ref_size as u64 * self.reference_count);
let offset_size = number_of_bytes_to_fit(total_cells_size);

// NOTE: `offset_size` will be in range 1..=8 because `self.cell_count`
// is at least 1, and `total_cells_size` is `u64`
debug_assert!((1..=8).contains(&offset_size));

let flags = (ref_size as u8) | (u8::from(self.include_crc) * 0b0100_0000);

// 4 bytes - BOC tag
// 1 byte - flags
// 1 byte - offset size
// {ref_size} - cell count
// {ref_size} - root count
// {ref_size} - absent cell count
// {offset_size} - total cells size
// root_count * {ref_size} - root indices
// {total_cells_size} - cells
// include_crc * 4 - optional CRC32
let total_size = 4
+ 2
+ (ref_size as u64) * (3 + root_count as u64)
+ (offset_size as u64)
+ total_cells_size
+ u64::from(self.include_crc) * 4;
target.reserve(total_size as usize);

let target_len_before = target.len();

target.extend_from_slice(&BocTag::GENERIC);
target.extend_from_slice(&[flags, offset_size as u8]);
target.extend_from_slice(&self.cell_count.to_be_bytes()[4 - ref_size..]);
target.extend_from_slice(&(root_count as u32).to_be_bytes()[4 - ref_size..]);
target.extend_from_slice(&[0; 4][4 - ref_size..]);
target.extend_from_slice(&total_cells_size.to_be_bytes()[8 - offset_size..]);

for rev_index in self.root_rev_indices {
let root_index = self.cell_count - rev_index - 1;
target.extend_from_slice(&root_index.to_be_bytes()[4 - ref_size..]);
}

// #[cfg(feature = "rayon")] todo: uncomment this block before merge
'res: {
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
use rayon::slice::ParallelSlice;

const CHUNK_SIZE: usize = 5_000;

            // Small inputs are not worth the rayon overhead: fall back to the
            // sequential path and bail out of the block.
            if self.rev_cells.len() < CHUNK_SIZE * 2 {
process_chunk(
&self.rev_cells,
self.without_hashes,
self.cell_count,
&self.rev_indices,
ref_size,
target,
);
break 'res;
}
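
            // NOTE: chunks are taken in reverse and `process_chunk` walks each
            // chunk in reverse as well, so concatenating the per-chunk buffers
            // reproduces the order of the old sequential loop
            // (`self.rev_cells.into_iter().rev()`).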
let data: Vec<_> = self
.rev_cells
                .par_chunks(CHUNK_SIZE)
.rev()
.map(|chunk| {
let mut target = Vec::with_capacity(chunk.len() * 256);
process_chunk(
chunk,
self.without_hashes,
self.cell_count,
&self.rev_indices,
ref_size,
&mut target,
);
target
})
.collect();
for chunk in data {
target.extend_from_slice(&chunk);
}
}

// #[cfg(not(feature = "rayon"))]
// {
// process_chunk(
// &self.rev_cells,
// self.without_hashes,
// self.cell_count,
// &self.rev_indices,
// ref_size,
// target,
// );
// }

if self.include_crc {
let target_len_after = target.len();
debug_assert!(target_len_before < target_len_after);
@@ -226,3 +329,38 @@ impl CellDescriptor {
fn number_of_bytes_to_fit(l: u64) -> usize {
(8 - l.leading_zeros() / 8) as usize
}

fn process_chunk<S>(
chunk: &[&DynCell],
without_hashes: bool,
cell_count: u32,
rev_indices: &HashMap<&HashBytes, u32, S>,
ref_size: usize,
target: &mut Vec<u8>,
) where
S: BuildHasher,
{
for cell in chunk.iter().rev() {
let mut descriptor = cell.descriptor();
descriptor.d1 &= !(u8::from(without_hashes) * CellDescriptor::STORE_HASHES_MASK);
target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
if descriptor.store_hashes() {
let level_mask = descriptor.level_mask();
for level in level_mask {
target.extend_from_slice(cell.hash(level).as_ref());
}
for level in level_mask {
target.extend_from_slice(&cell.depth(level).to_be_bytes());
}
}
target.extend_from_slice(cell.data());
for child in cell.references() {
if let Some(rev_index) = rev_indices.get(child.repr_hash()) {
let rev_index = cell_count - *rev_index - 1;
target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
} else {
debug_assert!(false, "child not found");
}
}
}
}
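
For readers unfamiliar with the pattern used above: each chunk of cells is serialized into its own buffer in parallel, and the buffers are then concatenated in order, which is what keeps the output byte-identical to the sequential path. Below is a minimal, self-contained sketch of that idea using plain `u32` items instead of the crate's cells; the real code additionally walks chunks (and cells within a chunk) in reverse.

```rust
use rayon::prelude::*;

/// Serialize items chunk-by-chunk in parallel, then glue the buffers together.
/// `collect()` on an indexed parallel iterator preserves chunk order, so the
/// result matches a plain sequential loop.
fn assemble(items: &[u32]) -> Vec<u8> {
    let chunks: Vec<Vec<u8>> = items
        .par_chunks(5_000)
        .map(|chunk| {
            let mut buf = Vec::with_capacity(chunk.len() * 4);
            for item in chunk {
                buf.extend_from_slice(&item.to_be_bytes());
            }
            buf
        })
        .collect();

    let mut out = Vec::with_capacity(items.len() * 4);
    for chunk in chunks {
        out.extend_from_slice(&chunk);
    }
    out
}
```

The commit applies the same idea with `process_chunk` as the per-chunk serializer and a 5_000-cell chunk size.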
