Skip to content

Commit

Permalink
feat: parallelize final assembly step in encode using rayon
Browse files Browse the repository at this point in the history
- gives nice improvements on huge bocs
```
encode                  time:   [35.299 ms 35.359 ms 35.422 ms]                    
                        change: [-25.523% -25.318% -25.115%] (p = 0.00 < 0.05)
                        Performance has improved.
```
  • Loading branch information
0xdeafbeef committed Jul 22, 2024
1 parent d182e97 commit 35e131c
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 23 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ hex = "0.4"
num-bigint = { version = "0.4", optional = true }
num-traits = { version = "0.2", optional = true }
rand = { version = "0.8", optional = true }
rayon = { version = "1.10.0", optional = true }
scc = { version = "2.1", optional = true }
serde = { version = "1", features = ["derive"], optional = true }
sha2 = "0.10"
Expand All @@ -50,18 +51,18 @@ tl-proto = { version = "0.4", optional = true }

everscale-types-proc = { version = "=0.1.4", path = "proc" }


[dev-dependencies]
anyhow = "1.0"
base64 = "0.21"
criterion = "0.5"
libc = "0.2"
rand = "0.8"
rand_xorshift = "0.3"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

[features]
default = ["base64", "serde", "models", "sync"]
default = ["base64", "serde", "models", "sync", "rayon"]
sync = ["dep:scc"]
stats = []
serde = ["dep:serde", "base64"]
Expand Down
93 changes: 72 additions & 21 deletions src/boc/ser.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::collections::HashMap;
use std::hash::BuildHasher;

use super::BocTag;
use crate::cell::{CellDescriptor, DynCell, HashBytes};

use super::BocTag;

/// Intermediate BOC serializer state.
pub struct BocHeader<'a, S = ahash::RandomState> {
root_rev_indices: Vec<u32>,
Expand All @@ -18,7 +19,7 @@ pub struct BocHeader<'a, S = ahash::RandomState> {

impl<'a, S> BocHeader<'a, S>
where
S: BuildHasher + Default,
S: BuildHasher + Default + Send + Sync,
{
/// Creates an intermediate BOC serializer state with a single root.
pub fn new(root: &'a DynCell) -> Self {
Expand All @@ -39,7 +40,7 @@ where

impl<'a, S> BocHeader<'a, S>
where
S: BuildHasher,
S: BuildHasher + Send + Sync,
{
/// Adds an additional root to the state.
pub fn add_root(&mut self, root: &'a DynCell) {
Expand Down Expand Up @@ -115,30 +116,80 @@ where
target.extend_from_slice(&root_index.to_be_bytes()[4 - ref_size..]);
}

for cell in self.rev_cells.into_iter().rev() {
let mut descriptor = cell.descriptor();
descriptor.d1 &= !(u8::from(self.without_hashes) * CellDescriptor::STORE_HASHES_MASK);
target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
if descriptor.store_hashes() {
let level_mask = descriptor.level_mask();
for level in level_mask {
target.extend_from_slice(cell.hash(level).as_ref());
fn process_chunk<S>(
chunk: &[&DynCell],
without_hashes: bool,
cell_count: u32,
rev_indices: &HashMap<&HashBytes, u32, S>,
ref_size: usize,
target: &mut Vec<u8>,
) where
S: BuildHasher,
{
for cell in chunk.iter().rev() {
let mut descriptor = cell.descriptor();
descriptor.d1 &= !(u8::from(without_hashes) * CellDescriptor::STORE_HASHES_MASK);
target.extend_from_slice(&[descriptor.d1, descriptor.d2]);
if descriptor.store_hashes() {
let level_mask = descriptor.level_mask();
for level in level_mask {
target.extend_from_slice(cell.hash(level).as_ref());
}
for level in level_mask {
target.extend_from_slice(&cell.depth(level).to_be_bytes());
}
}
for level in level_mask {
target.extend_from_slice(&cell.depth(level).to_be_bytes());
target.extend_from_slice(cell.data());
for child in cell.references() {
if let Some(rev_index) = rev_indices.get(child.repr_hash()) {
let rev_index = cell_count - *rev_index - 1;
target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
} else {
debug_assert!(false, "child not found");
}
}
}
target.extend_from_slice(cell.data());
for child in cell.references() {
if let Some(rev_index) = self.rev_indices.get(child.repr_hash()) {
let rev_index = self.cell_count - *rev_index - 1;
target.extend_from_slice(&rev_index.to_be_bytes()[4 - ref_size..]);
} else {
debug_assert!(false, "child not found");
}
}

#[cfg(feature = "rayon")]
{
use rayon::iter::{IndexedParallelIterator, ParallelIterator};
use rayon::slice::ParallelSlice;

let data: Vec<_> = self
.rev_cells
.par_chunks(5_000)
.rev()
.map(|chunk| {
let mut target = Vec::with_capacity(chunk.len() * 256);
process_chunk(
chunk,
self.without_hashes,
self.cell_count,
&self.rev_indices,
ref_size,
&mut target,
);
target
})
.collect();
for chunk in data {
target.extend_from_slice(&chunk);
}
}

#[cfg(not(feature = "rayon"))]
{
process_chunk(
&self.rev_cells,
self.without_hashes,
self.cell_count,
&self.rev_indices,
ref_size,
target,
);
}

if self.include_crc {
let target_len_after = target.len();
debug_assert!(target_len_before < target_len_after);
Expand Down

0 comments on commit 35e131c

Please sign in to comment.