From 5c8d51614b15f9326922df3a6ececfd9da33d6ba Mon Sep 17 00:00:00 2001 From: BGluth Date: Mon, 11 Mar 2024 13:51:25 -0600 Subject: [PATCH] Now prunes tries properly (and more aggressively!) without hack (#97) * Finished logic for detecting trie collapses * A few fixes related to branch collapse detection * Requested PR changes for #39 * Extension nodes are now always not hashed if encountered - However it will only not hash the child if the key matches the key piece of the extension. * Mix of different cherry-picked fixes - Fixes overall deal with calling the new pruning query in `mpt_trie` and changing the order of calls when generating proof IR. * Impled additional logic to extract remaining child - Realized that we needed a bit more logic to get the key to the remaining child after a branch collapse. - Ended up adding a bool to `TriePathIter`. * Cleanup from messy cherry picks - Also includes one core fix that got left out. * Apply suggestions from code review Robin's suggested changes for PR #97 Co-authored-by: Robin Salen <30937548+Nashtare@users.noreply.github.com> * Requsted PR changes for #97 --------- Co-authored-by: Ben Co-authored-by: Robin Salen <30937548+Nashtare@users.noreply.github.com> --- CHANGELOG.md | 1 + mpt_trie/src/debug_tools/query.rs | 2 +- mpt_trie/src/nibbles.rs | 51 ++-- mpt_trie/src/special_query.rs | 74 +++++- mpt_trie/src/trie_subsets.rs | 14 +- mpt_trie/src/utils.rs | 4 +- trace_decoder/src/decoding.rs | 282 +++++++++++++++------ trace_decoder/src/processed_block_trace.rs | 17 +- trace_decoder/src/types.rs | 4 +- 9 files changed, 331 insertions(+), 118 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccf746163..2a5f50c3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix generation inputs logging pre-transaction execution ([#89](https://github.com/0xPolygonZero/zk_evm/pull/89)) - Reduce state trie size for dummy payloads ([#88](https://github.com/0xPolygonZero/zk_evm/pull/88)) - Fix post-txn trie debugging output for multi-logs receipts ([#86](https://github.com/0xPolygonZero/zk_evm/pull/86)) +- Fixed *most* failing blocks caused by the merged in aggressive pruning changes ([#97](https://github.com/0xPolygonZero/zk_evm/pull/97)) ## [0.1.1] - 2024-03-01 diff --git a/mpt_trie/src/debug_tools/query.rs b/mpt_trie/src/debug_tools/query.rs index dd7d3b654..20edf6a3b 100644 --- a/mpt_trie/src/debug_tools/query.rs +++ b/mpt_trie/src/debug_tools/query.rs @@ -159,7 +159,7 @@ pub struct DebugQueryOutput { k: Nibbles, /// The nodes hit during the query. - pub node_path: TriePath, + node_path: TriePath, extra_node_info: Vec>, node_found: bool, params: DebugQueryParams, diff --git a/mpt_trie/src/nibbles.rs b/mpt_trie/src/nibbles.rs index f5d63a043..d2426dad2 100644 --- a/mpt_trie/src/nibbles.rs +++ b/mpt_trie/src/nibbles.rs @@ -26,6 +26,11 @@ pub type Nibble = u8; /// Used for the internal representation of a sequence of nibbles. pub type NibblesIntern = U512; +const MULTIPLE_NIBBLES_APPEND_ASSERT_ERR_MSG: &str = + "Attempted to create a nibbles sequence longer than 64!"; +const SINGLE_NIBBLE_APPEND_ASSERT_ERR_MSG: &str = + "Attempted to append a single nibble that was greater than 15!"; + /// Because there are two different ways to convert to `Nibbles`, we don't want /// to rely on `From`. Instead, we'll define a new trait that defines both /// conversions. @@ -776,12 +781,20 @@ impl Nibbles { } fn nibble_append_safety_asserts(&self, n: Nibble) { - assert!(self.count < 64); - assert!(n < 16); + assert!( + self.count < 64, + "{}", + MULTIPLE_NIBBLES_APPEND_ASSERT_ERR_MSG + ); + assert!(n < 16, "{}", SINGLE_NIBBLE_APPEND_ASSERT_ERR_MSG); } fn nibbles_append_safety_asserts(&self, new_count: usize) { - assert!(new_count <= 64); + assert!( + new_count <= 64, + "{}", + MULTIPLE_NIBBLES_APPEND_ASSERT_ERR_MSG + ); } // TODO: REMOVE BEFORE NEXT CRATE VERSION! THIS IS A TEMP HACK! @@ -806,8 +819,12 @@ mod tests { use super::{Nibble, Nibbles, ToNibbles}; use crate::nibbles::FromHexPrefixError; - const LONG_ZERO_NIBS_STR_LEN_63: &str = - "0x000000000000000000000000000000000000000000000000000000000000000"; + const ZERO_NIBS_63: &str = "0x000000000000000000000000000000000000000000000000000000000000000"; + const ZERO_NIBS_64: &str = "0x0000000000000000000000000000000000000000000000000000000000000000"; + const ZERO_NIBS_64_LEADING_1: &str = + "0x1000000000000000000000000000000000000000000000000000000000000000"; + const ZERO_NIBS_64_TRAILING_1: &str = + "0x0000000000000000000000000000000000000000000000000000000000000001"; #[test] fn get_nibble_works() { @@ -925,9 +942,8 @@ mod tests { test_and_assert_nib_push_func(Nibbles::default(), 0x1, |n| n.push_nibble_front(0x1)); test_and_assert_nib_push_func(0x1, 0x21, |n| n.push_nibble_front(0x2)); test_and_assert_nib_push_func( - Nibbles::from_str(LONG_ZERO_NIBS_STR_LEN_63).unwrap(), - Nibbles::from_str("0x1000000000000000000000000000000000000000000000000000000000000000") - .unwrap(), + Nibbles::from_str(ZERO_NIBS_63).unwrap(), + Nibbles::from_str(ZERO_NIBS_64_LEADING_1).unwrap(), |n| n.push_nibble_front(0x1), ); } @@ -937,9 +953,8 @@ mod tests { test_and_assert_nib_push_func(Nibbles::default(), 0x1, |n| n.push_nibble_back(0x1)); test_and_assert_nib_push_func(0x1, 0x12, |n| n.push_nibble_back(0x2)); test_and_assert_nib_push_func( - Nibbles::from_str(LONG_ZERO_NIBS_STR_LEN_63).unwrap(), - Nibbles::from_str("0x0000000000000000000000000000000000000000000000000000000000000001") - .unwrap(), + Nibbles::from_str(ZERO_NIBS_63).unwrap(), + Nibbles::from_str(ZERO_NIBS_64_TRAILING_1).unwrap(), |n| n.push_nibble_back(0x1), ); } @@ -951,9 +966,8 @@ mod tests { }); test_and_assert_nib_push_func(0x1234, 0x5671234, |n| n.push_nibbles_front(&0x567.into())); test_and_assert_nib_push_func( - Nibbles::from_str(LONG_ZERO_NIBS_STR_LEN_63).unwrap(), - Nibbles::from_str("0x1000000000000000000000000000000000000000000000000000000000000000") - .unwrap(), + Nibbles::from_str(ZERO_NIBS_63).unwrap(), + Nibbles::from_str(ZERO_NIBS_64_LEADING_1).unwrap(), |n| n.push_nibbles_front(&0x1.into()), ); } @@ -965,9 +979,8 @@ mod tests { }); test_and_assert_nib_push_func(0x1234, 0x1234567, |n| n.push_nibbles_back(&0x567.into())); test_and_assert_nib_push_func( - Nibbles::from_str(LONG_ZERO_NIBS_STR_LEN_63).unwrap(), - Nibbles::from_str("0x0000000000000000000000000000000000000000000000000000000000000001") - .unwrap(), + Nibbles::from_str(ZERO_NIBS_63).unwrap(), + Nibbles::from_str(ZERO_NIBS_64_TRAILING_1).unwrap(), |n| n.push_nibbles_back(&0x1.into()), ); } @@ -1264,7 +1277,7 @@ mod tests { fn nibbles_from_h256_works() { assert_eq!( format!("{:x}", Nibbles::from_h256_be(H256::from_low_u64_be(0))), - "0x0000000000000000000000000000000000000000000000000000000000000000", + ZERO_NIBS_64, ); assert_eq!( format!("{:x}", Nibbles::from_h256_be(H256::from_low_u64_be(2048))), @@ -1272,7 +1285,7 @@ mod tests { ); assert_eq!( format!("{:x}", Nibbles::from_h256_le(H256::from_low_u64_be(0))), - "0x0000000000000000000000000000000000000000000000000000000000000000" + ZERO_NIBS_64 ); // Note that the first bit of the `Nibbles` changes if the count is odd. diff --git a/mpt_trie/src/special_query.rs b/mpt_trie/src/special_query.rs index 503331aa0..5c74cbab8 100644 --- a/mpt_trie/src/special_query.rs +++ b/mpt_trie/src/special_query.rs @@ -19,6 +19,10 @@ pub struct TriePathIter { // Although wrapping `curr_node` in an option might be more "Rust like", the logic is a lot // cleaner with a bool. terminated: bool, + + /// Always include the final node we encounter even if the key does not + /// match. + always_include_final_node_if_possible: bool, } impl Iterator for TriePathIter { @@ -39,9 +43,12 @@ impl Iterator for TriePathIter { Some(TrieSegment::Hash) } Node::Branch { children, .. } => { - // Our query key has ended. Stop here. if self.curr_key.is_empty() { + // Our query key has ended. Stop here. self.terminated = true; + + // In this case even if `always_include_final_node` is set, we have no + // information on which branch to take, so we can't add in the last node. return None; } @@ -58,7 +65,8 @@ impl Iterator for TriePathIter { false => { // Only a partial match. Stop. self.terminated = true; - None + self.always_include_final_node_if_possible + .then_some(TrieSegment::Extension(*nibbles)) } true => { pop_nibbles_clamped(&mut self.curr_key, nibbles.count); @@ -72,7 +80,7 @@ impl Iterator for TriePathIter { Node::Leaf { nibbles, .. } => { self.terminated = true; - match self.curr_key == *nibbles { + match self.curr_key == *nibbles || self.always_include_final_node_if_possible { false => None, true => Some(TrieSegment::Leaf(*nibbles)), } @@ -93,7 +101,11 @@ fn pop_nibbles_clamped(nibbles: &mut Nibbles, n: usize) -> Nibbles { /// Note that if the key does not match the entire key of a node (eg. the /// remaining key is `0x34` but the next key is a leaf with the key `0x3456`), /// then the leaf will not appear in the query output. -pub fn path_for_query(trie: &Node, k: K) -> TriePathIter +pub fn path_for_query( + trie: &Node, + k: K, + always_include_final_node_if_possible: bool, +) -> TriePathIter where K: Into, { @@ -101,6 +113,7 @@ where curr_node: trie.clone().into(), curr_key: k.into(), terminated: false, + always_include_final_node_if_possible, } } @@ -109,10 +122,15 @@ mod test { use std::str::FromStr; use super::path_for_query; - use crate::{nibbles::Nibbles, testing_utils::handmade_trie_1, utils::TrieSegment}; + use crate::{ + nibbles::Nibbles, + testing_utils::{common_setup, handmade_trie_1}, + utils::TrieSegment, + }; #[test] - fn query_iter_works() { + fn query_iter_works_no_last_node() { + common_setup(); let (trie, ks) = handmade_trie_1(); // ks --> vec![0x1234, 0x1324, 0x132400005_u64, 0x2001, 0x2002]; @@ -149,8 +167,50 @@ mod test { ]; for (q, expected) in ks.into_iter().zip(res.into_iter()) { - let res: Vec<_> = path_for_query(&trie.node, q).collect(); + let res: Vec<_> = path_for_query(&trie.node, q, false).collect(); assert_eq!(res, expected) } } + + #[test] + fn query_iter_works_with_last_node() { + common_setup(); + let (trie, _) = handmade_trie_1(); + + let extension_expected = vec![ + TrieSegment::Branch(1), + TrieSegment::Branch(3), + TrieSegment::Extension(0x24.into()), + ]; + + assert_eq!( + path_for_query(&trie, 0x13, true).collect::>(), + extension_expected + ); + assert_eq!( + path_for_query(&trie, 0x132, true).collect::>(), + extension_expected + ); + + // The last node is a branch here, but we don't include it because a TrieSegment + // requires us to state which nibble we took in the branch, and we don't have + // this information. + assert_eq!( + path_for_query(&trie, 0x1324, true).collect::>(), + extension_expected + ); + + let mut leaf_expected = extension_expected.clone(); + leaf_expected.push(TrieSegment::Branch(0)); + leaf_expected.push(TrieSegment::Leaf(Nibbles::from_str("0x0005").unwrap())); + + assert_eq!( + path_for_query(&trie, 0x13240, true).collect::>(), + leaf_expected + ); + assert_eq!( + path_for_query(&trie, 0x132400, true).collect::>(), + leaf_expected + ); + } } diff --git a/mpt_trie/src/trie_subsets.rs b/mpt_trie/src/trie_subsets.rs index 455d3685c..483c86baf 100644 --- a/mpt_trie/src/trie_subsets.rs +++ b/mpt_trie/src/trie_subsets.rs @@ -307,11 +307,17 @@ fn mark_nodes_that_are_needed( return mark_nodes_that_are_needed(&mut children[nib as usize], curr_nibbles); } TrackedNodeIntern::Extension(child) => { - let nibbles = trie.info.get_nibbles_expected(); - let r = curr_nibbles.pop_nibbles_front(nibbles.count); + // If we hit an extension node, we always must include it unless we exhausted + // our key. + if curr_nibbles.is_empty() { + return Ok(()); + } - if r.nibbles_are_identical_up_to_smallest_count(nibbles) { - trie.info.touched = true; + trie.info.touched = true; + + let nibbles: &Nibbles = trie.info.get_nibbles_expected(); + if curr_nibbles.nibbles_are_identical_up_to_smallest_count(nibbles) { + curr_nibbles.pop_nibbles_front(nibbles.count); return mark_nodes_that_are_needed(child, curr_nibbles); } } diff --git a/mpt_trie/src/utils.rs b/mpt_trie/src/utils.rs index 59400ee82..8c457f07a 100644 --- a/mpt_trie/src/utils.rs +++ b/mpt_trie/src/utils.rs @@ -216,7 +216,7 @@ impl TrieSegment { } /// Extracts the key piece used by the segment (if applicable). - pub fn get_key_piece_from_seg_if_present(&self) -> Option { + pub(crate) fn get_key_piece_from_seg_if_present(&self) -> Option { match self { TrieSegment::Empty | TrieSegment::Hash => None, TrieSegment::Branch(nib) => Some(Nibbles::from_nibble(*nib)), @@ -230,7 +230,7 @@ impl TrieSegment { /// This function is intended to be used during a trie query as we are /// traversing down a trie. Depending on the current node, we pop off nibbles /// and use these to create `TrieSegment`s. -pub fn get_segment_from_node_and_key_piece( +pub(crate) fn get_segment_from_node_and_key_piece( n: &Node, k_piece: &Nibbles, ) -> TrieSegment { diff --git a/trace_decoder/src/decoding.rs b/trace_decoder/src/decoding.rs index 35900b193..b697fcaac 100644 --- a/trace_decoder/src/decoding.rs +++ b/trace_decoder/src/decoding.rs @@ -1,7 +1,7 @@ use std::{ collections::HashMap, fmt::{self, Display, Formatter}, - iter::{empty, once}, + iter::{self, empty, once}, }; use ethereum_types::{Address, H256, U256}; @@ -9,18 +9,22 @@ use evm_arithmetization::{ generation::{mpt::AccountRlp, GenerationInputs, TrieInputs}, proof::{ExtraBlockData, TrieRoots}, }; +use log::trace; use mpt_trie::{ nibbles::Nibbles, partial_trie::{HashedPartialTrie, Node, PartialTrie}, - trie_subsets::create_trie_subset, + special_query::path_for_query, + trie_subsets::{create_trie_subset, SubsetTrieError}, + utils::{IntoTrieKey, TriePath, TrieSegment}, }; use thiserror::Error; use crate::{ processed_block_trace::{NodesUsedByTxn, ProcessedBlockTrace, StateTrieWrites, TxnMetaState}, types::{ - HashedAccountAddr, HashedNodeAddr, HashedStorageAddrNibbles, OtherBlockData, TrieRootHash, - TxnIdx, TxnProofGenIR, EMPTY_ACCOUNT_BYTES_RLPED, ZERO_STORAGE_SLOT_VAL_RLPED, + HashedAccountAddr, HashedNodeAddr, HashedStorageAddr, HashedStorageAddrNibbles, + OtherBlockData, TriePathIter, TrieRootHash, TxnIdx, TxnProofGenIR, + EMPTY_ACCOUNT_BYTES_RLPED, ZERO_STORAGE_SLOT_VAL_RLPED, }, utils::{hash, update_val_if_some}, }; @@ -38,16 +42,16 @@ pub enum TraceParsingError { /// Failure due to trying to access or delete a storage trie missing /// from the base trie. - #[error("Missing account storage trie in base trie when constructing subset partial trie for txn (account: {0})")] + #[error("Missing account storage trie in base trie when constructing subset partial trie for txn (account: {0:x})")] MissingAccountStorageTrie(HashedAccountAddr), /// Failure due to trying to access a non-existent key in the trie. - #[error("Tried accessing a non-existent key ({1}) in the {0} trie (root hash: {2:x})")] + #[error("Tried accessing a non-existent key ({1:x}) in the {0} trie (root hash: {2:x})")] NonExistentTrieEntry(TrieType, Nibbles, TrieRootHash), - /// Failure due to missing keys when creating a subpartial trie. - #[error("Missing keys when creating sub-partial tries (Trie type: {0})")] - MissingKeysCreatingSubPartialTrie(TrieType), + /// Failure due to missing keys when creating a sub-partial trie. + #[error("Missing key {0:x} when creating sub-partial tries (Trie type: {1})")] + MissingKeysCreatingSubPartialTrie(Nibbles, TrieType), /// Failure due to trying to withdraw from a missing account #[error("No account present at {0:x} (hashed: {1:x}) to withdraw {2} Gwei from!")] @@ -80,7 +84,7 @@ impl Display for TrieType { /// The current state of all tries as we process txn deltas. These are mutated /// after every txn we process in the trace. -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] struct PartialTrieState { state: HashedPartialTrie, storage: HashMap, @@ -88,6 +92,15 @@ struct PartialTrieState { receipt: HashedPartialTrie, } +/// Additional information discovered during delta application. +#[derive(Debug, Default)] +struct TrieDeltaApplicationOutput { + // During delta application, if a delete occurs, we may have to make sure additional nodes + // that are not accessed by the txn remain unhashed. + additional_state_trie_paths_to_not_hash: Vec, + additional_storage_trie_paths_to_not_hash: HashMap>, +} + impl ProcessedBlockTrace { pub(crate) fn into_txn_proof_gen_ir( self, @@ -122,23 +135,43 @@ impl ProcessedBlockTrace { .into_iter() .enumerate() .map(|(txn_idx, txn_info)| { - let tries = Self::create_minimal_partial_tries_needed_by_txn( - &mut curr_block_tries, - &txn_info.nodes_used_by_txn, - txn_idx, - &other_data.b_data.b_meta.block_beneficiary, - )?; - + trace!("Generating proof IR for txn {}...", txn_idx); + + Self::init_any_needed_empty_storage_tries( + &mut curr_block_tries.storage, + txn_info + .nodes_used_by_txn + .storage_accesses + .iter() + .map(|(k, _)| k), + &txn_info + .nodes_used_by_txn + .state_accounts_with_no_accesses_but_storage_tries, + ); // For each non-dummy txn, we increment `txn_number_after` by 1, and // update `gas_used_after` accordingly. extra_data.txn_number_after += U256::one(); extra_data.gas_used_after += txn_info.meta.gas_used.into(); - Self::apply_deltas_to_trie_state( + // Because we need to run delta application before creating the minimal + // sub-tries (we need to detect if deletes collapsed any branches), we need to + // do this clone every iteration. + let tries_at_start_of_txn = curr_block_tries.clone(); + + Self::update_txn_and_receipt_tries(&mut curr_block_tries, &txn_info.meta, txn_idx); + + let delta_out = Self::apply_deltas_to_trie_state( &mut curr_block_tries, - txn_info.nodes_used_by_txn, + &txn_info.nodes_used_by_txn, &txn_info.meta, + )?; + + let tries = Self::create_minimal_partial_tries_needed_by_txn( + &tries_at_start_of_txn, + &txn_info.nodes_used_by_txn, txn_idx, + delta_out, + &other_data.b_data.b_meta.block_beneficiary, )?; let trie_roots_after = calculate_trie_input_hashes(&curr_block_tries); @@ -191,15 +224,55 @@ impl ProcessedBlockTrace { Ok(txn_gen_inputs) } + fn update_txn_and_receipt_tries( + trie_state: &mut PartialTrieState, + meta: &TxnMetaState, + txn_idx: TxnIdx, + ) { + let txn_k = Nibbles::from_bytes_be(&rlp::encode(&txn_idx)).unwrap(); + trie_state.txn.insert(txn_k, meta.txn_bytes()); + + trie_state + .receipt + .insert(txn_k, meta.receipt_node_bytes.as_ref()); + } + + /// If the account does not have a storage trie or does but is not + /// accessed by any txns, then we still need to manually create an entry for + /// them. + fn init_any_needed_empty_storage_tries<'a>( + storage_tries: &mut HashMap, + accounts_with_storage: impl Iterator, + state_accounts_with_no_accesses_but_storage_tries: &'a HashMap< + HashedAccountAddr, + TrieRootHash, + >, + ) { + for h_addr in accounts_with_storage { + if !storage_tries.contains_key(h_addr) { + let trie = state_accounts_with_no_accesses_but_storage_tries + .get(h_addr) + .map(|s_root| HashedPartialTrie::new(Node::Hash(*s_root))) + .unwrap_or_default(); + + storage_tries.insert(*h_addr, trie); + }; + } + } + fn create_minimal_partial_tries_needed_by_txn( - curr_block_tries: &mut PartialTrieState, + curr_block_tries: &PartialTrieState, nodes_used_by_txn: &NodesUsedByTxn, txn_idx: TxnIdx, + delta_application_out: TrieDeltaApplicationOutput, _coin_base_addr: &Address, ) -> TraceParsingResult { let state_trie = create_minimal_state_partial_trie( &curr_block_tries.state, nodes_used_by_txn.state_accesses.iter().cloned(), + delta_application_out + .additional_state_trie_paths_to_not_hash + .into_iter(), )?; let txn_k = Nibbles::from_bytes_be(&rlp::encode(&txn_idx)).unwrap(); @@ -210,17 +283,11 @@ impl ProcessedBlockTrace { let receipts_trie = create_trie_subset_wrapped(&curr_block_tries.receipt, once(txn_k), TrieType::Receipt)?; - // TODO: Refactor so we can remove this vec alloc... - let storage_access_vec = nodes_used_by_txn - .storage_accesses - .iter() - .map(|(k, v)| (H256::from_slice(&k.bytes_be()), v.clone())) - .collect::>(); - let storage_tries = create_minimal_storage_partial_tries( - &mut curr_block_tries.storage, + &curr_block_tries.storage, &nodes_used_by_txn.state_accounts_with_no_accesses_but_storage_tries, - storage_access_vec.iter(), + nodes_used_by_txn.storage_accesses.iter(), + &delta_application_out.additional_storage_trie_paths_to_not_hash, )?; Ok(TrieInputs { @@ -233,34 +300,42 @@ impl ProcessedBlockTrace { fn apply_deltas_to_trie_state( trie_state: &mut PartialTrieState, - deltas: NodesUsedByTxn, + deltas: &NodesUsedByTxn, meta: &TxnMetaState, - txn_idx: TxnIdx, - ) -> TraceParsingResult<()> { - for (hashed_acc_addr, storage_writes) in deltas.storage_writes { - let storage_trie = trie_state - .storage - .get_mut(&H256::from_slice(&hashed_acc_addr.bytes_be())) - .ok_or(TraceParsingError::MissingAccountStorageTrie( - H256::from_slice(&hashed_acc_addr.bytes_be()), - ))?; + ) -> TraceParsingResult { + let mut out = TrieDeltaApplicationOutput::default(); + + for (hashed_acc_addr, storage_writes) in deltas.storage_writes.iter() { + let mut storage_trie = trie_state.storage.get_mut(hashed_acc_addr).ok_or( + TraceParsingError::MissingAccountStorageTrie(*hashed_acc_addr), + )?; for (slot, val) in storage_writes - .into_iter() + .iter() .map(|(k, v)| (Nibbles::from_h256_be(hash(&k.bytes_be())), v)) { // If we are writing a zero, then we actually need to perform a delete. - match val == ZERO_STORAGE_SLOT_VAL_RLPED { - false => storage_trie.insert(slot, val), + match val == &ZERO_STORAGE_SLOT_VAL_RLPED { + false => storage_trie.insert(slot, val.clone()), true => { - storage_trie.delete(slot); + if let Some(remaining_slot_key) = + Self::delete_node_and_report_remaining_key_if_branch_collapsed( + storage_trie, + &slot, + ) + { + out.additional_storage_trie_paths_to_not_hash + .entry(*hashed_acc_addr) + .or_default() + .push(remaining_slot_key); + } } }; } } - for (hashed_acc_addr, s_trie_writes) in deltas.state_writes { - let val_k = Nibbles::from_h256_be(hashed_acc_addr); + for (hashed_acc_addr, s_trie_writes) in deltas.state_writes.iter() { + let val_k = Nibbles::from_h256_be(*hashed_acc_addr); // If the account was created, then it will not exist in the trie. let val_bytes = trie_state @@ -272,7 +347,7 @@ impl ProcessedBlockTrace { s_trie_writes.apply_writes_to_state_node( &mut account, - &hashed_acc_addr, + hashed_acc_addr, &trie_state.storage, )?; @@ -283,27 +358,72 @@ impl ProcessedBlockTrace { } // Remove any accounts that self-destructed. - for hashed_addr in deltas.self_destructed_accounts { - let k = Nibbles::from_h256_be(hashed_addr); + for hashed_addr in deltas.self_destructed_accounts.iter() { + let k = Nibbles::from_h256_be(*hashed_addr); trie_state .storage - .remove(&hashed_addr) - .ok_or(TraceParsingError::MissingAccountStorageTrie(hashed_addr))?; + .remove(hashed_addr) + .ok_or(TraceParsingError::MissingAccountStorageTrie(*hashed_addr))?; + // TODO: Once the mechanism for resolving code hashes settles, we probably want // to also delete the code hash mapping here as well... - trie_state.state.delete(k); + if let Some(remaining_account_key) = + Self::delete_node_and_report_remaining_key_if_branch_collapsed( + &mut trie_state.state, + &k, + ) + { + out.additional_state_trie_paths_to_not_hash + .push(remaining_account_key); + } } - let txn_k = Nibbles::from_bytes_be(&rlp::encode(&txn_idx)).unwrap(); - trie_state.txn.insert(txn_k, meta.txn_bytes()); + Ok(out) + } - trie_state - .receipt - .insert(txn_k, meta.receipt_node_bytes.as_ref()); + fn get_trie_trace(trie: &HashedPartialTrie, k: &Nibbles) -> TriePath { + path_for_query(trie, *k, true).collect() + } - Ok(()) + /// If a branch collapse occurred after a delete, then we must ensure that + /// the other single child that remains also is not hashed when passed into + /// plonky2. Returns the key to the remaining child if a collapse occured. + fn delete_node_and_report_remaining_key_if_branch_collapsed( + trie: &mut HashedPartialTrie, + delete_k: &Nibbles, + ) -> Option { + let old_trace = Self::get_trie_trace(trie, delete_k); + trie.delete(*delete_k); + let new_trace = Self::get_trie_trace(trie, delete_k); + + Self::node_deletion_resulted_in_a_branch_collapse(&old_trace, &new_trace) + } + + /// Comparing the path of the deleted key before and after the deletion, + /// determine if the deletion resulted in a branch collapsing into a leaf or + /// extension node, and return the path to the remaining child if this + /// occurred. + fn node_deletion_resulted_in_a_branch_collapse( + old_path: &TriePath, + new_path: &TriePath, + ) -> Option { + // Collapse requires at least 2 nodes. + if old_path.0.len() < 2 { + return None; + } + + // If the node path length decreased after the delete, then a collapse occurred. + // As an aside, note that while it's true that the branch could have collapsed + // into an extension node with multiple nodes below it, the query logic will + // always stop at most one node after the keys diverge, which guarantees that + // the new trie path will always be shorter if a collapse occurred. + let branch_collapse_occurred = old_path.0.len() > new_path.0.len(); + + // Now we need to determine the key of the only remaining node after the + // collapse. + branch_collapse_occurred.then(|| new_path.iter().into_key()) } /// Pads a generated IR vec with additional "dummy" entries if needed. @@ -533,7 +653,11 @@ fn create_dummy_gen_input_with_state_addrs_accessed( ) -> TraceParsingResult { let sub_tries = create_dummy_proof_trie_inputs( final_tries, - create_minimal_state_partial_trie(&final_tries.state, account_addrs_accessed)?, + create_minimal_state_partial_trie( + &final_tries.state, + account_addrs_accessed, + iter::empty(), + )?, ); Ok(create_dummy_gen_input_common( other_data, extra_data, sub_tries, @@ -606,10 +730,14 @@ fn create_dummy_proof_trie_inputs( fn create_minimal_state_partial_trie( state_trie: &HashedPartialTrie, state_accesses: impl Iterator, + additional_state_trie_paths_to_not_hash: impl Iterator, ) -> TraceParsingResult { create_trie_subset_wrapped( state_trie, - state_accesses.into_iter().map(Nibbles::from_h256_be), + state_accesses + .into_iter() + .map(Nibbles::from_h256_be) + .chain(additional_state_trie_paths_to_not_hash), TrieType::State, ) } @@ -617,28 +745,27 @@ fn create_minimal_state_partial_trie( // TODO!!!: We really need to be appending the empty storage tries to the base // trie somewhere else! This is a big hack! fn create_minimal_storage_partial_tries<'a>( - storage_tries: &mut HashMap, + storage_tries: &HashMap, state_accounts_with_no_accesses_but_storage_tries: &HashMap, accesses_per_account: impl Iterator)>, + additional_storage_trie_paths_to_not_hash: &HashMap>, ) -> TraceParsingResult> { accesses_per_account .map(|(h_addr, mem_accesses)| { - // TODO: Clean up... - let base_storage_trie = match storage_tries.get(&H256(h_addr.0)) { - Some(s_trie) => s_trie, - None => { - let trie = state_accounts_with_no_accesses_but_storage_tries - .get(h_addr) - .map(|s_root| HashedPartialTrie::new(Node::Hash(*s_root))) - .unwrap_or_default(); - storage_tries.insert(*h_addr, trie); // TODO: Really change this... - storage_tries.get(h_addr).unwrap() - } - }; + // Guaranteed to exist due to calling `init_any_needed_empty_storage_tries` + // earlier on. + let base_storage_trie = &storage_tries[h_addr]; + + let storage_slots_to_not_hash = mem_accesses.iter().cloned().chain( + additional_storage_trie_paths_to_not_hash + .get(h_addr) + .into_iter() + .flat_map(|slots| slots.iter().cloned()), + ); let partial_storage_trie = create_trie_subset_wrapped( base_storage_trie, - mem_accesses.iter().cloned(), + storage_slots_to_not_hash, TrieType::Storage, )?; @@ -652,8 +779,13 @@ fn create_trie_subset_wrapped( accesses: impl Iterator, trie_type: TrieType, ) -> TraceParsingResult { - create_trie_subset(trie, accesses) - .map_err(|_| TraceParsingError::MissingKeysCreatingSubPartialTrie(trie_type)) + create_trie_subset(trie, accesses).map_err(|trie_err| { + let key = match trie_err { + SubsetTrieError::UnexpectedKey(key, _) => key, + }; + + TraceParsingError::MissingKeysCreatingSubPartialTrie(key, trie_type) + }) } fn account_from_rlped_bytes(bytes: &[u8]) -> TraceParsingResult { diff --git a/trace_decoder/src/processed_block_trace.rs b/trace_decoder/src/processed_block_trace.rs index 469290d92..31e1613cc 100644 --- a/trace_decoder/src/processed_block_trace.rs +++ b/trace_decoder/src/processed_block_trace.rs @@ -17,8 +17,9 @@ use crate::trace_protocol::{ TrieUncompressed, TxnInfo, }; use crate::types::{ - CodeHash, CodeHashResolveFunc, HashedAccountAddr, HashedNodeAddr, HashedStorageAddrNibbles, - OtherBlockData, TrieRootHash, TxnProofGenIR, EMPTY_CODE_HASH, EMPTY_TRIE_HASH, + CodeHash, CodeHashResolveFunc, HashedAccountAddr, HashedNodeAddr, HashedStorageAddr, + HashedStorageAddrNibbles, OtherBlockData, TrieRootHash, TxnProofGenIR, EMPTY_CODE_HASH, + EMPTY_TRIE_HASH, }; use crate::utils::{ h_addr_nibs_to_h256, hash, print_value_and_hash_nodes_of_storage_trie, @@ -251,7 +252,7 @@ impl TxnInfo { let storage_access_keys = storage_read_keys.chain(storage_write_keys.copied()); nodes_used_by_txn.storage_accesses.push(( - Nibbles::from_h256_be(hashed_addr), + hashed_addr, storage_access_keys .map(|k| Nibbles::from_h256_be(hash(&k.0))) .collect(), @@ -284,7 +285,7 @@ impl TxnInfo { nodes_used_by_txn .storage_writes - .push((Nibbles::from_h256_be(hashed_addr), storage_writes_vec)); + .push((hashed_addr, storage_writes_vec)); nodes_used_by_txn.state_accesses.push(hashed_addr); @@ -325,9 +326,7 @@ impl TxnInfo { .filter(|(_, data)| data.storage_root != EMPTY_TRIE_HASH); let accounts_with_storage_but_no_storage_accesses = all_accounts_with_non_empty_storage - .filter(|&(addr, _data)| { - !accounts_with_storage_accesses.contains(&Nibbles::from_h256_be(*addr)) - }) + .filter(|&(addr, _data)| !accounts_with_storage_accesses.contains(addr)) .map(|(addr, data)| (*addr, data.storage_root)); nodes_used_by_txn @@ -380,8 +379,8 @@ pub(crate) struct NodesUsedByTxn { pub(crate) state_writes: Vec<(HashedAccountAddr, StateTrieWrites)>, // Note: All entries in `storage_writes` also appear in `storage_accesses`. - pub(crate) storage_accesses: Vec<(Nibbles, StorageAccess)>, - pub(crate) storage_writes: Vec<(Nibbles, StorageWrite)>, + pub(crate) storage_accesses: Vec<(HashedAccountAddr, StorageAccess)>, + pub(crate) storage_writes: Vec<(HashedAccountAddr, StorageWrite)>, pub(crate) state_accounts_with_no_accesses_but_storage_tries: HashMap, pub(crate) self_destructed_accounts: Vec, diff --git a/trace_decoder/src/types.rs b/trace_decoder/src/types.rs index 090651b1c..d1c51b998 100644 --- a/trace_decoder/src/types.rs +++ b/trace_decoder/src/types.rs @@ -3,7 +3,7 @@ use evm_arithmetization::{ generation::GenerationInputs, proof::{BlockHashes, BlockMetadata}, }; -use mpt_trie::nibbles::Nibbles; +use mpt_trie::{nibbles::Nibbles, partial_trie::HashedPartialTrie}; use serde::{Deserialize, Serialize}; /// A type alias around `u64` for a block height. @@ -31,6 +31,8 @@ pub type TrieRootHash = H256; /// A type alias around `usize` for a transaction's index within a block. pub type TxnIdx = usize; +pub(crate) type TriePathIter = mpt_trie::special_query::TriePathIter; + /// A function which turns a code hash into bytes. pub trait CodeHashResolveFunc = Fn(&CodeHash) -> Vec;