Skip to content

Commit

Permalink
mpt: Refactor path mismatch handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chfast committed Dec 21, 2023
1 parent 1ac539f commit 5917ee2
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 48 deletions.
103 changes: 55 additions & 48 deletions test/state/mpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ struct Path

Path() = default;

/// Constructs a sub-path by copying nibbles from existing path.
Path(size_t len, const uint8_t* nibs) noexcept : length{len}
{
std::copy_n(nibs, length, nibbles);
}

/// Constructs a path from bytes - each byte will produce 2 nibbles in the path.
explicit Path(bytes_view key) noexcept : length{2 * key.size()}
{
assert(length <= std::size(nibbles));
Expand All @@ -34,19 +41,7 @@ struct Path
[[nodiscard]] Path tail(size_t pos) const noexcept
{
assert(pos > 0 && pos <= length); // MPT never requests whole path copy (pos == 0).
Path p;
p.length = length - pos;
std::copy_n(&nibbles[pos], p.length, p.nibbles);
return p;
}

[[nodiscard]] Path head(size_t size) const noexcept
{
assert(size < length); // MPT never requests whole path copy (size == length).
Path p;
p.length = size;
std::copy_n(nibbles, size, p.nibbles);
return p;
return {length - pos, &nibbles[pos]};
}

[[nodiscard]] bytes encode(bool extended) const
Expand Down Expand Up @@ -132,12 +127,37 @@ class MPTNode
std::move(br);
}

/// Finds the position at witch two paths differ.
static size_t mismatch(const Path& p1, const Path& p2) noexcept
/// The information how two paths are different.
struct MismatchResult
{
assert(p1.length <= p2.length);
return static_cast<size_t>(
std::mismatch(p1.nibbles, p1.nibbles + p1.length, p2.nibbles).first - p1.nibbles);
static constexpr uint8_t invalid = 0xff;

Path common; ///< Common prefix.
uint8_t nibble1 = invalid; ///< Mismatched nibble in path 1. Invalid if paths match.
uint8_t nibble2 = invalid; ///< Mismatched nibble in second path.
Path tail1; ///< Remaining tail after the nibble in path 1.
Path tail2; ///< Remaining tail after the nibble in path 2.
};

/// Finds the difference between two paths.
static MismatchResult mismatch(const Path& p1, const Path& p2) noexcept
{
assert(p1.length <= p2.length); // no support for inserting keys of different lengths
const auto [it1, it2] = std::mismatch(p1.nibbles, p1.nibbles + p1.length, p2.nibbles);

const Path common{static_cast<size_t>(it1 - p1.nibbles), p1.nibbles};
const auto matched = common.length == p1.length;
const auto tail_start = common.length + 1;
assert(!matched || p1.length != p2.length); // matched => paths have different lengths
assert(matched || tail_start <= p1.length); // !matched => tail_start is valid

return {
.common = common,
.nibble1 = static_cast<uint8_t>(matched ? MismatchResult::invalid : *it1),
.nibble2 = *it2,
.tail1 = matched ? Path{} : p1.tail(tail_start),
.tail2 = p2.tail(tail_start),
};
}

public:
Expand All @@ -160,57 +180,44 @@ void MPTNode::insert(const Path& path, bytes&& value) // NOLINT(misc-no-recursi
// in an existing branch node. Otherwise, we need to create new branch node
// (possibly with an adjusted extended node) and transform existing nodes around it.

const auto [common, this_idx, insert_idx, this_tail, insert_tail] = mismatch(m_path, path);

switch (m_kind)
{
case Kind::branch:
{
assert(m_path.length == 0); // Branch has no path.

const auto idx = path.nibbles[0];
auto& child = m_children[idx];
if (!child)
child = leaf(path.tail(1), std::move(value));
if (auto& child = m_children[insert_idx]; child)
child->insert(insert_tail, std::move(value));
else
child->insert(path.tail(1), std::move(value));
child = leaf(insert_tail, std::move(value));
break;
}

case Kind::ext:
{
assert(m_path.length != 0); // Ext must have non-empty path.

const auto mismatch_pos = mismatch(m_path, path);

if (mismatch_pos == m_path.length) // Paths match: go into the child.
return m_children[0]->insert(path.tail(mismatch_pos), std::move(value));

const auto orig_idx = m_path.nibbles[mismatch_pos];
const auto new_idx = path.nibbles[mismatch_pos];
assert(m_path.length != 0); // Ext must have non-empty path.
if (common.length == m_path.length) // Paths match: go into the child.
return m_children[0]->insert(path.tail(common.length), std::move(value));

// The original branch node must be pushed down, possible extended with
// the adjusted extended node if the path split point is not directly at the branch node.
// Clang Analyzer bug: https://github.com/llvm/llvm-project/issues/47814
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
auto orig_branch = optional_ext(m_path.tail(mismatch_pos + 1), std::move(m_children[0]));
auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value));
*this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_branch), new_idx,
std::move(new_leaf));
auto this_branch = optional_ext(this_tail, std::move(m_children[0]));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, this_idx, std::move(this_branch), insert_idx, std::move(new_leaf));
break;
}

case Kind::leaf:
{
assert(m_path.length != 0); // Leaf must have non-empty path.

const auto mismatch_pos = mismatch(m_path, path);
assert(mismatch_pos != m_path.length); // Paths must be different.

const auto orig_idx = m_path.nibbles[mismatch_pos];
const auto new_idx = path.nibbles[mismatch_pos];
auto orig_leaf = leaf(m_path.tail(mismatch_pos + 1), std::move(m_value));
auto new_leaf = leaf(path.tail(mismatch_pos + 1), std::move(value));
*this = ext_branch(m_path.head(mismatch_pos), orig_idx, std::move(orig_leaf), new_idx,
std::move(new_leaf));
assert(m_path.length != 0); // Leaf must have non-empty path.
assert(common.length != m_path.length); // Paths must be different.
auto this_leaf = leaf(this_tail, std::move(m_value));
auto new_leaf = leaf(insert_tail, std::move(value));
*this = ext_branch(common, this_idx, std::move(this_leaf), insert_idx, std::move(new_leaf));
break;
}

Expand Down
5 changes: 5 additions & 0 deletions test/state/mpt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ constexpr auto emptyMPTHash =

/// Insert-only Merkle Patricia Trie implementation for getting the root hash
/// out of (key, value) pairs.
///
/// Limitations:
/// 1. All inserted keys must be of the same length (this is by MTP design?).
/// 2. Inserted values cannot be updated (by inserting the same key again).
/// 3. Inserted values cannot be erased.
class MPT
{
std::unique_ptr<class MPTNode> m_root;
Expand Down
23 changes: 23 additions & 0 deletions test/unittests/state_mpt_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,29 @@ TEST(state_mpt, branch_node_example1)
EXPECT_EQ(hex(trie.hash()), "1aaa6f712413b9a115730852323deb5f5d796c29151a60a1f55f41a25354cd26");
}

TEST(state_mpt, branch_node_of_3)
{
// A trie of single branch node and three leaf nodes with paths of length 2.
// The branch node has leaf nodes at positions [0], [1] and [2]. All leafs have path 0.
// {0:0 1:0 2:0}

MPT trie;
trie.insert("00"_hex, "X"_b);
trie.insert("10"_hex, "Y"_b);
trie.insert("20"_hex, "Z"_b);
EXPECT_EQ(hex(trie.hash()), "5c5154e8d108dcf8b9946c8d33730ec8178345ce9d36e6feed44f0134515482d");
}

TEST(state_mpt, leaf_node_with_empty_path)
{
// Both inserted leafs have empty path in the end.
// 0:{0:"X", 1:"Y"}
MPT trie;
trie.insert("00"_hex, "X"_b);
trie.insert("01"_hex, "Y"_b);
EXPECT_EQ(hex(trie.hash()), "0a923005d10fbd4e571655cec425db7c5091db03c33891224073a55d3abc2415");
}

TEST(state_mpt, extension_node_example1)
{
// A trie of an extension node followed by a branch node with
Expand Down

0 comments on commit 5917ee2

Please sign in to comment.