Skip to content

Commit

Permalink
Merge pull request #397 from Maxxen/dev
Browse files Browse the repository at this point in the history
Bugfixes & improve RTree delete performance
  • Loading branch information
Maxxen authored Sep 17, 2024
2 parents 4107eb7 + 823dea7 commit 43e84f2
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 29 deletions.
4 changes: 4 additions & 0 deletions spatial/include/spatial/core/geometry/bbox.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ struct Box {
Box(const V &min_p, const V &max_p) : min(min_p), max(max_p) {
}

bool IsUnbounded() const {
return *this == Box();
}

// Only does a 2D intersection check
bool Intersects(const Box &other) const {
return !(min.x > other.max.x || max.x < other.min.x || min.y > other.max.y || max.y < other.min.y);
Expand Down
3 changes: 3 additions & 0 deletions spatial/include/spatial/core/index/rtree/rtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ struct RTree {
RTreePointer MakePage(RTreeNodeType type) const;
static RTreePointer MakeRowId(row_t row_id);

string ToString() const;
void Print() const;

private:
void Free(RTreePointer &pointer);

Expand Down
44 changes: 22 additions & 22 deletions spatial/src/spatial/core/geometry/geometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
MakeMutable(geom, alloc);
}

auto used_to_have_z = geom.properties.HasZ();
auto used_to_have_m = geom.properties.HasM();
auto old_vertex_size = geom.properties.VertexSize();
const auto used_to_have_z = geom.properties.HasZ();
const auto used_to_have_m = geom.properties.HasM();
const auto old_vertex_size = geom.properties.VertexSize();

geom.properties.SetZ(has_z);
geom.properties.SetM(has_m);

auto new_vertex_size = geom.properties.VertexSize();
const auto new_vertex_size = geom.properties.VertexSize();
// Case 1: The new vertex size is larger than the old vertex size
if (new_vertex_size > old_vertex_size) {
geom.data_ptr = alloc.ReallocateAligned(geom.data_ptr, geom.data_count * old_vertex_size,
Expand All @@ -97,24 +97,24 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// 1. We go from XYM to XYZM
// This is special, because we need to slide the M value to the end of each vertex
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
auto old_m_offset = old_offset + sizeof(double) * 2;
auto new_z_offset = new_offset + sizeof(double) * 2;
auto new_m_offset = new_offset + sizeof(double) * 3;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
const auto old_m_offset = old_offset + sizeof(double) * 2;
const auto new_z_offset = new_offset + sizeof(double) * 2;
const auto new_m_offset = new_offset + sizeof(double) * 3;
// Move the M value
memcpy(geom.data_ptr + new_m_offset, geom.data_ptr + old_m_offset, sizeof(double));
// Set the new Z value
memcpy(geom.data_ptr + new_z_offset, &default_z, sizeof(double));
// Move the X and Y values
memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
}
} else if (!used_to_have_z && has_z && !used_to_have_m && has_m) {
// 2. We go from XY to XYZM
// This is special, because we need to add both the default Z and M values to the end of each vertex
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
memcpy(geom.data_ptr + new_offset + sizeof(double) * 2, &default_z, sizeof(double));
memcpy(geom.data_ptr + new_offset + sizeof(double) * 3, &default_m, sizeof(double));
Expand All @@ -125,10 +125,10 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// 4. We go from XY to XYM
// 5. We go from XYZ to XYZM
// These are all really the same, we just add the default to the end
auto default_value = has_m ? default_m : default_z;
const auto default_value = has_m ? default_m : default_z;
for (int64_t i = geom.data_count - 1; i >= 0; i--) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, old_vertex_size);
memcpy(geom.data_ptr + new_offset + old_vertex_size, &default_value, sizeof(double));
}
Expand All @@ -138,9 +138,9 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
else if (new_vertex_size == old_vertex_size) {
// This only happens when we go from XYZ -> XYM or XYM -> XYZ
// In this case we just need to set the default on the third dimension
auto default_value = has_m ? default_m : default_z;
const auto default_value = has_m ? default_m : default_z;
for (uint32_t i = 0; i < geom.data_count; i++) {
auto offset = i * new_vertex_size + sizeof(double) * 2;
const auto offset = i * new_vertex_size + sizeof(double) * 2;
memcpy(geom.data_ptr + offset, &default_value, sizeof(double));
}
}
Expand All @@ -153,17 +153,17 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo
// Special case: If we go from XYZM to XYM, we need to slide the M value to the end of each vertex
if (used_to_have_z && used_to_have_m && !has_z && has_m) {
for (uint32_t i = 0; i < geom.data_count; i++) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(new_data + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2);
auto m_offset = old_offset + sizeof(double) * 3;
const auto m_offset = old_offset + sizeof(double) * 3;
memcpy(new_data + new_offset + sizeof(double) * 2, geom.data_ptr + m_offset, sizeof(double));
}
} else {
// Otherwise, we just copy the data over
for (uint32_t i = 0; i < geom.data_count; i++) {
auto old_offset = i * old_vertex_size;
auto new_offset = i * new_vertex_size;
const auto old_offset = i * old_vertex_size;
const auto new_offset = i * new_vertex_size;
memcpy(new_data + new_offset, geom.data_ptr + old_offset, new_vertex_size);
}
}
Expand Down
90 changes: 84 additions & 6 deletions spatial/src/spatial/core/index/rtree/rtree.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "spatial/core/index/rtree/rtree.hpp"
#include "duckdb/common/printer.hpp"

namespace spatial {

Expand Down Expand Up @@ -415,6 +416,9 @@ void RTree::RootInsert(RTreeEntry &root_entry, const RTreeEntry &new_entry) {
// Delete
//------------------------------------------------------------------------------
DeleteResult RTree::NodeDelete(RTreeEntry &entry, const RTreeEntry &target, vector<RTreeEntry> &orphans) {
if(!entry.bounds.Intersects(target.bounds)) {
return {false, false, false};
}
const auto is_leaf = entry.pointer.IsLeafPage();
return is_leaf ? LeafDelete(entry, target, orphans) : BranchDelete(entry, target, orphans);
}
Expand Down Expand Up @@ -461,8 +465,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve
entry.bounds = node.GetBounds();
shrunk = entry.bounds != old_bounds;

// Assert that the bounds shrunk
D_ASSERT(entry.bounds.Area() <= old_bounds.Area());
// If the min capacity is zero, the bounds can grow when a node becomes empty
D_ASSERT(entry.bounds.IsUnbounded() || entry.bounds.Area() <= old_bounds.Area());
}
return {true, shrunk, false};
}
Expand All @@ -475,7 +479,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve
entry.bounds = node.GetBounds();
shrunk = entry.bounds != old_bounds;

D_ASSERT(entry.bounds.Area() <= old_bounds.Area());
// If the min capacity is zero, the bounds can grow when a node becomes empty
D_ASSERT(entry.bounds.IsUnbounded() || (entry.bounds.Area() <= old_bounds.Area()));
}

return {true, shrunk, false};
Expand All @@ -488,15 +493,21 @@ DeleteResult RTree::LeafDelete(RTreeEntry &entry, const RTreeEntry &target, vect

// Do a binary search with std::lower_bound to find the matching rowid
// This is faster than a linear search
auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(),
const auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(),
[](const RTreeEntry &item, const row_t &row) { return item.pointer.GetRowId() < row; });
if (it == node.end()) {
// Not found in this leaf
return {false, false, false};
}

auto &child = *it;
auto child_idx = it - node.begin();
const auto &child = *it;

// Ok, did the binary search actually find the rowid?
if(child.pointer.GetRowId() != target.pointer.GetRowId()) {
return {false, false, false};
}

const auto child_idx = it - node.begin();

D_ASSERT(child.pointer.IsRowId());

Expand Down Expand Up @@ -581,6 +592,73 @@ void RTree::RootDelete(RTreeEntry &root, const RTreeEntry &target) {
}
}

// Print as ascii tree
string RTree::ToString() const {
string result;

struct PrintState {
RTreePointer pointer;
idx_t entry_idx;
explicit PrintState(const RTreePointer &pointer_p) : pointer(pointer_p), entry_idx(0) {
}
};

vector<PrintState> stack;
idx_t level = 0;

stack.emplace_back(root.pointer);

while(!stack.empty()) {
auto &frame = stack.back();
const auto &node = Ref(frame.pointer);
const auto count = node.GetCount();

if(frame.pointer.IsLeafPage()) {
while(frame.entry_idx < count) {
auto &entry = node[frame.entry_idx];
// TODO: Print entry
for(idx_t i = 0; i < level; i++) {
result += " ";
}

result += "Leaf: " + std::to_string(entry.pointer.GetRowId()) + "\n";

frame.entry_idx++;
}
stack.pop_back();
level--;
}
else {
D_ASSERT(frame.pointer.IsBranchPage());
if(frame.entry_idx < count) {
auto &entry = node[frame.entry_idx];

// TODO: Print entry
for(idx_t i = 0; i < level; i++) {
result += " ";
}

result += "Branch: " + std::to_string(frame.entry_idx) + "\n";

frame.entry_idx++;
level++;
stack.emplace_back(entry.pointer);
} else {
stack.pop_back();
level--;
}
}
}


return result;
}

void RTree::Print() const {
Printer::Print(ToString());
}


} // namespace core

} // namespace spatial
2 changes: 1 addition & 1 deletion spatial/src/spatial/core/io/shapefile/read_shapefile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ static void ConvertStringAttributeLoop(Vector &result, int record_start, idx_t c
auto string_bytes = DBFReadStringAttribute(dbf_handle, record_idx, field_idx);
string_t result_str;
if (attribute_encoding == AttributeEncoding::LATIN1) {
conversion_buffer.reserve(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars)
conversion_buffer.resize(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars)
auto out_len =
EncodingUtil::LatinToUTF8Buffer(const_data_ptr_cast(string_bytes), conversion_buffer.data());
result_str = StringVector::AddString(result, const_char_ptr_cast(conversion_buffer.data()), out_len);
Expand Down

0 comments on commit 43e84f2

Please sign in to comment.