From 4a6cf900c21b5536c4a1b5a07fc7a6a89749684d Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Mon, 16 Sep 2024 15:01:46 +0200 Subject: [PATCH 1/2] fix sanitizer issues --- .../src/spatial/core/geometry/geometry.cpp | 44 +++++++++---------- .../core/io/shapefile/read_shapefile.cpp | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/spatial/src/spatial/core/geometry/geometry.cpp b/spatial/src/spatial/core/geometry/geometry.cpp index 9f06451c..0358b4a9 100644 --- a/spatial/src/spatial/core/geometry/geometry.cpp +++ b/spatial/src/spatial/core/geometry/geometry.cpp @@ -79,14 +79,14 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo MakeMutable(geom, alloc); } - auto used_to_have_z = geom.properties.HasZ(); - auto used_to_have_m = geom.properties.HasM(); - auto old_vertex_size = geom.properties.VertexSize(); + const auto used_to_have_z = geom.properties.HasZ(); + const auto used_to_have_m = geom.properties.HasM(); + const auto old_vertex_size = geom.properties.VertexSize(); geom.properties.SetZ(has_z); geom.properties.SetM(has_m); - auto new_vertex_size = geom.properties.VertexSize(); + const auto new_vertex_size = geom.properties.VertexSize(); // Case 1: The new vertex size is larger than the old vertex size if (new_vertex_size > old_vertex_size) { geom.data_ptr = alloc.ReallocateAligned(geom.data_ptr, geom.data_count * old_vertex_size, @@ -97,24 +97,24 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo // 1. We go from XYM to XYZM // This is special, because we need to slide the M value to the end of each vertex for (int64_t i = geom.data_count - 1; i >= 0; i--) { - auto old_offset = i * old_vertex_size; - auto new_offset = i * new_vertex_size; - auto old_m_offset = old_offset + sizeof(double) * 2; - auto new_z_offset = new_offset + sizeof(double) * 2; - auto new_m_offset = new_offset + sizeof(double) * 3; + const auto old_offset = i * old_vertex_size; + const auto new_offset = i * new_vertex_size; + const auto old_m_offset = old_offset + sizeof(double) * 2; + const auto new_z_offset = new_offset + sizeof(double) * 2; + const auto new_m_offset = new_offset + sizeof(double) * 3; // Move the M value memcpy(geom.data_ptr + new_m_offset, geom.data_ptr + old_m_offset, sizeof(double)); // Set the new Z value memcpy(geom.data_ptr + new_z_offset, &default_z, sizeof(double)); // Move the X and Y values - memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); + memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); } } else if (!used_to_have_z && has_z && !used_to_have_m && has_m) { // 2. We go from XY to XYZM // This is special, because we need to add both the default Z and M values to the end of each vertex for (int64_t i = geom.data_count - 1; i >= 0; i--) { - auto old_offset = i * old_vertex_size; - auto new_offset = i * new_vertex_size; + const auto old_offset = i * old_vertex_size; + const auto new_offset = i * new_vertex_size; memcpy(geom.data_ptr + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); memcpy(geom.data_ptr + new_offset + sizeof(double) * 2, &default_z, sizeof(double)); memcpy(geom.data_ptr + new_offset + sizeof(double) * 3, &default_m, sizeof(double)); @@ -125,10 +125,10 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo // 4. We go from XY to XYM // 5. We go from XYZ to XYZM // These are all really the same, we just add the default to the end - auto default_value = has_m ? default_m : default_z; + const auto default_value = has_m ? default_m : default_z; for (int64_t i = geom.data_count - 1; i >= 0; i--) { - auto old_offset = i * old_vertex_size; - auto new_offset = i * new_vertex_size; + const auto old_offset = i * old_vertex_size; + const auto new_offset = i * new_vertex_size; memmove(geom.data_ptr + new_offset, geom.data_ptr + old_offset, old_vertex_size); memcpy(geom.data_ptr + new_offset + old_vertex_size, &default_value, sizeof(double)); } @@ -138,9 +138,9 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo else if (new_vertex_size == old_vertex_size) { // This only happens when we go from XYZ -> XYM or XYM -> XYZ // In this case we just need to set the default on the third dimension - auto default_value = has_m ? default_m : default_z; + const auto default_value = has_m ? default_m : default_z; for (uint32_t i = 0; i < geom.data_count; i++) { - auto offset = i * new_vertex_size + sizeof(double) * 2; + const auto offset = i * new_vertex_size + sizeof(double) * 2; memcpy(geom.data_ptr + offset, &default_value, sizeof(double)); } } @@ -153,17 +153,17 @@ void SinglePartGeometry::SetVertexType(Geometry &geom, ArenaAllocator &alloc, bo // Special case: If we go from XYZM to XYM, we need to slide the M value to the end of each vertex if (used_to_have_z && used_to_have_m && !has_z && has_m) { for (uint32_t i = 0; i < geom.data_count; i++) { - auto old_offset = i * old_vertex_size; - auto new_offset = i * new_vertex_size; + const auto old_offset = i * old_vertex_size; + const auto new_offset = i * new_vertex_size; memcpy(new_data + new_offset, geom.data_ptr + old_offset, sizeof(double) * 2); - auto m_offset = old_offset + sizeof(double) * 3; + const auto m_offset = old_offset + sizeof(double) * 3; memcpy(new_data + new_offset + sizeof(double) * 2, geom.data_ptr + m_offset, sizeof(double)); } } else { // Otherwise, we just copy the data over for (uint32_t i = 0; i < geom.data_count; i++) { - auto old_offset = i * old_vertex_size; - auto new_offset = i * new_vertex_size; + const auto old_offset = i * old_vertex_size; + const auto new_offset = i * new_vertex_size; memcpy(new_data + new_offset, geom.data_ptr + old_offset, new_vertex_size); } } diff --git a/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp b/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp index 9a15e4c1..d8c97082 100644 --- a/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp +++ b/spatial/src/spatial/core/io/shapefile/read_shapefile.cpp @@ -434,7 +434,7 @@ static void ConvertStringAttributeLoop(Vector &result, int record_start, idx_t c auto string_bytes = DBFReadStringAttribute(dbf_handle, record_idx, field_idx); string_t result_str; if (attribute_encoding == AttributeEncoding::LATIN1) { - conversion_buffer.reserve(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars) + conversion_buffer.resize(strlen(string_bytes) * 2 + 1); // worst case (all non-ascii chars) auto out_len = EncodingUtil::LatinToUTF8Buffer(const_data_ptr_cast(string_bytes), conversion_buffer.data()); result_str = StringVector::AddString(result, const_char_ptr_cast(conversion_buffer.data()), out_len); From 823dea78bd915282928b1f301ad7c3d1dbe5432b Mon Sep 17 00:00:00 2001 From: Max Gabrielsson Date: Mon, 16 Sep 2024 16:43:59 +0200 Subject: [PATCH 2/2] fix rtree deletes --- .../include/spatial/core/geometry/bbox.hpp | 4 + .../spatial/core/index/rtree/rtree.hpp | 3 + .../src/spatial/core/index/rtree/rtree.cpp | 90 +++++++++++++++++-- 3 files changed, 91 insertions(+), 6 deletions(-) diff --git a/spatial/include/spatial/core/geometry/bbox.hpp b/spatial/include/spatial/core/geometry/bbox.hpp index ea2b859c..cd1393f3 100644 --- a/spatial/include/spatial/core/geometry/bbox.hpp +++ b/spatial/include/spatial/core/geometry/bbox.hpp @@ -21,6 +21,10 @@ struct Box { Box(const V &min_p, const V &max_p) : min(min_p), max(max_p) { } + bool IsUnbounded() const { + return *this == Box(); + } + // Only does a 2D intersection check bool Intersects(const Box &other) const { return !(min.x > other.max.x || max.x < other.min.x || min.y > other.max.y || max.y < other.min.y); diff --git a/spatial/include/spatial/core/index/rtree/rtree.hpp b/spatial/include/spatial/core/index/rtree/rtree.hpp index 59b42bb7..fa8b69e1 100644 --- a/spatial/include/spatial/core/index/rtree/rtree.hpp +++ b/spatial/include/spatial/core/index/rtree/rtree.hpp @@ -86,6 +86,9 @@ struct RTree { RTreePointer MakePage(RTreeNodeType type) const; static RTreePointer MakeRowId(row_t row_id); + string ToString() const; + void Print() const; + private: void Free(RTreePointer &pointer); diff --git a/spatial/src/spatial/core/index/rtree/rtree.cpp b/spatial/src/spatial/core/index/rtree/rtree.cpp index f9794327..bf064ea7 100644 --- a/spatial/src/spatial/core/index/rtree/rtree.cpp +++ b/spatial/src/spatial/core/index/rtree/rtree.cpp @@ -1,4 +1,5 @@ #include "spatial/core/index/rtree/rtree.hpp" +#include "duckdb/common/printer.hpp" namespace spatial { @@ -415,6 +416,9 @@ void RTree::RootInsert(RTreeEntry &root_entry, const RTreeEntry &new_entry) { // Delete //------------------------------------------------------------------------------ DeleteResult RTree::NodeDelete(RTreeEntry &entry, const RTreeEntry &target, vector &orphans) { + if(!entry.bounds.Intersects(target.bounds)) { + return {false, false, false}; + } const auto is_leaf = entry.pointer.IsLeafPage(); return is_leaf ? LeafDelete(entry, target, orphans) : BranchDelete(entry, target, orphans); } @@ -461,8 +465,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve entry.bounds = node.GetBounds(); shrunk = entry.bounds != old_bounds; - // Assert that the bounds shrunk - D_ASSERT(entry.bounds.Area() <= old_bounds.Area()); + // If the min capacity is zero, the bounds can grow when a node becomes empty + D_ASSERT(entry.bounds.IsUnbounded() || entry.bounds.Area() <= old_bounds.Area()); } return {true, shrunk, false}; } @@ -475,7 +479,8 @@ DeleteResult RTree::BranchDelete(RTreeEntry &entry, const RTreeEntry &target, ve entry.bounds = node.GetBounds(); shrunk = entry.bounds != old_bounds; - D_ASSERT(entry.bounds.Area() <= old_bounds.Area()); + // If the min capacity is zero, the bounds can grow when a node becomes empty + D_ASSERT(entry.bounds.IsUnbounded() || (entry.bounds.Area() <= old_bounds.Area())); } return {true, shrunk, false}; @@ -488,15 +493,21 @@ DeleteResult RTree::LeafDelete(RTreeEntry &entry, const RTreeEntry &target, vect // Do a binary search with std::lower_bound to find the matching rowid // This is faster than a linear search - auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(), + const auto it = std::lower_bound(node.begin(), node.end(), target.pointer.GetRowId(), [](const RTreeEntry &item, const row_t &row) { return item.pointer.GetRowId() < row; }); if (it == node.end()) { // Not found in this leaf return {false, false, false}; } - auto &child = *it; - auto child_idx = it - node.begin(); + const auto &child = *it; + + // Ok, did the binary search actually find the rowid? + if(child.pointer.GetRowId() != target.pointer.GetRowId()) { + return {false, false, false}; + } + + const auto child_idx = it - node.begin(); D_ASSERT(child.pointer.IsRowId()); @@ -581,6 +592,73 @@ void RTree::RootDelete(RTreeEntry &root, const RTreeEntry &target) { } } +// Print as ascii tree +string RTree::ToString() const { + string result; + + struct PrintState { + RTreePointer pointer; + idx_t entry_idx; + explicit PrintState(const RTreePointer &pointer_p) : pointer(pointer_p), entry_idx(0) { + } + }; + + vector stack; + idx_t level = 0; + + stack.emplace_back(root.pointer); + + while(!stack.empty()) { + auto &frame = stack.back(); + const auto &node = Ref(frame.pointer); + const auto count = node.GetCount(); + + if(frame.pointer.IsLeafPage()) { + while(frame.entry_idx < count) { + auto &entry = node[frame.entry_idx]; + // TODO: Print entry + for(idx_t i = 0; i < level; i++) { + result += " "; + } + + result += "Leaf: " + std::to_string(entry.pointer.GetRowId()) + "\n"; + + frame.entry_idx++; + } + stack.pop_back(); + level--; + } + else { + D_ASSERT(frame.pointer.IsBranchPage()); + if(frame.entry_idx < count) { + auto &entry = node[frame.entry_idx]; + + // TODO: Print entry + for(idx_t i = 0; i < level; i++) { + result += " "; + } + + result += "Branch: " + std::to_string(frame.entry_idx) + "\n"; + + frame.entry_idx++; + level++; + stack.emplace_back(entry.pointer); + } else { + stack.pop_back(); + level--; + } + } + } + + + return result; +} + +void RTree::Print() const { + Printer::Print(ToString()); +} + + } // namespace core } // namespace spatial \ No newline at end of file