Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull up pushed down filters from table scan before replacing with index scan #429

Merged
merged 3 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 54 additions & 11 deletions spatial/src/spatial/core/index/rtree/rtree_index_plan_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
#include "duckdb/optimizer/column_lifetime_analyzer.hpp"
#include "duckdb/optimizer/matcher/expression_matcher.hpp"
#include "duckdb/optimizer/matcher/function_matcher.hpp"
#include "duckdb/optimizer/optimizer_extension.hpp"
#include "duckdb/optimizer/remove_unused_columns.hpp"
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression/bound_function_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/operator/logical_filter.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
#include "duckdb/planner/operator_extension.hpp"
#include "duckdb/storage/data_table.hpp"
#include "spatial/core/geometry/bbox.hpp"
#include "spatial/core/geometry/geometry_type.hpp"
#include "spatial/core/index/rtree/rtree_index.hpp"
#include "spatial/core/index/rtree/rtree_index_create_logical.hpp"
#include "spatial/core/index/rtree/rtree_index_scan.hpp"
#include "spatial/core/index/rtree/rtree_module.hpp"
#include "spatial/core/types.hpp"
#include "spatial/core/util/math.hpp"

#include "duckdb/optimizer/matcher/expression_matcher.hpp"
#include "duckdb/optimizer/matcher/function_matcher.hpp"
#include "spatial/core/index/rtree/rtree_index_create_logical.hpp"
#include <duckdb/optimizer/column_binding_replacer.hpp>
#include <duckdb/optimizer/optimizer.hpp>
#include <duckdb/planner/operator/logical_projection.hpp>

namespace spatial {

Expand Down Expand Up @@ -95,7 +99,7 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
return true;
}

static bool TryOptimize(ClientContext &context, unique_ptr<LogicalOperator> &plan) {
static bool TryOptimize(Binder &binder, ClientContext &context, unique_ptr<LogicalOperator> &plan, unique_ptr<LogicalOperator> &root) {
// Look for a FILTER with a spatial predicate followed by a LOGICAL_GET table scan
auto &op = *plan;

Expand All @@ -116,11 +120,17 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
if (filter.children.front()->type != LogicalOperatorType::LOGICAL_GET) {
return false;
}
auto &get = filter.children.front()->Cast<LogicalGet>();
auto &get_ptr = filter.children.front();
auto &get = get_ptr->Cast<LogicalGet>();
if (get.function.name != "seq_scan") {
return false;
}

// We can't optimize if the table scan already has dynamic filters pushed down :(
if(get.dynamic_filters && get.dynamic_filters->HasFilters()) {
return false;
}

// We can replace the scan function with an rtree index scan (if the table has an rtree index)
// Get the table
auto &table = *get.GetTable();
Expand Down Expand Up @@ -182,24 +192,57 @@ class RTreeIndexScanOptimizer : public OptimizerExtension {
return false;
}

// Replace the scan with our custom index scan function
// If there are no table filters pushed down into the get, we can just replace the get with the index scan
const auto cardinality = get.function.cardinality(context, bind_data.get());
get.function = RTreeIndexScanFunction::GetFunction();
auto cardinality = get.function.cardinality(context, bind_data.get());
get.has_estimated_cardinality = cardinality->has_estimated_cardinality;
get.estimated_cardinality = cardinality->estimated_cardinality;
get.bind_data = std::move(bind_data);

if(get.table_filters.filters.empty()) {
return true;
}
get.projection_ids.clear();
get.types.clear();

// Otherwise, things get more complicated. We need to pull up the filters from the table scan, as our index scan
// does not support regular filter pushdown.
auto new_filter = make_uniq<LogicalFilter>();
auto &column_ids = get.GetColumnIds();
for(const auto &entry : get.table_filters.filters) {
idx_t column_id = entry.first;
auto &type = get.returned_types[column_id];
bool found = false;
for(idx_t i = 0; i < column_ids.size(); i++) {
if (column_ids[i] == column_id) {
column_id = i;
found = true;
break;
}
}
if (!found) {
throw InternalException("Could not find column id for filter");
}
auto column = make_uniq<BoundColumnRefExpression>(type, ColumnBinding(get.table_index, column_id));
new_filter->expressions.push_back(entry.second->ToExpression(*column));
}
new_filter->children.push_back(std::move(get_ptr));
new_filter->ResolveOperatorTypes();
get_ptr = std::move(new_filter);
return true;
}

static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
if (!TryOptimize(input.context, plan)) {
static void OptimizeRecursive(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan, unique_ptr<LogicalOperator> &root) {
if (!TryOptimize(input.optimizer.binder, input.context, plan, root)) {
// No match: continue with the children
for (auto &child : plan->children) {
Optimize(input, child);
OptimizeRecursive(input, child, root);
}
}
}

static void Optimize(OptimizerExtensionInput &input, unique_ptr<LogicalOperator> &plan) {
OptimizeRecursive(input, plan, plan);
}
};

//-----------------------------------------------------------------------------
Expand Down
Binary file added test/data/segments.parquet
Binary file not shown.
19 changes: 19 additions & 0 deletions test/sql/index/rtree_filter_pullup.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
require spatial

require parquet

query I rowsort
SELECT id FROM '__WORKING_DIRECTORY__/test/data/segments.parquet'
WHERE subtype='road' AND ST_Intersects(geometry, ST_Buffer(ST_GeomFromText('POINT (-8476562 4795814)'), 100));
----
0862aac667ffffff043df7e4c6756d14
0862aac667ffffff047de7f2111f86ad
0862aac667ffffff047ffedcbc2db0f8


query III rowsort
SELECT id, subtype, class FROM '__WORKING_DIRECTORY__/test/data/segments.parquet'
WHERE subtype='road' AND class='residential' AND ST_Intersects(geometry, ST_Buffer(ST_GeomFromText('POINT (-8476562 4795814)'), 100));
----
0862aac667ffffff047de7f2111f86ad road residential
0862aac667ffffff047ffedcbc2db0f8 road residential
Loading