From 1b635bd6868cef1861a9d81f5db9cc1128124507 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 27 Aug 2024 18:49:43 +0200 Subject: [PATCH 01/11] there is another rule for this where filters on casts can be pushed down if they are invertible. I.e you cannot test this with in as the first type because all ints can be cast to varcahr. the first type must be of a type where not all vaulues can cast down. --- .../duckdb/optimizer/filter_pushdown.hpp | 1 + .../pushdown/pushdown_projection.cpp | 41 +++++++++++++++++++ .../test_no_pushdown_cast_into_cte.test | 41 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 test/sql/optimizer/test_no_pushdown_cast_into_cte.test diff --git a/src/include/duckdb/optimizer/filter_pushdown.hpp b/src/include/duckdb/optimizer/filter_pushdown.hpp index 94706d21016..c6b55279db3 100644 --- a/src/include/duckdb/optimizer/filter_pushdown.hpp +++ b/src/include/duckdb/optimizer/filter_pushdown.hpp @@ -26,6 +26,7 @@ class FilterPushdown { ClientContext &GetContext(); void CheckMarkToSemi(LogicalOperator &op, unordered_set &table_bindings); + bool FilterInputsChangeTypes(Expression &filter, vector> &expressions); struct Filter { unordered_set bindings; diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index ec82b39cd4f..b5ba10234f2 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -37,6 +37,47 @@ static unique_ptr ReplaceProjectionBindings(LogicalProjection &proj, return expr; } +bool FilterPushdown::FilterInputsChangeTypes(Expression &filter, vector> &expressions) { + // iterate through expressions of the filter and gather the columns the filter performs on + unordered_set columns; + ExpressionIterator::EnumerateChildren(filter, [&](Expression &child) { + switch (child.expression_class) { + case ExpressionClass::BOUND_COLUMN_REF: { + auto &colref = child.Cast(); + columns.insert(colref.binding.column_index); + break; + } + default: + break; + } + }); + + D_ASSERT(columns.size() <= expressions.size()); + + // go through the projected columns in the filter, if a column has a cast it is changing types + // from what it was below the projection. That means we cannot push this filter down. + bool to_return = false; + for (auto &column : columns) { + auto &proj_expr = expressions.at(column); + ExpressionIterator::EnumerateExpression(proj_expr, [&](Expression &child) { + switch (child.expression_class) { + case ExpressionClass::CAST: + case ExpressionClass::BOUND_CAST: { + to_return = true; + return; + } + default: + break; + } + }); + if (to_return) { + break; + } + } + + return to_return; +} + unique_ptr FilterPushdown::PushdownProjection(unique_ptr op) { D_ASSERT(op->type == LogicalOperatorType::LOGICAL_PROJECTION); auto &proj = op->Cast(); diff --git a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test new file mode 100644 index 00000000000..4b303a9efc7 --- /dev/null +++ b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test @@ -0,0 +1,41 @@ +# name: test/sql/optimizer/test_no_pushdown_cast_into_cte.test +# description: No Pushdown cast into cte +# group: [optimizer] + +query II +WITH t(a, b) AS ( + SELECT a :: int, b :: int + FROM (VALUES + ('1', '4'), + ('5', '3'), + ('2', '*'), + ('3', '8'), + ('7', '*')) AS _(a, b) + WHERE position('*' in b) = 0 +) +SELECT a, b +FROM t +WHERE a < b; +---- +1 4 +3 8 + + +#query II +#select a, b from +#( +# select (range*2)::VARCHAR a, range::VARCHAR b from range(120) +#where range % 2 = 0 +#) as t(a, b) where a > b; + + + +#with t(a, b) as ( +# select a :: varchar, b :: varchar +# FROM VALUES +# (1, 2), +# (3, 4), +# (5, 6), +# (7, 10) as +# _(a, b) where a + 1 = b +#) select a, b from t; \ No newline at end of file From f1375bb6994f216c8d6d29b52fe255596e1c54c3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 28 Aug 2024 10:54:07 +0200 Subject: [PATCH 02/11] make sure pushdown is still allowed when the maximum logical type is the child type --- .../duckdb/optimizer/filter_pushdown.hpp | 2 +- .../pushdown/pushdown_projection.cpp | 28 +++++++-- .../test_no_pushdown_cast_into_cte.test | 60 ++++++++++++++----- 3 files changed, 68 insertions(+), 22 deletions(-) diff --git a/src/include/duckdb/optimizer/filter_pushdown.hpp b/src/include/duckdb/optimizer/filter_pushdown.hpp index c6b55279db3..dca100d17f9 100644 --- a/src/include/duckdb/optimizer/filter_pushdown.hpp +++ b/src/include/duckdb/optimizer/filter_pushdown.hpp @@ -26,7 +26,7 @@ class FilterPushdown { ClientContext &GetContext(); void CheckMarkToSemi(LogicalOperator &op, unordered_set &table_bindings); - bool FilterInputsChangeTypes(Expression &filter, vector> &expressions); + static bool FilterInputsChangeTypes(unique_ptr filter, vector> &expressions); struct Filter { unordered_set bindings; diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index b5ba10234f2..2cf56656f2f 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -4,6 +4,9 @@ #include "duckdb/planner/operator/logical_empty_result.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include +#include + namespace duckdb { static bool IsVolatile(LogicalProjection &proj, const unique_ptr &expr) { @@ -37,10 +40,11 @@ static unique_ptr ReplaceProjectionBindings(LogicalProjection &proj, return expr; } -bool FilterPushdown::FilterInputsChangeTypes(Expression &filter, vector> &expressions) { +bool FilterPushdown::FilterInputsChangeTypes(unique_ptr filter, + vector> &expressions) { // iterate through expressions of the filter and gather the columns the filter performs on unordered_set columns; - ExpressionIterator::EnumerateChildren(filter, [&](Expression &child) { + ExpressionIterator::EnumerateExpression(filter, [&](Expression &child) { switch (child.expression_class) { case ExpressionClass::BOUND_COLUMN_REF: { auto &colref = child.Cast(); @@ -61,10 +65,22 @@ bool FilterPushdown::FilterInputsChangeTypes(Expression &filter, vector(); + // if the max logical type is what is being cast to, then you can push down the filter. + // i.e if you are casting from int to varchar, every int value can cast to varchar, so you can push the + // filter down if you cast from varchar to int, there may be reason to not push the filter down, as + // another filter will filter out varchar values that cannot be cast to int). See + // test_no_pushdown_cast.test + if (LogicalType::ForceMaxLogicalType(cast.return_type.id(), cast.child->return_type.id()).id() == + cast.child->return_type.id()) { + to_return = true; + } + break; } default: break; @@ -92,7 +108,7 @@ unique_ptr FilterPushdown::PushdownProjection(unique_ptrCopy(), proj.expressions)) { // We can't push down related expressions if the column in the // expression is generated by the functions which have side effects remain_expressions.push_back(std::move(f.filter)); diff --git a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test index 4b303a9efc7..4465ee74847 100644 --- a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test +++ b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test @@ -20,22 +20,52 @@ WHERE a < b; 1 4 3 8 +# INT can always be cast to varchar, so these +query II +with t(a, b) as ( + select a :: varchar, b :: varchar + FROM VALUES + (1, 2), + (3, 3), + (5, 6), + (7, 6) as + _(a, b) where a <= b +) select a, b from t where a[1] = '1'; +---- +1 2 -#query II -#select a, b from -#( -# select (range*2)::VARCHAR a, range::VARCHAR b from range(120) -#where range % 2 = 0 -#) as t(a, b) where a > b; +statement ok +pragma explain_output='optimized_only'; + +query II +EXPLAIN WITH t(a, b) AS ( + SELECT a :: int, b :: int + FROM (VALUES + ('1', '4'), + ('5', '3'), + ('2', '*'), + ('3', '8'), + ('7', '*')) AS _(a, b) + WHERE position('*' in b) = 0 +) +SELECT a, b +FROM t +WHERE a < b; +---- +logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* -#with t(a, b) as ( -# select a :: varchar, b :: varchar -# FROM VALUES -# (1, 2), -# (3, 4), -# (5, 6), -# (7, 10) as -# _(a, b) where a + 1 = b -#) select a, b from t; \ No newline at end of file +# we should not see two filters, since the filter can be pushed to just above the column data scan +query II +explain with t(a, b) as ( + select a :: varchar, b :: varchar + FROM VALUES + (1, 2), + (3, 3), + (5, 6), + (7, 6) as + _(a, b) where a <= b +) select a, b from t where a[1] = '1'; +---- +logical_opt :.*FILTER.*PROJECTION.*FILTER.* From c53346efe818efe5700e50984865019aa9987800 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 28 Aug 2024 10:58:55 +0200 Subject: [PATCH 03/11] remove unused variables --- src/optimizer/pushdown/pushdown_projection.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index 2cf56656f2f..eee2c6b2e32 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -65,10 +65,6 @@ bool FilterPushdown::FilterInputsChangeTypes(unique_ptr filter, auto &proj_expr = expressions.at(column); ExpressionIterator::EnumerateExpression(proj_expr, [&](Expression &child) { switch (child.expression_class) { - case ExpressionClass::CAST: { - bool break_here = 0; - break; - } case ExpressionClass::BOUND_CAST: { auto &cast = child.Cast(); // if the max logical type is what is being cast to, then you can push down the filter. From 59396c3fa510f51e0c896f593e64b229ba34d81b Mon Sep 17 00:00:00 2001 From: Tmonster Date: Wed, 28 Aug 2024 16:43:46 +0200 Subject: [PATCH 04/11] remove unused include statement --- src/optimizer/pushdown/pushdown_projection.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index eee2c6b2e32..ab380381245 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -4,9 +4,6 @@ #include "duckdb/planner/operator/logical_empty_result.hpp" #include "duckdb/planner/operator/logical_projection.hpp" -#include -#include - namespace duckdb { static bool IsVolatile(LogicalProjection &proj, const unique_ptr &expr) { From 103bd1ca2d52099dfc9d09c7f600f2c21dc5ee95 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 29 Aug 2024 10:38:34 +0200 Subject: [PATCH 05/11] fixup logic to check projection cast expressions --- .../duckdb/optimizer/filter_pushdown.hpp | 1 - .../pushdown/pushdown_projection.cpp | 74 ++++-------- .../test_no_pushdown_cast_into_cte.test | 107 +++++++++--------- 3 files changed, 78 insertions(+), 104 deletions(-) diff --git a/src/include/duckdb/optimizer/filter_pushdown.hpp b/src/include/duckdb/optimizer/filter_pushdown.hpp index dca100d17f9..94706d21016 100644 --- a/src/include/duckdb/optimizer/filter_pushdown.hpp +++ b/src/include/duckdb/optimizer/filter_pushdown.hpp @@ -26,7 +26,6 @@ class FilterPushdown { ClientContext &GetContext(); void CheckMarkToSemi(LogicalOperator &op, unordered_set &table_bindings); - static bool FilterInputsChangeTypes(unique_ptr filter, vector> &expressions); struct Filter { unordered_set bindings; diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index ab380381245..35049ecd63b 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -3,6 +3,7 @@ #include "duckdb/planner/expression_iterator.hpp" #include "duckdb/planner/operator/logical_empty_result.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/expression/bound_cast_expression.hpp" namespace duckdb { @@ -23,6 +24,27 @@ static bool IsVolatile(LogicalProjection &proj, const unique_ptr &ex return is_volatile; } +static bool ProjectionCastsToLowerType(LogicalProjection &proj, unique_ptr expr) { + auto changes_type = false; + if (expr->type == ExpressionType::BOUND_COLUMN_REF) { + auto &colref = expr->Cast(); + D_ASSERT(colref.binding.table_index == proj.table_index); + D_ASSERT(colref.binding.column_index < proj.expressions.size()); + D_ASSERT(colref.depth == 0); + // replace the binding with a copy to the expression at the referenced index + auto &proj_expr = proj.expressions[colref.binding.column_index]; + // if max logical type is the child type, then the cast could be VARCHAR -> INT + // do not allow pushdown as another filter may filter out non castable VARCHARS + if (LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { + changes_type = true; + } + } + ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr &child) { + changes_type |= ProjectionCastsToLowerType(proj, std::move(child)); + }); + return changes_type; +} + static unique_ptr ReplaceProjectionBindings(LogicalProjection &proj, unique_ptr expr) { if (expr->type == ExpressionType::BOUND_COLUMN_REF) { auto &colref = expr->Cast(); @@ -37,56 +59,6 @@ static unique_ptr ReplaceProjectionBindings(LogicalProjection &proj, return expr; } -bool FilterPushdown::FilterInputsChangeTypes(unique_ptr filter, - vector> &expressions) { - // iterate through expressions of the filter and gather the columns the filter performs on - unordered_set columns; - ExpressionIterator::EnumerateExpression(filter, [&](Expression &child) { - switch (child.expression_class) { - case ExpressionClass::BOUND_COLUMN_REF: { - auto &colref = child.Cast(); - columns.insert(colref.binding.column_index); - break; - } - default: - break; - } - }); - - D_ASSERT(columns.size() <= expressions.size()); - - // go through the projected columns in the filter, if a column has a cast it is changing types - // from what it was below the projection. That means we cannot push this filter down. - bool to_return = false; - for (auto &column : columns) { - auto &proj_expr = expressions.at(column); - ExpressionIterator::EnumerateExpression(proj_expr, [&](Expression &child) { - switch (child.expression_class) { - case ExpressionClass::BOUND_CAST: { - auto &cast = child.Cast(); - // if the max logical type is what is being cast to, then you can push down the filter. - // i.e if you are casting from int to varchar, every int value can cast to varchar, so you can push the - // filter down if you cast from varchar to int, there may be reason to not push the filter down, as - // another filter will filter out varchar values that cannot be cast to int). See - // test_no_pushdown_cast.test - if (LogicalType::ForceMaxLogicalType(cast.return_type.id(), cast.child->return_type.id()).id() == - cast.child->return_type.id()) { - to_return = true; - } - break; - } - default: - break; - } - }); - if (to_return) { - break; - } - } - - return to_return; -} - unique_ptr FilterPushdown::PushdownProjection(unique_ptr op) { D_ASSERT(op->type == LogicalOperatorType::LOGICAL_PROJECTION); auto &proj = op->Cast(); @@ -101,7 +73,7 @@ unique_ptr FilterPushdown::PushdownProjection(unique_ptrCopy(), proj.expressions)) { + if (is_volatile || ProjectionCastsToLowerType(proj, f.filter->Copy())) { // We can't push down related expressions if the column in the // expression is generated by the functions which have side effects remain_expressions.push_back(std::move(f.filter)); diff --git a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test index 4465ee74847..ddafa9bdb22 100644 --- a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test +++ b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test @@ -2,63 +2,51 @@ # description: No Pushdown cast into cte # group: [optimizer] -query II -WITH t(a, b) AS ( - SELECT a :: int, b :: int - FROM (VALUES - ('1', '4'), - ('5', '3'), - ('2', '*'), - ('3', '8'), - ('7', '*')) AS _(a, b) - WHERE position('*' in b) = 0 -) -SELECT a, b -FROM t -WHERE a < b; ----- -1 4 -3 8 - -# INT can always be cast to varchar, so these -query II -with t(a, b) as ( - select a :: varchar, b :: varchar - FROM VALUES - (1, 2), - (3, 3), - (5, 6), - (7, 6) as - _(a, b) where a <= b -) select a, b from t where a[1] = '1'; ----- -1 2 - - statement ok pragma explain_output='optimized_only'; -query II -EXPLAIN WITH t(a, b) AS ( - SELECT a :: int, b :: int - FROM (VALUES - ('1', '4'), - ('5', '3'), - ('2', '*'), - ('3', '8'), - ('7', '*')) AS _(a, b) - WHERE position('*' in b) = 0 -) -SELECT a, b -FROM t -WHERE a < b; ----- -logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* +#query II +#WITH t(a, b) AS ( +# SELECT a :: int, b :: int +# FROM (VALUES +# ('1', '4'), +# ('5', '3'), +# ('2', '*'), +# ('3', '8'), +# ('7', '*')) AS _(a, b) +# WHERE position('*' in b) = 0 +#) +#SELECT a, b +#FROM t +#WHERE a < b; +#---- +#1 4 +#3 8 -# we should not see two filters, since the filter can be pushed to just above the column data scan +# check filter is above projection that casts the varchar to int +#query II +#EXPLAIN WITH t(a, b) AS ( +# SELECT a :: int, b :: int +# FROM (VALUES +# ('1', '4'), +# ('5', '3'), +# ('2', '*'), +# ('3', '8'), +# ('7', '*')) AS _(a, b) +# WHERE position('*' in b) = 0 +#) +#SELECT a, b +#FROM t +#WHERE a < b; +#---- +#logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* + + +# INT can always be cast to varchar, so the filter a[1] = '1' +# can be pushed down query II -explain with t(a, b) as ( +with t(a, b) as ( select a :: varchar, b :: varchar FROM VALUES (1, 2), @@ -68,4 +56,19 @@ explain with t(a, b) as ( _(a, b) where a <= b ) select a, b from t where a[1] = '1'; ---- -logical_opt :.*FILTER.*PROJECTION.*FILTER.* +1 2 + + +## we should not see two filters, since the filter can be pushed to just above the column data scan +#query II +#explain with t(a, b) as ( +# select a :: varchar, b :: varchar +# FROM VALUES +# (1, 2), +# (3, 3), +# (5, 6), +# (7, 6) as +# _(a, b) where a <= b +#) select a, b from t where a[1] = '1'; +#---- +#logical_opt :.*FILTER.*PROJECTION.*FILTER.* From 88b31516936de341f729f6136ecd603019b9e1bd Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 29 Aug 2024 11:11:16 +0200 Subject: [PATCH 06/11] if types are equal then allow filter pushdown --- src/optimizer/pushdown/pushdown_projection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index 35049ecd63b..86fcbf94184 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -35,7 +35,7 @@ static bool ProjectionCastsToLowerType(LogicalProjection &proj, unique_ptr INT // do not allow pushdown as another filter may filter out non castable VARCHARS - if (LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { + if (expr->return_type != proj_expr->return_type && LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { changes_type = true; } } From 67027bbf58e58131e01ac4397d7b128d6c78fc00 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 29 Aug 2024 11:22:34 +0200 Subject: [PATCH 07/11] make format-fix --- src/optimizer/pushdown/pushdown_projection.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index 86fcbf94184..e2056325fc4 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -35,7 +35,8 @@ static bool ProjectionCastsToLowerType(LogicalProjection &proj, unique_ptr INT // do not allow pushdown as another filter may filter out non castable VARCHARS - if (expr->return_type != proj_expr->return_type && LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { + if (expr->return_type != proj_expr->return_type && + LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { changes_type = true; } } From d476ce1e73d76b46c018fa226ae654b871229ce9 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 2 Sep 2024 12:49:21 +0200 Subject: [PATCH 08/11] add back in test cases --- .../pushdown/pushdown_projection.cpp | 28 ++++-- .../test_no_pushdown_cast_into_cte.test | 92 +++++++++---------- 2 files changed, 68 insertions(+), 52 deletions(-) diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index e2056325fc4..5ecc1396c4b 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -4,6 +4,7 @@ #include "duckdb/planner/operator/logical_empty_result.hpp" #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/expression/bound_cast_expression.hpp" +#include "duckdb/common/types.hpp" namespace duckdb { @@ -24,7 +25,23 @@ static bool IsVolatile(LogicalProjection &proj, const unique_ptr &ex return is_volatile; } -static bool ProjectionCastsToLowerType(LogicalProjection &proj, unique_ptr expr) { +static bool ProjectionCastsToLowerType(Expression &proj_expr) { + bool changes_type = false; + if (proj_expr.expression_class == ExpressionClass::BOUND_CAST) { + auto &cast = proj_expr.Cast(); + const auto return_type = proj_expr.return_type; + const auto child_type = cast.child->return_type; + if (return_type.id() != child_type.id() && + LogicalType::ForceMaxLogicalType(return_type, child_type) == child_type.id()) { + return true; + } + } + ExpressionIterator::EnumerateChildren( + proj_expr, [&](unique_ptr &child) { changes_type |= ProjectionCastsToLowerType(*child); }); + return changes_type; +} + +static bool ProjectionChangesExpressionType(LogicalProjection &proj, unique_ptr expr) { auto changes_type = false; if (expr->type == ExpressionType::BOUND_COLUMN_REF) { auto &colref = expr->Cast(); @@ -35,13 +52,12 @@ static bool ProjectionCastsToLowerType(LogicalProjection &proj, unique_ptr INT // do not allow pushdown as another filter may filter out non castable VARCHARS - if (expr->return_type != proj_expr->return_type && - LogicalType::ForceMaxLogicalType(expr->return_type, proj_expr->return_type) == expr->return_type) { - changes_type = true; + if (ProjectionCastsToLowerType(*proj_expr)) { + return true; } } ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr &child) { - changes_type |= ProjectionCastsToLowerType(proj, std::move(child)); + changes_type |= ProjectionChangesExpressionType(proj, std::move(child)); }); return changes_type; } @@ -74,7 +90,7 @@ unique_ptr FilterPushdown::PushdownProjection(unique_ptrCopy())) { + if (is_volatile || ProjectionChangesExpressionType(proj, f.filter->Copy())) { // We can't push down related expressions if the column in the // expression is generated by the functions which have side effects remain_expressions.push_back(std::move(f.filter)); diff --git a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test index ddafa9bdb22..ce257266096 100644 --- a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test +++ b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test @@ -5,42 +5,42 @@ statement ok pragma explain_output='optimized_only'; -#query II -#WITH t(a, b) AS ( -# SELECT a :: int, b :: int -# FROM (VALUES -# ('1', '4'), -# ('5', '3'), -# ('2', '*'), -# ('3', '8'), -# ('7', '*')) AS _(a, b) -# WHERE position('*' in b) = 0 -#) -#SELECT a, b -#FROM t -#WHERE a < b; -#---- -#1 4 -#3 8 +query II +WITH t(a, b) AS ( + SELECT a :: int, b :: int + FROM (VALUES + ('1', '4'), + ('5', '3'), + ('2', '*'), + ('3', '8'), + ('7', '*')) AS _(a, b) + WHERE position('*' in b) = 0 +) +SELECT a, b +FROM t +WHERE a < b; +---- +1 4 +3 8 # check filter is above projection that casts the varchar to int -#query II -#EXPLAIN WITH t(a, b) AS ( -# SELECT a :: int, b :: int -# FROM (VALUES -# ('1', '4'), -# ('5', '3'), -# ('2', '*'), -# ('3', '8'), -# ('7', '*')) AS _(a, b) -# WHERE position('*' in b) = 0 -#) -#SELECT a, b -#FROM t -#WHERE a < b; -#---- -#logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* +query II +EXPLAIN WITH t(a, b) AS ( + SELECT a :: int, b :: int + FROM (VALUES + ('1', '4'), + ('5', '3'), + ('2', '*'), + ('3', '8'), + ('7', '*')) AS _(a, b) + WHERE position('*' in b) = 0 +) +SELECT a, b +FROM t +WHERE a < b; +---- +logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* # INT can always be cast to varchar, so the filter a[1] = '1' @@ -59,16 +59,16 @@ with t(a, b) as ( 1 2 -## we should not see two filters, since the filter can be pushed to just above the column data scan -#query II -#explain with t(a, b) as ( -# select a :: varchar, b :: varchar -# FROM VALUES -# (1, 2), -# (3, 3), -# (5, 6), -# (7, 6) as -# _(a, b) where a <= b -#) select a, b from t where a[1] = '1'; -#---- -#logical_opt :.*FILTER.*PROJECTION.*FILTER.* +# we should not see two filters, since the filter can be pushed to just above the column data scan +query II +explain with t(a, b) as ( + select a :: varchar, b :: varchar + FROM VALUES + (1, 2), + (3, 3), + (5, 6), + (7, 6) as + _(a, b) where a <= b +) select a, b from t where a[1] = '1'; +---- +logical_opt :.*FILTER.*PROJECTION.*FILTER.* From f3440a3284e81fabf23040d4f947ee865c053778 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 11 Oct 2024 09:44:46 +0200 Subject: [PATCH 09/11] use CanThrow instead of 'changestype' --- src/include/duckdb/parser/base_expression.hpp | 3 ++ .../parser/expression/cast_expression.hpp | 1 + .../duckdb/parser/parsed_expression.hpp | 1 + src/include/duckdb/planner/expression.hpp | 1 + .../expression/bound_cast_expression.hpp | 2 + .../pushdown/pushdown_projection.cpp | 39 +------------------ src/parser/expression/cast_expression.cpp | 5 +++ src/parser/parsed_expression.cpp | 7 ++++ src/planner/expression.cpp | 6 +++ .../expression/bound_cast_expression.cpp | 11 ++++++ .../test_no_pushdown_cast_into_cte.test | 2 +- 11 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/include/duckdb/parser/base_expression.hpp b/src/include/duckdb/parser/base_expression.hpp index a64baf7243f..07b400aacb7 100644 --- a/src/include/duckdb/parser/base_expression.hpp +++ b/src/include/duckdb/parser/base_expression.hpp @@ -62,6 +62,9 @@ class BaseExpression { virtual bool IsScalar() const = 0; //! Returns true if the expression has a parameter virtual bool HasParameter() const = 0; + //! Returns true if executing the base expression can throw an error + //! I.e Cast('a' to INT). + virtual bool CanThrow() const = 0; //! Get the name of the expression virtual string GetName() const; diff --git a/src/include/duckdb/parser/expression/cast_expression.hpp b/src/include/duckdb/parser/expression/cast_expression.hpp index 7c48e0ddec2..c16bd90a754 100644 --- a/src/include/duckdb/parser/expression/cast_expression.hpp +++ b/src/include/duckdb/parser/expression/cast_expression.hpp @@ -34,6 +34,7 @@ class CastExpression : public ParsedExpression { static bool Equal(const CastExpression &a, const CastExpression &b); unique_ptr Copy() const override; + bool CanThrow() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); diff --git a/src/include/duckdb/parser/parsed_expression.hpp b/src/include/duckdb/parser/parsed_expression.hpp index 46acfe9e60f..a3725706e3e 100644 --- a/src/include/duckdb/parser/parsed_expression.hpp +++ b/src/include/duckdb/parser/parsed_expression.hpp @@ -39,6 +39,7 @@ class ParsedExpression : public BaseExpression { bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; + bool CanThrow() const override; bool Equals(const BaseExpression &other) const override; hash_t Hash() const override; diff --git a/src/include/duckdb/planner/expression.hpp b/src/include/duckdb/planner/expression.hpp index a7f9045f989..c5b2d6115b5 100644 --- a/src/include/duckdb/planner/expression.hpp +++ b/src/include/duckdb/planner/expression.hpp @@ -32,6 +32,7 @@ class Expression : public BaseExpression { bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; + bool CanThrow() const override; virtual bool IsVolatile() const; virtual bool IsConsistent() const; virtual bool PropagatesNullValues() const; diff --git a/src/include/duckdb/planner/expression/bound_cast_expression.hpp b/src/include/duckdb/planner/expression/bound_cast_expression.hpp index d45ac502f35..c625fb4f9d9 100644 --- a/src/include/duckdb/planner/expression/bound_cast_expression.hpp +++ b/src/include/duckdb/planner/expression/bound_cast_expression.hpp @@ -54,6 +54,8 @@ class BoundCastExpression : public Expression { unique_ptr Copy() const override; + bool CanThrow() const override; + void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); diff --git a/src/optimizer/pushdown/pushdown_projection.cpp b/src/optimizer/pushdown/pushdown_projection.cpp index 5ecc1396c4b..f66ce66fd89 100644 --- a/src/optimizer/pushdown/pushdown_projection.cpp +++ b/src/optimizer/pushdown/pushdown_projection.cpp @@ -25,43 +25,6 @@ static bool IsVolatile(LogicalProjection &proj, const unique_ptr &ex return is_volatile; } -static bool ProjectionCastsToLowerType(Expression &proj_expr) { - bool changes_type = false; - if (proj_expr.expression_class == ExpressionClass::BOUND_CAST) { - auto &cast = proj_expr.Cast(); - const auto return_type = proj_expr.return_type; - const auto child_type = cast.child->return_type; - if (return_type.id() != child_type.id() && - LogicalType::ForceMaxLogicalType(return_type, child_type) == child_type.id()) { - return true; - } - } - ExpressionIterator::EnumerateChildren( - proj_expr, [&](unique_ptr &child) { changes_type |= ProjectionCastsToLowerType(*child); }); - return changes_type; -} - -static bool ProjectionChangesExpressionType(LogicalProjection &proj, unique_ptr expr) { - auto changes_type = false; - if (expr->type == ExpressionType::BOUND_COLUMN_REF) { - auto &colref = expr->Cast(); - D_ASSERT(colref.binding.table_index == proj.table_index); - D_ASSERT(colref.binding.column_index < proj.expressions.size()); - D_ASSERT(colref.depth == 0); - // replace the binding with a copy to the expression at the referenced index - auto &proj_expr = proj.expressions[colref.binding.column_index]; - // if max logical type is the child type, then the cast could be VARCHAR -> INT - // do not allow pushdown as another filter may filter out non castable VARCHARS - if (ProjectionCastsToLowerType(*proj_expr)) { - return true; - } - } - ExpressionIterator::EnumerateChildren(*expr, [&](unique_ptr &child) { - changes_type |= ProjectionChangesExpressionType(proj, std::move(child)); - }); - return changes_type; -} - static unique_ptr ReplaceProjectionBindings(LogicalProjection &proj, unique_ptr expr) { if (expr->type == ExpressionType::BOUND_COLUMN_REF) { auto &colref = expr->Cast(); @@ -90,7 +53,7 @@ unique_ptr FilterPushdown::PushdownProjection(unique_ptrCopy())) { + if (is_volatile || f.filter->CanThrow()) { // We can't push down related expressions if the column in the // expression is generated by the functions which have side effects remain_expressions.push_back(std::move(f.filter)); diff --git a/src/parser/expression/cast_expression.cpp b/src/parser/expression/cast_expression.cpp index 758cc29ce02..602f43355ce 100644 --- a/src/parser/expression/cast_expression.cpp +++ b/src/parser/expression/cast_expression.cpp @@ -21,6 +21,11 @@ string CastExpression::ToString() const { return ToString(*this); } +bool CastExpression::CanThrow() const { + // Cast('a' to INT) can throw + return true; +} + bool CastExpression::Equal(const CastExpression &a, const CastExpression &b) { if (!a.child->Equals(*b.child)) { return false; diff --git a/src/parser/parsed_expression.cpp b/src/parser/parsed_expression.cpp index 617d60a3d89..03184c6dd3d 100644 --- a/src/parser/parsed_expression.cpp +++ b/src/parser/parsed_expression.cpp @@ -48,6 +48,13 @@ bool ParsedExpression::HasSubquery() const { return has_subquery; } +bool ParsedExpression::CanThrow() const { + bool can_throw = false; + ParsedExpressionIterator::EnumerateChildren(*this, + [&](const ParsedExpression &child) { can_throw |= child.CanThrow(); }); + return can_throw; +} + bool ParsedExpression::Equals(const BaseExpression &other) const { if (!BaseExpression::Equals(other)) { return false; diff --git a/src/planner/expression.cpp b/src/planner/expression.cpp index 9fa426b8a37..3e9539b0c4a 100644 --- a/src/planner/expression.cpp +++ b/src/planner/expression.cpp @@ -58,6 +58,12 @@ bool Expression::IsConsistent() const { return is_consistent; } +bool Expression::CanThrow() const { + bool can_throw = false; + ExpressionIterator::EnumerateChildren(*this, [&](const Expression &child) { can_throw |= child.CanThrow(); }); + return can_throw; +} + bool Expression::PropagatesNullValues() const { if (type == ExpressionType::OPERATOR_IS_NULL || type == ExpressionType::OPERATOR_IS_NOT_NULL || type == ExpressionType::COMPARE_NOT_DISTINCT_FROM || type == ExpressionType::COMPARE_DISTINCT_FROM || diff --git a/src/planner/expression/bound_cast_expression.cpp b/src/planner/expression/bound_cast_expression.cpp index 1c8dc951872..392d38c0c87 100644 --- a/src/planner/expression/bound_cast_expression.cpp +++ b/src/planner/expression/bound_cast_expression.cpp @@ -217,4 +217,15 @@ unique_ptr BoundCastExpression::Copy() const { return std::move(copy); } +bool BoundCastExpression::CanThrow() const { + const auto child_type = child->return_type; + if (return_type.id() != child_type.id() && + LogicalType::ForceMaxLogicalType(return_type, child_type) == child_type.id()) { + return true; + } + bool changes_type = false; + ExpressionIterator::EnumerateChildren(*this, [&](const Expression &child) { changes_type |= child.CanThrow(); }); + return changes_type; +} + } // namespace duckdb diff --git a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test index ce257266096..121240a812e 100644 --- a/test/sql/optimizer/test_no_pushdown_cast_into_cte.test +++ b/test/sql/optimizer/test_no_pushdown_cast_into_cte.test @@ -40,7 +40,7 @@ SELECT a, b FROM t WHERE a < b; ---- -logical_opt :.*FILTER.*\(a < b\).*PROJECTION.*CAST.*CAST.*FILTER.*position.* +logical_opt :.*FILTER.*CAST\(a AS INTEGER.*<.*b AS INTEGER\).*PROJECTION.*FILTER.*position.* # INT can always be cast to varchar, so the filter a[1] = '1' From c6c4a994b9340217de562719013a14325f8019da Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 14 Oct 2024 16:13:31 +0200 Subject: [PATCH 10/11] CanThrow is only for BoundExpressionsr --- src/include/duckdb/parser/base_expression.hpp | 3 --- src/include/duckdb/parser/expression/cast_expression.hpp | 1 - src/include/duckdb/parser/parsed_expression.hpp | 1 - src/include/duckdb/planner/expression.hpp | 3 ++- src/parser/expression/cast_expression.cpp | 5 ----- src/parser/parsed_expression.cpp | 7 ------- 6 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/include/duckdb/parser/base_expression.hpp b/src/include/duckdb/parser/base_expression.hpp index 07b400aacb7..a64baf7243f 100644 --- a/src/include/duckdb/parser/base_expression.hpp +++ b/src/include/duckdb/parser/base_expression.hpp @@ -62,9 +62,6 @@ class BaseExpression { virtual bool IsScalar() const = 0; //! Returns true if the expression has a parameter virtual bool HasParameter() const = 0; - //! Returns true if executing the base expression can throw an error - //! I.e Cast('a' to INT). - virtual bool CanThrow() const = 0; //! Get the name of the expression virtual string GetName() const; diff --git a/src/include/duckdb/parser/expression/cast_expression.hpp b/src/include/duckdb/parser/expression/cast_expression.hpp index c16bd90a754..7c48e0ddec2 100644 --- a/src/include/duckdb/parser/expression/cast_expression.hpp +++ b/src/include/duckdb/parser/expression/cast_expression.hpp @@ -34,7 +34,6 @@ class CastExpression : public ParsedExpression { static bool Equal(const CastExpression &a, const CastExpression &b); unique_ptr Copy() const override; - bool CanThrow() const override; void Serialize(Serializer &serializer) const override; static unique_ptr Deserialize(Deserializer &deserializer); diff --git a/src/include/duckdb/parser/parsed_expression.hpp b/src/include/duckdb/parser/parsed_expression.hpp index a3725706e3e..46acfe9e60f 100644 --- a/src/include/duckdb/parser/parsed_expression.hpp +++ b/src/include/duckdb/parser/parsed_expression.hpp @@ -39,7 +39,6 @@ class ParsedExpression : public BaseExpression { bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; - bool CanThrow() const override; bool Equals(const BaseExpression &other) const override; hash_t Hash() const override; diff --git a/src/include/duckdb/planner/expression.hpp b/src/include/duckdb/planner/expression.hpp index c5b2d6115b5..97094bbde70 100644 --- a/src/include/duckdb/planner/expression.hpp +++ b/src/include/duckdb/planner/expression.hpp @@ -32,11 +32,12 @@ class Expression : public BaseExpression { bool HasSubquery() const override; bool IsScalar() const override; bool HasParameter() const override; - bool CanThrow() const override; + virtual bool IsVolatile() const; virtual bool IsConsistent() const; virtual bool PropagatesNullValues() const; virtual bool IsFoldable() const; + virtual bool CanThrow() const; hash_t Hash() const override; diff --git a/src/parser/expression/cast_expression.cpp b/src/parser/expression/cast_expression.cpp index 602f43355ce..758cc29ce02 100644 --- a/src/parser/expression/cast_expression.cpp +++ b/src/parser/expression/cast_expression.cpp @@ -21,11 +21,6 @@ string CastExpression::ToString() const { return ToString(*this); } -bool CastExpression::CanThrow() const { - // Cast('a' to INT) can throw - return true; -} - bool CastExpression::Equal(const CastExpression &a, const CastExpression &b) { if (!a.child->Equals(*b.child)) { return false; diff --git a/src/parser/parsed_expression.cpp b/src/parser/parsed_expression.cpp index 03184c6dd3d..617d60a3d89 100644 --- a/src/parser/parsed_expression.cpp +++ b/src/parser/parsed_expression.cpp @@ -48,13 +48,6 @@ bool ParsedExpression::HasSubquery() const { return has_subquery; } -bool ParsedExpression::CanThrow() const { - bool can_throw = false; - ParsedExpressionIterator::EnumerateChildren(*this, - [&](const ParsedExpression &child) { can_throw |= child.CanThrow(); }); - return can_throw; -} - bool ParsedExpression::Equals(const BaseExpression &other) const { if (!BaseExpression::Equals(other)) { return false; From 2e84d42fce3caa9474cd638ed776b3f0abdb1c86 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 15 Oct 2024 08:48:33 +0200 Subject: [PATCH 11/11] include expression iterator --- src/planner/expression/bound_cast_expression.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/planner/expression/bound_cast_expression.cpp b/src/planner/expression/bound_cast_expression.cpp index 392d38c0c87..2cd3869d57b 100644 --- a/src/planner/expression/bound_cast_expression.cpp +++ b/src/planner/expression/bound_cast_expression.cpp @@ -2,6 +2,7 @@ #include "duckdb/planner/expression/bound_default_expression.hpp" #include "duckdb/planner/expression/bound_parameter_expression.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/planner/expression_iterator.hpp" #include "duckdb/function/cast_rules.hpp" #include "duckdb/function/cast/cast_function_set.hpp" #include "duckdb/main/config.hpp"