clean up / unify code and add a test
Showing 6 changed files with 230 additions and 7 deletions.
206 changes: 206 additions & 0 deletions
test/sql/parallelism/intraquery/depth_first_evaluation.test_slow
@@ -0,0 +1,206 @@
# name: test/sql/parallelism/intraquery/depth_first_evaluation.test_slow
# description: Test that query plans are evaluated in a depth-first fashion
# group: [intraquery]

# we need a persistent DB because we want to compress the table that we're working with
load __TEST_DIR__/depth_first_evaluation.db

# we don't want any disk spilling because we're testing memory pressure
statement ok
SET temp_directory = ''

# 1GiB is pretty tight
statement ok
SET memory_limit = '1GiB'

statement ok
SET threads = 4

# 10M integers but the table is tiny because of delta compression
statement ok
CREATE TABLE integers AS SELECT range i FROM range(10_000_000)
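# ballpark (our assumption, not measured): 10M sequential BIGINTs are ~80MB uncompressed,
# but delta compression stores them in a few MB, so the base table itself adds little pressure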

# one of these should easily fit in memory
query I
SELECT count(*) c FROM (SELECT DISTINCT i FROM integers)
----
10000000

# the next query performs 10 of the same distinct aggregations and unions them together
# each distinct aggregation has a different LIMIT (which has no effect on the result)
# so that this test is future-proof (in case DuckDB ever does common sub-plan elimination)

# the idea here is that if DuckDB did breadth-first plan evaluation (like it did before),
# it would first perform the 'Sink' for every distinct aggregation one by one,
# creating HUGE temporary intermediates
# only after that would DuckDB perform the 'Finalize' for every distinct aggregation one by one
# the 'Finalize' reduces the data size to a single row
# so, this used to throw an OOM exception given the current memory limit
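
# back-of-envelope arithmetic (our assumption, not measured): each distinct aggregation
# materializes a hash table over 10M unique 64-bit keys, i.e., at least ~80MB of key data
# plus hash-table overhead per aggregation; holding all 10 at once comfortably exceeds
# the 1GiB limit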

# with depth-first plan evaluation, DuckDB performs 'Finalize' for every distinct aggregation,
# before starting 'Sink' on the next distinct aggregation
# now this query completes without much memory pressure!
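# rough sketch of the two schedules (illustrative, not an exact execution trace):
#   breadth-first: Sink(agg0), Sink(agg1), ..., Sink(agg9), Finalize(agg0), ..., Finalize(agg9)
#   depth-first:   Sink(agg0), Finalize(agg0), Sink(agg1), Finalize(agg1), ...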
query I
SELECT sum(c)
FROM (
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_000))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_001))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_002))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_003))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_004))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_005))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_006))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_007))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_008))
UNION ALL
SELECT count(*) c FROM (SELECT DISTINCT i FROM (SELECT i FROM integers LIMIT 100_000_009))
)
----
100000000

statement ok
DROP TABLE integers

# column i is 0, 100000, 200000, etc. for every 100,000th row - exactly 100 integer values
# spread out over the range 0 to 10 million; all other values in column i equal range + 0.5
# columns j and k are just ranges from 0 to 10 million (cast to DOUBLE)
# we have to do this so our statistics propagation and dynamic join filters don't trivialise the query
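# for illustration: the first rows are (0.0, 0.0, 0.0), (1.5, 1.0, 1.0), (2.5, 2.0, 2.0), ...
# and row 100000 is (100000.0, 100000.0, 100000.0) - only the integral i values can equal a j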
statement ok
CREATE TABLE doubles AS
SELECT
CASE WHEN range % 100_000 = 0 THEN range ELSE range + 0.5 END i,
range::DOUBLE j,
range::DOUBLE k
FROM range(10_000_000)

# one of these should always fit in memory
# the idea is that the CTE is a large join (10M x 10M), but it's really selective:
# only 100 tuples come out of it

# then, we join with doubles unioned with itself, so that it becomes the probe pipeline,
# i.e., it has a higher cardinality than the selective join, which becomes the build side
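
# sanity check on the expected count (our arithmetic): the 100 integral i values each match
# exactly one j, so the CTE yields 100 k values; the union contains each k twice,
# giving 2 * 100 = 200 matches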
query I
WITH c AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
)
SELECT count(*)
FROM (
SELECT k FROM doubles
UNION ALL
SELECT k FROM doubles
) d
JOIN c
ON (d.k = c.k)
----
200

# now we just crank up the number of CTEs that we're joining with to 10

# again, if DuckDB did breadth-first plan evaluation (like it did before),
# it would 'Sink' into all of the builds in the CTEs one by one, creating huge intermediates
# only after that would it perform all the selective joins and reduce the size of the intermediates
# so, this used to throw an OOM exception
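
# rough arithmetic (our assumption about build sizes, not measured): each CTE's inner join
# builds a hash table over all 10M rows of doubles - on the order of a few hundred MB each -
# so sinking all 10 builds up front blows well past the 1GiB limit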

# with depth-first plan evaluation, DuckDB performs the selective joins one by one,
# reducing the size of intermediates immediately, and the query completes!
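# sketch, mirroring the aggregate case above (illustrative, not an exact trace):
#   breadth-first: Build(c0's 10M-row hash table), ..., Build(c9's), then probe
#   depth-first:   finish c0's selective join (down to 100 rows) before starting on c1, and so on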
query I
WITH c0 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_000
), c1 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_001
), c2 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_002
), c3 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_003
), c4 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_004
), c5 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_005
), c6 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_006
), c7 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_007
), c8 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_008
), c9 AS NOT MATERIALIZED (
SELECT d0.k
FROM doubles d0
JOIN doubles d1
ON (d0.i = d1.j)
LIMIT 100_000_009
)
SELECT count(*)
FROM (
SELECT k FROM doubles
UNION ALL
SELECT k FROM doubles
) d
JOIN c0
ON (d.k = c0.k)
JOIN c1
ON (d.k = c1.k)
JOIN c2
ON (d.k = c2.k)
JOIN c3
ON (d.k = c3.k)
JOIN c4
ON (d.k = c4.k)
JOIN c5
ON (d.k = c5.k)
JOIN c6
ON (d.k = c6.k)
JOIN c7
ON (d.k = c7.k)
JOIN c8
ON (d.k = c8.k)
JOIN c9
ON (d.k = c9.k)
----
200