From 12cd3c4ac165518d60b660c1e8928af7461de219 Mon Sep 17 00:00:00 2001 From: Przemek Denkiewicz Date: Fri, 26 Jul 2024 16:54:04 +0200 Subject: [PATCH 1/2] Change table materialization logic when on_table_exists = 'rename' --- .../unreleased/Fixes-20240729-101114.yaml | 7 ++ .../trino/macros/materializations/table.sql | 29 +++-- .../materialization/test_on_table_exists.py | 110 ++++++++++++++++++ 3 files changed, 135 insertions(+), 11 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240729-101114.yaml diff --git a/.changes/unreleased/Fixes-20240729-101114.yaml b/.changes/unreleased/Fixes-20240729-101114.yaml new file mode 100644 index 00000000..56df1849 --- /dev/null +++ b/.changes/unreleased/Fixes-20240729-101114.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Change table materialization logic when on_table_exists = 'rename' +time: 2024-07-29T10:11:14.451171+02:00 +custom: + Author: hovaesco + Issue: "423" + PR: "425" diff --git a/dbt/include/trino/macros/materializations/table.sql b/dbt/include/trino/macros/materializations/table.sql index 33e615d8..09ed5476 100644 --- a/dbt/include/trino/macros/materializations/table.sql +++ b/dbt/include/trino/macros/materializations/table.sql @@ -48,20 +48,27 @@ {% macro on_table_exists_logic(on_table_exists, existing_relation, intermediate_relation, backup_relation, target_relation) -%} {#-- Create table with given `on_table_exists` mode #} {% if on_table_exists == 'rename' %} - {#-- build modeldock #} - {% call statement('main') -%} - {{ create_table_as(False, intermediate_relation, sql) }} - {%- endcall %} - {#-- cleanup #} - {% if existing_relation is not none %} - {{ adapter.rename_relation(existing_relation, backup_relation) }} - {% endif %} + {#-- table does not exists #} + {% if existing_relation is none %} + {% call statement('main') -%} + {{ create_table_as(False, target_relation, sql) }} + {%- endcall %} + + {#-- table does exists #} + {% else %} + {#-- build modeldock #} + {% call statement('main') -%} + {{ create_table_as(False, intermediate_relation, sql) }} + {%- endcall %} - {{ adapter.rename_relation(intermediate_relation, target_relation) }} + {#-- cleanup #} + {{ adapter.rename_relation(existing_relation, backup_relation) }} + {{ adapter.rename_relation(intermediate_relation, target_relation) }} - {#-- finally, drop the existing/backup relation after the commit #} - {{ drop_relation_if_exists(backup_relation) }} + {#-- finally, drop the existing/backup relation after the commit #} + {{ drop_relation_if_exists(backup_relation) }} + {% endif %} {% elif on_table_exists == 'drop' %} {#-- cleanup #} diff --git a/tests/functional/adapter/materialization/test_on_table_exists.py b/tests/functional/adapter/materialization/test_on_table_exists.py index 24daf64b..01842bea 100644 --- a/tests/functional/adapter/materialization/test_on_table_exists.py +++ b/tests/functional/adapter/materialization/test_on_table_exists.py @@ -25,6 +25,116 @@ def models(self): } +class TestOnTableExistsRename(BaseOnTableExists): + """ + Testing on_table_exists = `rename` configuration for table materialization, + using dbt seed, run and tests commands and validate data load correctness. + """ + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "table_rename", + "models": {"+materialized": "table", "+on_table_exists": "rename"}, + "seeds": { + "+column_types": {"some_date": "timestamp(6)"}, + }, + } + + # The actual sequence of dbt commands and assertions + # pytest will take care of all "setup" + "teardown" + def test_run_seed_test(self, project): + # seed seeds + results = run_dbt(["seed"], expect_pass=True) + assert len(results) == 1 + # run models two times to check on_table_exists = 'rename' + results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True) + assert len(results) == 1 + assert ( + f'create table "{project.database}"."{project.test_schema}"."materialization"' in logs + ) + assert "alter table" not in logs + results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True) + assert len(results) == 1 + assert ( + f'create table "{project.database}"."{project.test_schema}"."materialization__dbt_tmp"' + in logs + ) + assert ( + f'alter table "{project.database}"."{project.test_schema}"."materialization" rename to "{project.database}"."{project.test_schema}"."materialization__dbt_backup"' + in logs + ) + assert ( + f'alter table "{project.database}"."{project.test_schema}"."materialization__dbt_tmp" rename to "{project.database}"."{project.test_schema}"."materialization"' + in logs + ) + assert ( + f'drop table if exists "{project.database}"."{project.test_schema}"."materialization__dbt_backup"' + in logs + ) + # test tests + results = run_dbt(["test"], expect_pass=True) + assert len(results) == 3 + + # check if the data was loaded correctly + check_relations_equal(project.adapter, ["seed", "materialization"]) + + +class TestOnTableExistsRenameIncrementalFullRefresh(BaseOnTableExists): + """ + Testing on_table_exists = `rename` configuration for incremental materialization and full refresh flag, + using dbt seed, run and tests commands and validate data load correctness. + """ + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "name": "table_rename", + "models": {"+materialized": "incremental", "+on_table_exists": "rename"}, + "seeds": { + "+column_types": {"some_date": "timestamp(6)"}, + }, + } + + # The actual sequence of dbt commands and assertions + # pytest will take care of all "setup" + "teardown" + def test_run_seed_test(self, project): + # seed seeds + results = run_dbt(["seed"], expect_pass=True) + assert len(results) == 1 + # run models two times to check on_table_exists = 'rename' + results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True) + assert ( + f'create table "{project.database}"."{project.test_schema}"."materialization"' in logs + ) + assert "alter table" not in logs + results, logs = run_dbt_and_capture(["--debug", "run", "--full-refresh"], expect_pass=True) + assert len(results) == 1 + assert ( + f'create table "{project.database}"."{project.test_schema}"."materialization__dbt_tmp"' + in logs + ) + assert ( + f'alter table "{project.database}"."{project.test_schema}"."materialization" rename to "{project.database}"."{project.test_schema}"."materialization__dbt_backup"' + in logs + ) + assert ( + f'alter table "{project.database}"."{project.test_schema}"."materialization__dbt_tmp" rename to "{project.database}"."{project.test_schema}"."materialization"' + in logs + ) + assert ( + f'drop table if exists "{project.database}"."{project.test_schema}"."materialization__dbt_backup"' + in logs + ) + assert "create or replace view" not in logs + # test tests + results = run_dbt(["test"], expect_pass=True) + assert len(results) == 3 + + # check if the data was loaded correctly + check_relations_equal(project.adapter, ["seed", "materialization"]) + + class TestOnTableExistsDrop(BaseOnTableExists): """ Testing on_table_exists = `drop` configuration for table materialization, From a93855e81506b2b4ec30999bd0781b0ab8cc49e2 Mon Sep 17 00:00:00 2001 From: Przemek Denkiewicz Date: Mon, 29 Jul 2024 10:08:45 +0200 Subject: [PATCH 2/2] Add tpch catalog --- docker/starburst/catalog/tpch.properties | 1 + docker/trino/catalog/tpch.properties | 1 + 2 files changed, 2 insertions(+) create mode 100644 docker/starburst/catalog/tpch.properties create mode 100644 docker/trino/catalog/tpch.properties diff --git a/docker/starburst/catalog/tpch.properties b/docker/starburst/catalog/tpch.properties new file mode 100644 index 00000000..75110c5a --- /dev/null +++ b/docker/starburst/catalog/tpch.properties @@ -0,0 +1 @@ +connector.name=tpch diff --git a/docker/trino/catalog/tpch.properties b/docker/trino/catalog/tpch.properties new file mode 100644 index 00000000..75110c5a --- /dev/null +++ b/docker/trino/catalog/tpch.properties @@ -0,0 +1 @@ +connector.name=tpch