# Origin: patch 03f68e3307058f6369c284ff2bfe368c0de946a3
#   From: Yasuhisa Yoshida
#   Date: Mon, 4 Nov 2024 08:41:34 +0900
#   Subject: [PATCH] Add tests for data profile scan
#   Creates: tests/functional/test_data_profile_scan.py
"""Functional tests for the BigQuery ``data_profile_scan`` model config.

Each test patches ``dataplex_v1.DataScanServiceClient`` as seen from
``dbt.adapters.bigquery.impl``, runs ``dbt run`` on a single table model, and
then checks two things:

* which methods of the mocked data scan client were called, and
* which label keys ended up on the BigQuery table that was built.
"""
import pytest
from unittest.mock import patch
from dbt.adapters.bigquery.relation import BigQueryRelation
from dbt.tests.util import run_dbt, get_connection, relation_from_name

SCAN_LOCATION = "us-central1"
SCAN_ID = "bigquery_data_profile_scan_test"
MODEL_NAME = "test_model"

# Cron schedule shared by every scheduled-scan fixture. The CRON_TZ prefix has
# to name a real IANA zone; the previous value "Asia/New_York" does not exist.
SCAN_CRON = "CRON_TZ=America/New_York 0 9 * * *"

# Import path that is patched so the tests never reach the real client.
DATA_SCAN_CLIENT_PATH = "dbt.adapters.bigquery.impl.dataplex_v1.DataScanServiceClient"

ORIGINAL_LABELS = {
    "my_label_key": "my_label_value",
}

PROFILE_SCAN_LABELS = [
    "dataplex-dp-published-scan",
    "dataplex-dp-published-project",
    "dataplex-dp-published-location",
]

# Label keys the tests expect on the table when a profile scan is configured:
# the user's own labels plus the scan-related ones.
EXPECTED_LABELS_WITH_SCAN = set(PROFILE_SCAN_LABELS) | set(ORIGINAL_LABELS)

SQL_CONTENT = """
{{
  config(
    materialized="table"
  )
}}
  select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all
  select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
"""

YAML_CONTENT = f"""version: 2
models:
  - name: {MODEL_NAME}
"""

# Model-level scan config. It reuses the shared constants so that it cannot
# drift from the project-level fixtures below.
YAML_CONTENT_WITH_PROFILE_SCAN_SETTING = f"""version: 2
models:
  - name: {MODEL_NAME}
    config:
      data_profile_scan:
        location: {SCAN_LOCATION}
        scan_id: {SCAN_ID}
        sampling_percent: 10
        row_filter: "TRUE"
        cron: "{SCAN_CRON}"
"""


def _table_label_keys(project) -> set:
    """Return the label keys set on the BigQuery table built for ``MODEL_NAME``.

    ``project`` is the dbt functional-test project fixture; its adapter is used
    both to resolve the relation and to open the connection that fetches the
    table.
    """
    adapter = project.adapter
    relation: BigQueryRelation = relation_from_name(adapter, MODEL_NAME)
    with get_connection(adapter) as conn:
        table = conn.handle.get_table(
            adapter.connections.get_bq_table(
                relation.database, relation.schema, relation.table
            )
        )
        return set(table.labels)


class TestDataProfileScanWithProjectProfileScanSetting:
    """Scan configured in the project config, without a cron schedule."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "models": {
                "+labels": ORIGINAL_LABELS,
                "+data_profile_scan": {
                    "location": SCAN_LOCATION,
                    "scan_id": SCAN_ID,
                    "sampling_percent": 10,
                    "row_filter": "TRUE",
                },
            },
        }

    @pytest.fixture(scope="class")
    def models(self):
        return {
            f"{MODEL_NAME}.sql": SQL_CONTENT,
            f"{MODEL_NAME}.yml": YAML_CONTENT,
        }

    def test_create_data_profile_scan(self, project):
        with patch(DATA_SCAN_CLIENT_PATH) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            # No cron configured: the scan is expected to be created and run once.
            mock_data_scan_client.create_data_scan.assert_called_once()
            mock_data_scan_client.run_data_scan.assert_called_once()

            assert _table_label_keys(project) == EXPECTED_LABELS_WITH_SCAN


class TestDataProfileScanWithProjectProfileScanSettingAndCron:
    """Scan configured in the project config, with a cron schedule."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "models": {
                "+labels": ORIGINAL_LABELS,
                "+data_profile_scan": {
                    "location": SCAN_LOCATION,
                    "scan_id": SCAN_ID,
                    "sampling_percent": 10,
                    "row_filter": "TRUE",
                    "cron": SCAN_CRON,
                },
            },
        }

    @pytest.fixture(scope="class")
    def models(self):
        return {
            f"{MODEL_NAME}.sql": SQL_CONTENT,
            f"{MODEL_NAME}.yml": YAML_CONTENT,
        }

    def test_create_data_profile_scan(self, project):
        with patch(DATA_SCAN_CLIENT_PATH) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            # Cron configured: the scan is expected to be created but not run.
            mock_data_scan_client.create_data_scan.assert_called_once()
            mock_data_scan_client.run_data_scan.assert_not_called()

            assert _table_label_keys(project) == EXPECTED_LABELS_WITH_SCAN


class TestDataProfileScanWithModelProfileScanSetting:
    """Scan (with cron) configured in the model's YAML; labels set in the SQL config."""

    @pytest.fixture(scope="class")
    def models(self):
        # Quadruple braces render as literal "{{" / "}}" Jinja delimiters in the
        # f-string; ORIGINAL_LABELS is interpolated as its dict repr.
        sql_content = f"""
{{{{
  config(
    materialized="table",
    labels={ORIGINAL_LABELS},
  )
}}}}
  select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all
  select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour
"""

        return {
            f"{MODEL_NAME}.sql": sql_content,
            f"{MODEL_NAME}.yml": YAML_CONTENT_WITH_PROFILE_SCAN_SETTING,
        }

    def test_create_data_profile_scan(self, project):
        with patch(DATA_SCAN_CLIENT_PATH) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            # Cron configured: the scan is expected to be created but not run.
            mock_data_scan_client.create_data_scan.assert_called_once()
            mock_data_scan_client.run_data_scan.assert_not_called()

            assert _table_label_keys(project) == EXPECTED_LABELS_WITH_SCAN


class TestDataProfileScanWithoutProfileScanSetting:
    """No ``data_profile_scan`` config and no labels anywhere."""

    @pytest.fixture(scope="class")
    def models(self):
        return {
            f"{MODEL_NAME}.sql": SQL_CONTENT,
            f"{MODEL_NAME}.yml": YAML_CONTENT,
        }

    def test_create_data_profile_scan(self, project):
        with patch(DATA_SCAN_CLIENT_PATH) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            mock_data_scan_client.create_data_scan.assert_not_called()
            mock_data_scan_client.run_data_scan.assert_not_called()

            # Nothing configured, so the table is expected to carry no labels.
            assert _table_label_keys(project) == set()


class TestDataProfileScanDisabledProfileScanSetting:
    """Scan present in the project config but switched off via ``enabled: False``."""

    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {
            "models": {
                "+data_profile_scan": {
                    "location": SCAN_LOCATION,
                    "scan_id": SCAN_ID,
                    "enabled": False,
                },
            },
        }

    @pytest.fixture(scope="class")
    def models(self):
        return {
            f"{MODEL_NAME}.sql": SQL_CONTENT,
            f"{MODEL_NAME}.yml": YAML_CONTENT,
        }

    def test_create_data_profile_scan(self, project):
        with patch(DATA_SCAN_CLIENT_PATH) as MockDataScanClient:
            mock_data_scan_client = MockDataScanClient.return_value

            results = run_dbt()
            assert len(results) == 1

            mock_data_scan_client.create_data_scan.assert_not_called()
            mock_data_scan_client.run_data_scan.assert_not_called()

            # Disabled scan and no "+labels": the table is expected to be unlabeled.
            assert _table_label_keys(project) == set()