diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 72c3825383..3569841cc6 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -93,7 +93,7 @@ jobs: run: poetry install --no-interaction --with sentry-sdk - run: | - poetry run pytest tests/common tests/normalize tests/reflection tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py + poetry run pytest tests/common tests/normalize tests/reflection tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py if: runner.os != 'Windows' name: Run common tests with minimum dependencies Linux/MAC - run: | @@ -103,14 +103,14 @@ jobs: shell: cmd - name: Install duckdb dependencies - run: poetry install --no-interaction -E duckdb --with sentry-sdk + run: poetry install --no-interaction -E duckdb -E filesystem --with sentry-sdk - run: | - poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py + poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py tests/pipeline/test_filesystem_sql_secrets.py if: runner.os != 'Windows' name: Run pipeline smoke tests with minimum deps Linux/MAC - run: | - poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py -m "not forked" + poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py tests/pipeline/test_filesystem_sql_secrets.py -m "not forked" if: runner.os == 'Windows' name: Run smoke tests with minimum deps Windows shell: cmd diff --git a/dlt/destinations/impl/filesystem/sql_client.py b/dlt/destinations/impl/filesystem/sql_client.py index 8b4c576f14..fec761ff36 100644 --- a/dlt/destinations/impl/filesystem/sql_client.py +++ b/dlt/destinations/impl/filesystem/sql_client.py @@ -24,6 +24,8 @@ ) from dlt.destinations.utils import is_compression_disabled +from pathlib import Path + SUPPORTED_PROTOCOLS = ["gs", "gcs", "s3", "file", "memory", "az", "abfss"] if TYPE_CHECKING: @@ -80,17 +82,22 @@ def create_authentication(self, persistent: bool = False, secret_name: str = Non if persistent and self.memory_db: raise Exception("Creating persistent secrets for in memory db is not allowed.") - current_secret_directory = self._conn.sql( - "SELECT current_setting('secret_directory') AS secret_directory;" - ).fetchone()[0] + secrets_path = Path( + self._conn.sql( + "SELECT current_setting('secret_directory') AS secret_directory;" + ).fetchone()[0] + ) - # TODO: what is with windows? - is_default_secrets_directory = current_secret_directory.endswith("/.duckdb/stored_secrets") + is_default_secrets_directory = ( + len(secrets_path.parts) >= 2 + and secrets_path.parts[-1] == "stored_secrets" + and secrets_path.parts[-2] == ".duckdb" + ) if is_default_secrets_directory and persistent: logger.warn( "You are persisting duckdb secrets but are storing them in the default folder" - f" {current_secret_directory}. These secrets are saved there unencrypted, we" + f" {secrets_path}. These secrets are saved there unencrypted, we" " recommend using a custom secret directory." ) diff --git a/pytest.ini b/pytest.ini index 4f033f672c..a18e534285 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,7 +1,7 @@ [pytest] pythonpath= dlt docs/website/docs norecursedirs= .direnv .eggs build dist -addopts= -v --showlocals --durations 10 +addopts= -v --showlocals --durations 10 --capture=tee-sys xfail_strict= true log_cli= 1 log_cli_level= INFO diff --git a/tests/load/filesystem/test_sql_client.py b/tests/load/filesystem/test_sql_client.py index f40a1b6312..ac2ada2551 100644 --- a/tests/load/filesystem/test_sql_client.py +++ b/tests/load/filesystem/test_sql_client.py @@ -7,6 +7,8 @@ import dlt import os import shutil +import logging + from dlt import Pipeline from dlt.common.utils import uniq_id @@ -318,87 +320,6 @@ def test_delta_tables( ) -@pytest.mark.essential -@pytest.mark.parametrize( - "destination_config", - destinations_configs(all_buckets_filesystem_configs=True, bucket_subset=(AWS_BUCKET,)), - ids=lambda x: x.name, -) -def test_secrets_management(destination_config: DestinationTestConfiguration) -> None: - """Test the handling of secrets by the sql_client, we only need to do this on s3 - as the other destinations work accordingly""" - - pipeline = destination_config.setup_pipeline( - "read_pipeline", - dataset_name="read_test", - ) - pipeline.run([1, 2, 3], table_name="items") - - import duckdb - from duckdb import HTTPException - from dlt.destinations.impl.filesystem.sql_client import ( - FilesystemSqlClient, - DuckDbCredentials, - ) - - duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id() - secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}" - - def _external_duckdb_connection() -> duckdb.DuckDBPyConnection: - external_db = duckdb.connect(duck_db_location) - external_db.sql(f"SET secret_directory = '{secrets_dir}';") - external_db.execute("CREATE SCHEMA IF NOT EXISTS first;") - return external_db - - def _fs_sql_client_for_external_db( - connection: duckdb.DuckDBPyConnection, - ) -> FilesystemSqlClient: - return FilesystemSqlClient( - dataset_name="second", - fs_client=pipeline.destination_client(), # type: ignore - credentials=DuckDbCredentials(connection), - ) - - def _secrets_exist() -> bool: - return os.path.isdir(secrets_dir) and len(os.listdir(secrets_dir)) > 0 - - # first test what happens if there are no external secrets - external_db = _external_duckdb_connection() - fs_sql_client = _fs_sql_client_for_external_db(external_db) - with fs_sql_client as sql_client: - sql_client.create_views_for_tables({"items": "items"}) - external_db.close() - - # no secrets will not work - external_db = _external_duckdb_connection() - with pytest.raises(HTTPException): - assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3 - external_db.close() - - # add secrets and check that they are there - external_db = _external_duckdb_connection() - fs_sql_client = _fs_sql_client_for_external_db(external_db) - with fs_sql_client as sql_client: - fs_sql_client.create_authentication(persistent=True) - assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3 - assert _secrets_exist() - - # remove secrets and check that they are removed - with fs_sql_client as sql_client: - fs_sql_client.drop_authentication() - assert not _secrets_exist() - external_db.close() - - # prevent creating persistent secrets on in mem databases - fs_sql_client = FilesystemSqlClient( - dataset_name="second", - fs_client=pipeline.destination_client(), # type: ignore - ) - with pytest.raises(Exception): - with fs_sql_client as sql_client: - fs_sql_client.create_authentication(persistent=True) - - @pytest.mark.essential @pytest.mark.parametrize( "destination_config", diff --git a/tests/pipeline/test_filesystem_sql_secrets.py b/tests/pipeline/test_filesystem_sql_secrets.py new file mode 100644 index 0000000000..0484ea732a --- /dev/null +++ b/tests/pipeline/test_filesystem_sql_secrets.py @@ -0,0 +1,106 @@ +import pytest +import os + +from tests.utils import TEST_STORAGE_ROOT +from tests.load.utils import ( + destinations_configs, + DestinationTestConfiguration, + AWS_BUCKET, +) +from dlt.common.utils import uniq_id + + +@pytest.mark.essential +@pytest.mark.parametrize( + "destination_config", + destinations_configs(all_buckets_filesystem_configs=True, bucket_subset=(AWS_BUCKET,)), + ids=lambda x: x.name, +) +def test_secrets_management( + destination_config: DestinationTestConfiguration, capfd: pytest.CaptureFixture +) -> None: + """Test the handling of secrets by the sql_client, we only need to do this on s3 + as the other destinations work accordingly""" + warning_mesage = "You are persisting duckdb secrets but are storing them in the default folder" + + pipeline = destination_config.setup_pipeline( + "read_pipeline", + dataset_name="read_test", + ) + pipeline.run([1, 2, 3], table_name="items") + + import duckdb + from duckdb import HTTPException + from dlt.destinations.impl.filesystem.sql_client import ( + FilesystemSqlClient, + DuckDbCredentials, + ) + + duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id() + secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}" + + def _external_duckdb_connection() -> duckdb.DuckDBPyConnection: + external_db = duckdb.connect(duck_db_location) + external_db.sql(f"SET secret_directory = '{secrets_dir}';") + external_db.execute("CREATE SCHEMA IF NOT EXISTS first;") + return external_db + + def _fs_sql_client_for_external_db( + connection: duckdb.DuckDBPyConnection, + ) -> FilesystemSqlClient: + return FilesystemSqlClient( + dataset_name="second", + fs_client=pipeline.destination_client(), # type: ignore + credentials=DuckDbCredentials(connection), + ) + + def _secrets_exist() -> bool: + return os.path.isdir(secrets_dir) and len(os.listdir(secrets_dir)) > 0 + + # first test what happens if there are no external secrets + external_db = _external_duckdb_connection() + fs_sql_client = _fs_sql_client_for_external_db(external_db) + with fs_sql_client as sql_client: + sql_client.create_views_for_tables({"items": "items"}) + external_db.close() + + # no secrets will not work + external_db = _external_duckdb_connection() + with pytest.raises(HTTPException): + assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3 + external_db.close() + + # add secrets and check that they are there + external_db = _external_duckdb_connection() + fs_sql_client = _fs_sql_client_for_external_db(external_db) + with fs_sql_client as sql_client: + fs_sql_client.create_authentication(persistent=True) + assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3 + assert _secrets_exist() + + # remove secrets and check that they are removed + with fs_sql_client as sql_client: + fs_sql_client.drop_authentication() + assert not _secrets_exist() + external_db.close() + + # prevent creating persistent secrets on in mem databases + fs_sql_client = FilesystemSqlClient( + dataset_name="second", + fs_client=pipeline.destination_client(), # type: ignore + ) + with pytest.raises(Exception): + with fs_sql_client as sql_client: + fs_sql_client.create_authentication(persistent=True) + + # check that no warning was logged + assert warning_mesage not in capfd.readouterr().err + + # check that warning is logged when secrets are persisted in the default folder + duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id() + secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}" + duck_db = duckdb.connect(duck_db_location) + fs_sql_client = _fs_sql_client_for_external_db(duck_db) + with fs_sql_client as sql_client: + sql_client.create_authentication(persistent=True) + assert warning_mesage in capfd.readouterr().err