Skip to content

Commit

Permalink
fix warning message
Browse files Browse the repository at this point in the history
test secrets on all platforms
  • Loading branch information
sh-rp committed Nov 4, 2024
1 parent 1a2df53 commit cd30eb8
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 92 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
run: poetry install --no-interaction --with sentry-sdk

- run: |
poetry run pytest tests/common tests/normalize tests/reflection tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py
poetry run pytest tests/common tests/normalize tests/reflection tests/load/test_dummy_client.py tests/extract/test_extract.py tests/extract/test_sources.py tests/pipeline/test_pipeline_state.py
if: runner.os != 'Windows'
name: Run common tests with minimum dependencies Linux/MAC
- run: |
Expand All @@ -103,14 +103,14 @@ jobs:
shell: cmd
- name: Install duckdb dependencies
run: poetry install --no-interaction -E duckdb --with sentry-sdk
run: poetry install --no-interaction -E duckdb -E filesystem --with sentry-sdk

- run: |
poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py
poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py tests/pipeline/test_filesystem_sql_secrets.py
if: runner.os != 'Windows'
name: Run pipeline smoke tests with minimum deps Linux/MAC
- run: |
poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py -m "not forked"
poetry run pytest tests/pipeline/test_pipeline.py tests/pipeline/test_import_export_schema.py tests/pipeline/test_filesystem_sql_secrets.py -m "not forked"
if: runner.os == 'Windows'
name: Run smoke tests with minimum deps Windows
shell: cmd
Expand Down
19 changes: 13 additions & 6 deletions dlt/destinations/impl/filesystem/sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
)
from dlt.destinations.utils import is_compression_disabled

from pathlib import Path

SUPPORTED_PROTOCOLS = ["gs", "gcs", "s3", "file", "memory", "az", "abfss"]

if TYPE_CHECKING:
Expand Down Expand Up @@ -80,17 +82,22 @@ def create_authentication(self, persistent: bool = False, secret_name: str = Non
if persistent and self.memory_db:
raise Exception("Creating persistent secrets for in memory db is not allowed.")

current_secret_directory = self._conn.sql(
"SELECT current_setting('secret_directory') AS secret_directory;"
).fetchone()[0]
secrets_path = Path(
self._conn.sql(
"SELECT current_setting('secret_directory') AS secret_directory;"
).fetchone()[0]
)

# TODO: what is with windows?
is_default_secrets_directory = current_secret_directory.endswith("/.duckdb/stored_secrets")
is_default_secrets_directory = (
len(secrets_path.parts) >= 2
and secrets_path.parts[-1] == "stored_secrets"
and secrets_path.parts[-2] == ".duckdb"
)

if is_default_secrets_directory and persistent:
logger.warn(
"You are persisting duckdb secrets but are storing them in the default folder"
f" {current_secret_directory}. These secrets are saved there unencrypted, we"
f" {secrets_path}. These secrets are saved there unencrypted, we"
" recommend using a custom secret directory."
)

Expand Down
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[pytest]
pythonpath= dlt docs/website/docs
norecursedirs= .direnv .eggs build dist
addopts= -v --showlocals --durations 10
addopts= -v --showlocals --durations 10 --capture=tee-sys
xfail_strict= true
log_cli= 1
log_cli_level= INFO
Expand Down
83 changes: 2 additions & 81 deletions tests/load/filesystem/test_sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import dlt
import os
import shutil
import logging


from dlt import Pipeline
from dlt.common.utils import uniq_id
Expand Down Expand Up @@ -318,87 +320,6 @@ def test_delta_tables(
)


@pytest.mark.essential
@pytest.mark.parametrize(
"destination_config",
destinations_configs(all_buckets_filesystem_configs=True, bucket_subset=(AWS_BUCKET,)),
ids=lambda x: x.name,
)
def test_secrets_management(destination_config: DestinationTestConfiguration) -> None:
"""Test the handling of secrets by the sql_client, we only need to do this on s3
as the other destinations work accordingly"""

pipeline = destination_config.setup_pipeline(
"read_pipeline",
dataset_name="read_test",
)
pipeline.run([1, 2, 3], table_name="items")

import duckdb
from duckdb import HTTPException
from dlt.destinations.impl.filesystem.sql_client import (
FilesystemSqlClient,
DuckDbCredentials,
)

duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id()
secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}"

def _external_duckdb_connection() -> duckdb.DuckDBPyConnection:
external_db = duckdb.connect(duck_db_location)
external_db.sql(f"SET secret_directory = '{secrets_dir}';")
external_db.execute("CREATE SCHEMA IF NOT EXISTS first;")
return external_db

def _fs_sql_client_for_external_db(
connection: duckdb.DuckDBPyConnection,
) -> FilesystemSqlClient:
return FilesystemSqlClient(
dataset_name="second",
fs_client=pipeline.destination_client(), # type: ignore
credentials=DuckDbCredentials(connection),
)

def _secrets_exist() -> bool:
return os.path.isdir(secrets_dir) and len(os.listdir(secrets_dir)) > 0

# first test what happens if there are no external secrets
external_db = _external_duckdb_connection()
fs_sql_client = _fs_sql_client_for_external_db(external_db)
with fs_sql_client as sql_client:
sql_client.create_views_for_tables({"items": "items"})
external_db.close()

# no secrets will not work
external_db = _external_duckdb_connection()
with pytest.raises(HTTPException):
assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3
external_db.close()

# add secrets and check that they are there
external_db = _external_duckdb_connection()
fs_sql_client = _fs_sql_client_for_external_db(external_db)
with fs_sql_client as sql_client:
fs_sql_client.create_authentication(persistent=True)
assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3
assert _secrets_exist()

# remove secrets and check that they are removed
with fs_sql_client as sql_client:
fs_sql_client.drop_authentication()
assert not _secrets_exist()
external_db.close()

# prevent creating persistent secrets on in mem databases
fs_sql_client = FilesystemSqlClient(
dataset_name="second",
fs_client=pipeline.destination_client(), # type: ignore
)
with pytest.raises(Exception):
with fs_sql_client as sql_client:
fs_sql_client.create_authentication(persistent=True)


@pytest.mark.essential
@pytest.mark.parametrize(
"destination_config",
Expand Down
106 changes: 106 additions & 0 deletions tests/pipeline/test_filesystem_sql_secrets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import pytest
import os

from tests.utils import TEST_STORAGE_ROOT
from tests.load.utils import (
destinations_configs,
DestinationTestConfiguration,
AWS_BUCKET,
)
from dlt.common.utils import uniq_id


@pytest.mark.essential
@pytest.mark.parametrize(
"destination_config",
destinations_configs(all_buckets_filesystem_configs=True, bucket_subset=(AWS_BUCKET,)),
ids=lambda x: x.name,
)
def test_secrets_management(
destination_config: DestinationTestConfiguration, capfd: pytest.CaptureFixture
) -> None:
"""Test the handling of secrets by the sql_client, we only need to do this on s3
as the other destinations work accordingly"""
warning_mesage = "You are persisting duckdb secrets but are storing them in the default folder"

pipeline = destination_config.setup_pipeline(
"read_pipeline",
dataset_name="read_test",
)
pipeline.run([1, 2, 3], table_name="items")

import duckdb
from duckdb import HTTPException
from dlt.destinations.impl.filesystem.sql_client import (
FilesystemSqlClient,
DuckDbCredentials,
)

duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id()
secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}"

def _external_duckdb_connection() -> duckdb.DuckDBPyConnection:
external_db = duckdb.connect(duck_db_location)
external_db.sql(f"SET secret_directory = '{secrets_dir}';")
external_db.execute("CREATE SCHEMA IF NOT EXISTS first;")
return external_db

def _fs_sql_client_for_external_db(
connection: duckdb.DuckDBPyConnection,
) -> FilesystemSqlClient:
return FilesystemSqlClient(
dataset_name="second",
fs_client=pipeline.destination_client(), # type: ignore
credentials=DuckDbCredentials(connection),
)

def _secrets_exist() -> bool:
return os.path.isdir(secrets_dir) and len(os.listdir(secrets_dir)) > 0

# first test what happens if there are no external secrets
external_db = _external_duckdb_connection()
fs_sql_client = _fs_sql_client_for_external_db(external_db)
with fs_sql_client as sql_client:
sql_client.create_views_for_tables({"items": "items"})
external_db.close()

# no secrets will not work
external_db = _external_duckdb_connection()
with pytest.raises(HTTPException):
assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3
external_db.close()

# add secrets and check that they are there
external_db = _external_duckdb_connection()
fs_sql_client = _fs_sql_client_for_external_db(external_db)
with fs_sql_client as sql_client:
fs_sql_client.create_authentication(persistent=True)
assert len(external_db.sql("SELECT * FROM second.items").fetchall()) == 3
assert _secrets_exist()

# remove secrets and check that they are removed
with fs_sql_client as sql_client:
fs_sql_client.drop_authentication()
assert not _secrets_exist()
external_db.close()

# prevent creating persistent secrets on in mem databases
fs_sql_client = FilesystemSqlClient(
dataset_name="second",
fs_client=pipeline.destination_client(), # type: ignore
)
with pytest.raises(Exception):
with fs_sql_client as sql_client:
fs_sql_client.create_authentication(persistent=True)

# check that no warning was logged
assert warning_mesage not in capfd.readouterr().err

# check that warning is logged when secrets are persisted in the default folder
duck_db_location = TEST_STORAGE_ROOT + "/" + uniq_id()
secrets_dir = f"{TEST_STORAGE_ROOT}/duck_secrets_{uniq_id()}"
duck_db = duckdb.connect(duck_db_location)
fs_sql_client = _fs_sql_client_for_external_db(duck_db)
with fs_sql_client as sql_client:
sql_client.create_authentication(persistent=True)
assert warning_mesage in capfd.readouterr().err

0 comments on commit cd30eb8

Please sign in to comment.