diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d017675..d663902 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,17 +30,9 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.7', '3.10'] - dbt-version: [0.18.x, 0.19.x, 1.1.x, 1.4.x] - exclude: - - python-version: '3.10' - dbt-version: 0.18.x - - python-version: '3.10' - dbt-version: 0.19.x - - python-version: '3.7' - dbt-version: 1.1.x - - python-version: '3.7' - dbt-version: 1.4.x + python-version: ['3.8', '3.11'] + dbt-version: [1.5.x, 1.6.x] + steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} @@ -51,17 +43,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements/requirements_dbt_${{ matrix.dbt-version }}.txt - # There is more than one target in profiles.yml because - # dbt-sqlite for dbt~=0.18.x uses a different format than other - # versions. - - name: Run tests for dbt~=0.18.x - if: ${{ matrix.dbt-version == '0.18.x' }} - env: - TARGET: ${{ matrix.dbt-version }} - run: | - python tests/test.py - - name: Run tests for other dbt versions - if: ${{ matrix.dbt-version != '0.18.x' }} + - name: Run tests env: TARGET: default run: | diff --git a/.gitignore b/.gitignore index 5391d87..437f041 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,11 @@ dmypy.json .pytype/ # Cython debug symbols -cython_debug/ \ No newline at end of file +cython_debug/ + +# testing stuff +dbt_invoke/test_run.py + +# duck db files +*.duckdb +.user.yml diff --git a/dbt_invoke/internal/_utils.py b/dbt_invoke/internal/_utils.py index 05178e9..7f69a7e 100644 --- a/dbt_invoke/internal/_utils.py +++ b/dbt_invoke/internal/_utils.py @@ -120,10 +120,7 @@ def get_project_info(ctx, project_dir=None): :return: None """ project = Project(project_dir) - if DBT_VERSION < '1.5.0': - project_path = get_nearest_project_dir(project) - else: - project_path = get_nearest_project_dir(project.project_dir) + project_path = get_nearest_project_dir(project.project_dir) project_yml_path = Path(project_path, 'dbt_project.yml') # Get project configuration values from dbt_project.yml # (or use dbt defaults) @@ -207,6 +204,11 @@ def dbt_ls( except ValueError: result_lines_filtered.append(line) continue + data = line_dict.get("data") + if data and "msg" in data: + line_dict = json.loads(data["msg"]) + else: + continue # If 'resource_type' is in line_dict, then this is likely # an actual result and not something else like a warning. if 'resource_type' in line_dict: diff --git a/dbt_invoke/internal/_version.py b/dbt_invoke/internal/_version.py index d93b5b2..1f356cc 100644 --- a/dbt_invoke/internal/_version.py +++ b/dbt_invoke/internal/_version.py @@ -1 +1 @@ -__version__ = '0.2.3' +__version__ = '1.0.0' diff --git a/dbt_invoke/properties.py b/dbt_invoke/properties.py index d8440c8..01412e1 100644 --- a/dbt_invoke/properties.py +++ b/dbt_invoke/properties.py @@ -521,25 +521,7 @@ def _transform_ls_results(ctx, **kwargs): potential_result_paths = None results = dict() for i, potential_result in enumerate(potential_results): - if 'original_file_path' in potential_result: - potential_result_path = potential_result['original_file_path'] - # Before dbt version 0.20.0, original_file_path was not - # included in the json response of "dbt ls". For older - # versions of dbt, we need to run "dbt ls" with the - # "--output path" argument in order to retrieve paths - else: - if potential_result_paths is None: - potential_result_paths = _utils.dbt_ls( - ctx, - supported_resource_types=_SUPPORTED_RESOURCE_TYPES, - logger=_LOGGER, - output='path', - **kwargs, - ) - assert len(potential_result_paths) == len( - potential_results - ), 'Length of results differs from length of result details' - potential_result_path = potential_result_paths[i] + potential_result_path = potential_result['original_file_path'] if Path(ctx.config['project_path'], potential_result_path).exists(): results[potential_result_path] = potential_result _LOGGER.info( @@ -782,25 +764,7 @@ def _get_columns(ctx, resource_location, resource_dict, **kwargs): relevant_lines = list( filter( - lambda x: x.get( - # dbt-core>=1.0,<1.4 - # run-operation logs contain structure - # { - # 'code': 'M011', - # 'msg': ['column1', 'column2', ...] - # } - 'code', - # dbt-core>=1.4 - # run-operation logs contain structure - # { - # 'info': { - # 'code': 'M011', - # 'msg': "['column1', 'column2', ...]" # string value - # } - # } - x.get('info', dict()).get('code'), - ) - == 'M011', + lambda x: x["info"].get("code") == "I062", result_lines, ) ) @@ -810,21 +774,16 @@ def _get_columns(ctx, resource_location, resource_dict, **kwargs): 'msg', relevant_line.get('info', dict()).get('msg'), ) - else: - # for older dbt-core versions, we need to cross fingers a little harder - relevant_lines = result_lines[1:] - # also, the message key is different - columns = relevant_lines[-1].get('message') - # In some version of dbt columns are not passed as valid json but as - # a string representation of a list - is_string_list = ( - isinstance(columns, str) - and columns.startswith('[') - and columns.endswith(']') - ) - if is_string_list: - columns = ast.literal_eval(columns) - return columns + # In some version of dbt columns are not passed as valid json but as + # a string representation of a list + is_string_list = ( + isinstance(columns, str) + and columns.startswith('[') + and columns.endswith(']') + ) + if is_string_list: + columns = ast.literal_eval(columns) + return columns def _structure_property_file_dict(location, resource_dict, columns_list): diff --git a/requirements/requirements_dbt_0.18.x.txt b/requirements/requirements_dbt_0.18.x.txt deleted file mode 100644 index f41c790..0000000 --- a/requirements/requirements_dbt_0.18.x.txt +++ /dev/null @@ -1,11 +0,0 @@ -agate<1.6.2 -cryptography<3 -dbt-core~=0.18.0 -dbt-sqlite~=0.0.4 -invoke>=1.4.1 -MarkupSafe==2.0.1 -PyYAML>=5.1 -pyopenssl<20.0.0 -pytz<2021.0 -ruamel.yaml>=0.17.12 --e . \ No newline at end of file diff --git a/requirements/requirements_dbt_0.19.x.txt b/requirements/requirements_dbt_0.19.x.txt deleted file mode 100644 index ef6276a..0000000 --- a/requirements/requirements_dbt_0.19.x.txt +++ /dev/null @@ -1,9 +0,0 @@ -agate<1.6.2 -dbt-core~=0.19.0 -dbt-sqlite~=0.1.0 -invoke>=1.4.1 -MarkupSafe==2.0.1 -pytz<2021.0 -PyYAML>=5.1 -ruamel.yaml>=0.17.12 --e . \ No newline at end of file diff --git a/requirements/requirements_dbt_1.1.x.txt b/requirements/requirements_dbt_1.1.x.txt deleted file mode 100644 index 7cfc2a3..0000000 --- a/requirements/requirements_dbt_1.1.x.txt +++ /dev/null @@ -1,8 +0,0 @@ -agate<1.6.4 -dbt-core~=1.1.0 -dbt-sqlite~=1.1.0 -invoke>=1.4.1 -pytz<2021.0 -PyYAML>=5.1 -ruamel.yaml>=0.17.12 --e . diff --git a/requirements/requirements_dbt_1.4.x.txt b/requirements/requirements_dbt_1.5.x.txt similarity index 67% rename from requirements/requirements_dbt_1.4.x.txt rename to requirements/requirements_dbt_1.5.x.txt index e0a1a04..47753b4 100644 --- a/requirements/requirements_dbt_1.4.x.txt +++ b/requirements/requirements_dbt_1.5.x.txt @@ -1,6 +1,6 @@ agate>=1.6,<1.7.1 -dbt-core~=1.4.0 -dbt-sqlite~=1.4.0 +dbt-core~=1.5.0 +dbt-duckdb~=1.5.0 invoke>=1.4.1 PyYAML>=5.1 ruamel.yaml>=0.17.12 diff --git a/requirements/requirements_dbt_1.6.x.txt b/requirements/requirements_dbt_1.6.x.txt new file mode 100644 index 0000000..4e55e2b --- /dev/null +++ b/requirements/requirements_dbt_1.6.x.txt @@ -0,0 +1,7 @@ +agate>=1.6,<1.7.1 +dbt-core~=1.6.0 +dbt-duckdb~=1.6.0 +invoke>=1.4.1 +PyYAML>=5.1 +ruamel.yaml>=0.17.12 +-e . diff --git a/tests/data_files/customers.csv b/tests/data_files/customers.csv new file mode 100644 index 0000000..375f0d3 --- /dev/null +++ b/tests/data_files/customers.csv @@ -0,0 +1,4 @@ +customer_id,created_at +1,2022-01-01 +2,2022-01-01 +3,2022-01-01 diff --git a/tests/data_files/items.csv b/tests/data_files/items.csv new file mode 100644 index 0000000..2c20845 --- /dev/null +++ b/tests/data_files/items.csv @@ -0,0 +1,2 @@ +item_id,price,updated_at +1,1,2023-01-01 diff --git a/tests/data_files/orders.csv b/tests/data_files/orders.csv new file mode 100644 index 0000000..bdd459b --- /dev/null +++ b/tests/data_files/orders.csv @@ -0,0 +1,2 @@ +order_id,customer_id,item_id,quantity,order_at +1,1,1,2,2022-01-01 diff --git a/tests/dbt_project_files/dbt_project.yml b/tests/dbt_project.yml similarity index 90% rename from tests/dbt_project_files/dbt_project.yml rename to tests/dbt_project.yml index 1e02d43..3afa9ee 100644 --- a/tests/dbt_project_files/dbt_project.yml +++ b/tests/dbt_project.yml @@ -2,7 +2,7 @@ name: test_dbt_project version: 1.0.0 config-version: 2 -profile: dbt-sqlite +profile: dbt-duckdb seed-paths: ["data"] diff --git a/tests/dbt_project_files/dbt_project_pre_dbt_v1.yml b/tests/dbt_project_files/dbt_project_pre_dbt_v1.yml deleted file mode 100644 index e4779a6..0000000 --- a/tests/dbt_project_files/dbt_project_pre_dbt_v1.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: test_dbt_project -version: 1.0.0 -config-version: 2 - -profile: dbt-sqlite - -clean-targets: - - target - - dbt_modules - - logs - -analysis-paths: - - analyses - -snapshots: - +target_schema: main \ No newline at end of file diff --git a/tests/test.py b/tests/test.py index b97b620..d3326df 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,11 +1,9 @@ +import itertools import os +import sys import unittest from pathlib import Path from unittest.mock import patch -import sys -import pkg_resources -import shutil -import itertools import invoke @@ -27,24 +25,6 @@ def setUpClass(cls): cls.config_path = Path(PARENT_DIR, 'test_config.yml') cls.config = _utils.parse_yaml(cls.config_path) - # for backward compatibility, select the correct dbt_project.yml file - if pkg_resources.get_distribution("dbt-core").version >= '1.0.0': - shutil.copy( - Path(PARENT_DIR, 'dbt_project_files/dbt_project.yml'), - Path( - PARENT_DIR, cls.config['project_name'], 'dbt_project.yml' - ), - ) - else: - shutil.copy( - Path( - PARENT_DIR, 'dbt_project_files/dbt_project_pre_dbt_v1.yml' - ), - Path( - PARENT_DIR, cls.config['project_name'], 'dbt_project.yml' - ), - ) - cls.project_dir = Path(PARENT_DIR, cls.config['project_name']) cls.profiles_dir = Path(PARENT_DIR, cls.config['project_name']) cls.test_base_dir = PARENT_DIR @@ -58,18 +38,40 @@ def setUpClass(cls): cls.ctx.config['macro_paths'][0], f'{cls.macro_name}.sql', ) + cls.dbt_seed = ( + 'dbt seed' + f' --project-dir {cls.project_dir}' + f' --profiles-dir {cls.project_dir}' + f' --target-path {cls.project_dir}/target' + ) cls.dbt_clean = ( 'dbt clean' f' --project-dir {cls.project_dir}' f' --profiles-dir {cls.project_dir}' ) + cls.dbt_run = ( + 'dbt run' + f' --project-dir {cls.project_dir}' + f' --profiles-dir {cls.project_dir}' + f' --target-path {cls.project_dir}/target' + ) + cls.dbt_snapshot = ( + 'dbt snapshot' + f' --project-dir {cls.project_dir}' + f' --profiles-dir {cls.project_dir}' + f' --target-path {cls.project_dir}/target' + ) cls.dbt_compile = ( 'dbt compile' f' --project-dir {cls.project_dir}' f' --profiles-dir {cls.project_dir}' + f' --target-path {cls.project_dir}/target' ) + invoke.run(cls.dbt_seed) invoke.run(cls.dbt_clean) invoke.run(cls.dbt_compile) + invoke.run(cls.dbt_run) + invoke.run(cls.dbt_snapshot) def setUp(self): """ diff --git a/tests/test_config.yml b/tests/test_config.yml index 72a7530..7841393 100644 --- a/tests/test_config.yml +++ b/tests/test_config.yml @@ -61,6 +61,10 @@ expected_properties: description: '' - name: updated_at description: '' + - name: dbt_scd_id + description: '' + - name: dbt_updated_at + description: '' - name: dbt_valid_from description: '' - name: dbt_valid_to diff --git a/tests/test_dbt_project/dbt_project.yml b/tests/test_dbt_project/dbt_project.yml index 1e02d43..3afa9ee 100644 --- a/tests/test_dbt_project/dbt_project.yml +++ b/tests/test_dbt_project/dbt_project.yml @@ -2,7 +2,7 @@ name: test_dbt_project version: 1.0.0 config-version: 2 -profile: dbt-sqlite +profile: dbt-duckdb seed-paths: ["data"] diff --git a/tests/test_dbt_project/models/marts/core/customers.sql b/tests/test_dbt_project/models/marts/core/customers.sql index 2a7ecaa..51acf02 100644 --- a/tests/test_dbt_project/models/marts/core/customers.sql +++ b/tests/test_dbt_project/models/marts/core/customers.sql @@ -2,4 +2,4 @@ SELECT customer_id , created_at FROM - customers \ No newline at end of file + {{ source('external_source', 'customers') }} \ No newline at end of file diff --git a/tests/test_dbt_project/models/marts/core/orders.sql b/tests/test_dbt_project/models/marts/core/orders.sql index fd56b90..d0158cb 100644 --- a/tests/test_dbt_project/models/marts/core/orders.sql +++ b/tests/test_dbt_project/models/marts/core/orders.sql @@ -5,4 +5,4 @@ SELECT , quantity , order_at FROM - orders \ No newline at end of file + {{ source('external_source', 'orders') }} \ No newline at end of file diff --git a/tests/test_dbt_project/models/sources.yml b/tests/test_dbt_project/models/sources.yml new file mode 100644 index 0000000..1d42657 --- /dev/null +++ b/tests/test_dbt_project/models/sources.yml @@ -0,0 +1,8 @@ +sources: + - name: external_source + meta: + external_location: "tests/data_files/{name}.csv" + tables: + - name: customers + - name: orders + - name: items diff --git a/tests/test_dbt_project/profiles.yml b/tests/test_dbt_project/profiles.yml index 02a5eac..3d273a0 100644 --- a/tests/test_dbt_project/profiles.yml +++ b/tests/test_dbt_project/profiles.yml @@ -1,33 +1,9 @@ config: send_anonymous_usage_stats: False -# Credit to https://github.com/codeforkjeff/dbt-sqlite -dbt-sqlite: +dbt-duckdb: target: "{{ env_var('TARGET', 'default') }}" outputs: - # There is more than one target because dbt-sqlite for dbt~=0.18.x - # uses a different format for schemas_and_paths - 0.18.x: - type: &type sqlite - # sqlite locks the whole db on writes so anything > 1 won't help - threads: &threads 1 - # Value is arbitrary - database: &database database - # Value of 'schema' must be defined in schema_paths below. - # In most cases, this should be 'main' - schema: &schema main - # Connect schemas to paths: at least one of these must be 'main' - schemas_and_paths: main=test.db - # Directory where all *.db files are attached as schema, using - # base filename as schema name, and where new schema are created. - # This can overlap with the dirs of files in schemas_and_paths as - # long as there are no conflicts. - schema_directory: &schema_directory . default: - type: *type - threads: *threads - database: *database - schema: *schema - schemas_and_paths: - main: test.db - schema_directory: *schema_directory + type: duckdb + path: ./dbt.duckdb diff --git a/tests/test_dbt_project/snapshots/items_snapshot.sql b/tests/test_dbt_project/snapshots/items_snapshot.sql index b6c5187..1fc9778 100644 --- a/tests/test_dbt_project/snapshots/items_snapshot.sql +++ b/tests/test_dbt_project/snapshots/items_snapshot.sql @@ -6,5 +6,5 @@ updated_at='updated_at' ) }} - select * from {{ ref('items') }} + select * from {{ source('external_source', 'items') }} {% endsnapshot %} \ No newline at end of file diff --git a/tests/test_dbt_project/test.db b/tests/test_dbt_project/test.db deleted file mode 100644 index bfa2676..0000000 Binary files a/tests/test_dbt_project/test.db and /dev/null differ diff --git a/tests/test_utils.py b/tests/test_utils.py index b7a491a..a3dc7fb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -42,12 +42,13 @@ def test_dbt_ls(self): project_dir=self.project_dir, profiles_dir=self.profiles_dir, supported_resource_types=SUPPORTED_RESOURCE_TYPES, - output='path', + output='json', logger=self.logger, **dbt_ls_kwargs, ) result_parts = [ - list(Path(line).parts) for line in result_lines + list(Path(line['original_file_path']).parts) + for line in result_lines ] self.assertCountEqual(result_parts, expected_result_parts)