diff --git a/CHANGELOG.md b/CHANGELOG.md index db3b0c03..c708eb6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ ## Contributors: ---> +# Unreleased +## New features +- The `equality` test now accepts an additional argument, `precision`, to aid in comparing floating point numbers ([#757](https://github.com/dbt-labs/dbt-utils/issues/757), [#765](https://github.com/dbt-labs/dbt-utils/pull/765)) +## Contributors: @rlh1994 + # dbt utils v1.0 ## Migration Guide diff --git a/README.md b/README.md index eee75ede..91b55479 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr #### equality ([source](macros/generic_tests/equality.sql)) -Asserts the equality of two relations. Optionally specify a subset of columns to compare. +Asserts the equality of two relations. Optionally specify a subset of columns to compare, and a precision to compare numeric columns on. **Usage:** @@ -128,6 +128,7 @@ models: compare_columns: - first_column - second_column + precision: 4 ``` #### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) diff --git a/integration_tests/data/etc/data_test_equality_floats.csv b/integration_tests/data/etc/data_test_equality_floats.csv new file mode 100644 index 00000000..85241961 --- /dev/null +++ b/integration_tests/data/etc/data_test_equality_floats.csv @@ -0,0 +1,11 @@ +id,float_number +05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186 +cfae9054-940b-42a1-84d4-052daae6194f,81.2511656 +6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675 +c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841 +59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434 +b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425 +26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223 +b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680 +11c979b7-2661-4375-8143-7c9b54b90627,19.5755431 +a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047 diff --git a/integration_tests/dbt_project.yml 
b/integration_tests/dbt_project.yml index 252b9cd7..57c7ff91 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -50,7 +50,7 @@ seeds: sql: data_events_20180103: +schema: events - + data_get_column_values_dropped: # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}" @@ -71,3 +71,8 @@ seeds: data_test_sequential_timestamps: +column_types: my_timestamp: timestamp + + etc: + data_test_equality_floats: + +column_types: + float_number: float diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index 96a46a3b..908034f7 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -89,14 +89,14 @@ seeds: upper_bound_column: valid_to partition_by: subscription_id zero_length_range_allowed: true - + - name: data_unique_combination_of_columns tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: - month - product - + - name: data_cardinality_equality_a columns: - name: same_name @@ -191,7 +191,18 @@ models: - first_name - last_name - email - + + - name: test_equality_floats + tests: + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats') + precision: 4 + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats') + precision: 8 + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + - name: test_fewer_rows_than tests: - dbt_utils.fewer_rows_than: diff --git a/integration_tests/models/generic_tests/test_equality_floats.sql b/integration_tests/models/generic_tests/test_equality_floats.sql new file mode 100644 index 00000000..9c84bb5c --- /dev/null +++ b/integration_tests/models/generic_tests/test_equality_floats.sql @@ -0,0 +1,9 @@ +with data as ( + + select * from {{ ref('data_test_equality_floats') }} + +) + +select + 
id, float_number + 0.0000001 as float_number +from data diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index ffc6a2b8..5fe1f623 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -1,8 +1,8 @@ -{% test equality(model, compare_model, compare_columns=None) %} - {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }} +{% test equality(model, compare_model, compare_columns=None, precision = None) %} + {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, precision)) }} {% endtest %} -{% macro default__test_equality(model, compare_model, compare_columns=None) %} +{% macro default__test_equality(model, compare_model, compare_columns=None, precision = None) %} {% set set_diff %} count(*) + coalesce(abs( @@ -22,17 +22,39 @@ -- setup {%- do dbt_utils._is_relation(model, 'test_equality') -%} -{#- -If the compare_cols arg is provided, we can run this test without querying the -information schema — this allows the model to be an ephemeral model --#} - -{%- if not compare_columns -%} +{%- if not precision -%} + {#- + If the compare_columns arg is provided, we can run this test without querying the + information schema — this allows the model to be an ephemeral model + -#} + {%- if not compare_columns -%} + {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} + {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} + {%- endif -%} + + {% set compare_cols_csv = compare_columns | join(', ') %} +{% else %} + {#- + If rounding is required, we need to get the types, so it can't be ephemeral + -#} {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} -{%- endif -%} - -{% set compare_cols_csv = compare_columns | join(', ') %} + {%- set columns = adapter.get_columns_in_relation(model) 
-%} + + {% set columns_list = [] %} + {%- for col in columns -%} + {%- if (compare_columns and col.name|lower in compare_columns|map('lower')) or not compare_columns -%} + {# Databricks double type is not picked up by any number type checks in dbt #} + {%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%} + {# Cast is required due to postgres not having round for a double precision number #} + {%- do columns_list.append('round(cast(' ~ col.name ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.name) -%} + {%- else -%} + {%- do columns_list.append(col.name) -%} + {%- endif -%} + {% endif %} + {%- endfor -%} + + {% set compare_cols_csv = columns_list | join(', ') %} +{% endif %} with a as (