From 524cec4a6b2c043c60067cc16ac7058a87ceadc8 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Mon, 6 Feb 2023 15:01:15 +0000 Subject: [PATCH] add precision option to equality test --- CHANGELOG.md | 6 ++ README.md | 3 +- .../data_test_equality_floats.csv | 11 +++ .../data_test_equality_floats_columns.csv | 11 +++ integration_tests/dbt_project.yml | 10 ++- .../models/generic_tests/schema.yml | 40 ++++++++- .../generic_tests/test_equality_floats.sql | 9 ++ .../test_equality_floats_columns.sql | 9 ++ macros/generic_tests/equality.sql | 86 +++++++++++++------ 9 files changed, 152 insertions(+), 33 deletions(-) create mode 100644 integration_tests/data/schema_tests/data_test_equality_floats.csv create mode 100644 integration_tests/data/schema_tests/data_test_equality_floats_columns.csv create mode 100644 integration_tests/models/generic_tests/test_equality_floats.sql create mode 100644 integration_tests/models/generic_tests/test_equality_floats_columns.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b7bd526..0be322ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ ## Contributors: ---> +# Unreleased +## New features +- The `equality` test now accepts an additional argument, `precision`, to aid in comparing floating point numbers ([#757](https://github.com/dbt-labs/dbt-utils/issues/757), [#765](https://github.com/dbt-labs/dbt-utils/pull/765)) +## Contributors: +- [@rlh1994](https://github.com/rlh1994) + # Unreleased ## New features - Add option to ignore columns in equality test ([#734](https://github.com/dbt-labs/dbt-utils/issues/734), [#737](https://github.com/dbt-labs/dbt-utils/pull/737)) diff --git a/README.md b/README.md index a6b330ac..3aa818a3 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr ### equality ([source](macros/generic_tests/equality.sql)) -Asserts the equality of two relations. Optionally specify a subset of columns to compare or ignore. 
+Asserts the equality of two relations. Optionally specify a subset of columns to compare or ignore, and a precision to compare numeric columns on. **Usage:** @@ -136,6 +136,7 @@ models: compare_columns: - first_column - second_column + precision: 4 # compare all columns except the ones on the ignore list - name: model_name_ignore_columns diff --git a/integration_tests/data/schema_tests/data_test_equality_floats.csv b/integration_tests/data/schema_tests/data_test_equality_floats.csv new file mode 100644 index 00000000..85241961 --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_equality_floats.csv @@ -0,0 +1,11 @@ +id,float_number +05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186 +cfae9054-940b-42a1-84d4-052daae6194f,81.2511656 +6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675 +c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841 +59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434 +b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425 +26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223 +b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680 +11c979b7-2661-4375-8143-7c9b54b90627,19.5755431 +a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047 diff --git a/integration_tests/data/schema_tests/data_test_equality_floats_columns.csv b/integration_tests/data/schema_tests/data_test_equality_floats_columns.csv new file mode 100644 index 00000000..77beeae9 --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_equality_floats_columns.csv @@ -0,0 +1,11 @@ +id,float_number,to_ignore +05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,a +cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,a +6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,a +c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,a +59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,a +b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,a +26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,a +b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,a +11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,a 
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,a diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 252b9cd7..e6318140 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -50,7 +50,7 @@ seeds: sql: data_events_20180103: +schema: events - + data_get_column_values_dropped: # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}" @@ -71,3 +71,11 @@ seeds: data_test_sequential_timestamps: +column_types: my_timestamp: timestamp + + data_test_equality_floats: + +column_types: + float_number: float + + data_test_equality_floats_columns: + +column_types: + float_number: float diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index a89d83e1..d96cc6c3 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -89,14 +89,14 @@ seeds: upper_bound_column: valid_to partition_by: subscription_id zero_length_range_allowed: true - + - name: data_unique_combination_of_columns tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: - month - product - + - name: data_cardinality_equality_a columns: - name: same_name @@ -209,7 +209,41 @@ models: - first_name - last_name - email - + + - name: test_equality_floats + tests: + # test precision only + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats') + precision: 4 + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats') + precision: 8 + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + + - name: test_equality_floats_columns + tests: + # Positive assertion tests + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats_columns') + compare_columns: + - id + - float_number + precision: 4 + - dbt_utils.equality: + 
compare_model: ref('data_test_equality_floats_columns') + ignore_columns: + - to_ignore + precision: 4 + # all columns should fail even with rounding + - dbt_utils.equality: + compare_model: ref('data_test_equality_floats_columns') + precision: 4 + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + + - name: test_fewer_rows_than tests: - dbt_utils.fewer_rows_than: diff --git a/integration_tests/models/generic_tests/test_equality_floats.sql b/integration_tests/models/generic_tests/test_equality_floats.sql new file mode 100644 index 00000000..9c84bb5c --- /dev/null +++ b/integration_tests/models/generic_tests/test_equality_floats.sql @@ -0,0 +1,9 @@ +with data as ( + + select * from {{ ref('data_test_equality_floats') }} + +) + +select + id, float_number + 0.0000001 as float_number +from data diff --git a/integration_tests/models/generic_tests/test_equality_floats_columns.sql b/integration_tests/models/generic_tests/test_equality_floats_columns.sql new file mode 100644 index 00000000..bda9b248 --- /dev/null +++ b/integration_tests/models/generic_tests/test_equality_floats_columns.sql @@ -0,0 +1,9 @@ +with data as ( + + select * from {{ ref('data_test_equality_floats') }} + +) + +select + id, float_number + 0.0000001 as float_number, 'b' as to_ignore +from data diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index 3ef74c10..e844a24a 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -1,14 +1,14 @@ -{% test equality(model, compare_model, compare_columns=None, ignore_columns=None) %} - {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, ignore_columns)) }} +{% test equality(model, compare_model, compare_columns=None, ignore_columns=None, precision = None) %} + {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, ignore_columns, precision)) }} {% endtest %} -{% macro 
default__test_equality(model, compare_model, compare_columns=None, ignore_columns=None) %} +{% macro default__test_equality(model, compare_model, compare_columns=None, ignore_columns=None, precision = None) %} {%- if compare_columns and ignore_columns -%} {{ exceptions.raise_compiler_error("Both a compare and an ignore list were provided to the `equality` macro. Only one is allowed") }} {%- endif -%} -{% set set_diff %} +{% set set_diff %} count(*) + coalesce(abs( sum(case when which_diff = 'a_minus_b' then 1 else 0 end) - sum(case when which_diff = 'b_minus_a' then 1 else 0 end) @@ -26,32 +26,62 @@ -- setup {%- do dbt_utils._is_relation(model, 'test_equality') -%} -{#- -If the compare_cols arg is provided, we can run this test without querying the -information schema — this allows the model to be an ephemeral model --#} - -{%- if not compare_columns -%} +{%- if not precision -%} + {#- + If the compare_columns arg is provided, we can run this test without querying the + information schema — this allows the model to be an ephemeral model + -#} + {%- if not compare_columns -%} + {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} + {%- set compare_columns = adapter.get_columns_in_relation(model)-%} + + + {%- if ignore_columns -%} + {#-- Lower case ignore columns for easier comparison --#} + {%- set ignore_columns = ignore_columns | map("lower") | list %} + + {# Filter out the excluded columns #} + {%- set include_columns = [] %} + {%- for column in compare_columns -%} + {%- if column.name | lower not in ignore_columns -%} + {% do include_columns.append(column) %} + {%- endif %} + {%- endfor %} + + {%- set compare_columns = include_columns | map(attribute='quoted') %} + {%- else -%} + {%- set compare_columns = compare_columns | map(attribute='quoted') %} + {%- endif -%} + {%- endif -%} + + {% set compare_cols_csv = compare_columns | join(', ') %} + +{% else %} + {#- + If rounding is required, we need to get the types, so it can't be ephemeral + -#} + {%- do 
dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') -%} -{%- endif -%} - -{%- if ignore_columns -%} - {#-- Lower case ignore columns for easier comparison --#} - {%- set ignore_columns = ignore_columns | map("lower") | list %} - - {%- set include_columns = [] %} - {%- for column in compare_columns -%} - {%- if column | lower not in ignore_columns -%} - {% do include_columns.append(column) %} - {%- endif %} - {%- endfor %} + {%- set columns = adapter.get_columns_in_relation(model) -%} + + {% set columns_list = [] %} + {%- for col in columns -%} + {%- if ( + (col.name|lower in compare_columns|map('lower') or not compare_columns) and + (col.name|lower not in ignore_columns|map('lower') or not ignore_columns) + ) -%} + {# Databricks double type is not picked up by any number type checks in dbt #} + {%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%} + {# Cast is required due to postgres not having round for a double precision number #} + {%- do columns_list.append('round(cast(' ~ col.quoted ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.quoted) -%} + {%- else -%} + {%- do columns_list.append(col.quoted) -%} + {%- endif -%} + {% endif %} + {%- endfor -%} + + {% set compare_cols_csv = columns_list | join(', ') %} - {%- set compare_columns = include_columns %} - -{%- endif -%} - -{% set compare_cols_csv = compare_columns | join(', ') %} +{% endif %} with a as (