Skip to content

Commit

Permalink
Normalize is_iceberg Field for Consistency in Snowflake with QUOTED_I…
Browse files Browse the repository at this point in the history
…DENTIFIERS_IGNORE_CASE (#1229)

* Reproduce. Test fail. Fix. Test green.

* Add changelog.

* Make a test that tests the actual behavior without hardcoding the quoted flag.

* I can't believe how elegant a solution this is.

Use a session-only configuration.

* Fix test.

* Simplify logic for this by moving this away from a quoting situation.

Columns that unquoted are capitalized regardless of the quoting ignore flag. So we let it be uppercase and then normalize it down to lowercase in Python.

* edit comments.

---------

Co-authored-by: Mike Alfare <[email protected]>
  • Loading branch information
VersusFacit and mikealfare authored Nov 10, 2024
1 parent 01dbd70 commit f6468f6
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 13 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241104-172349.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Iceberg quoting ignore fix.
time: 2024-11-04T17:23:49.706297-08:00
custom:
Author: versusfacit
Issue: "1227"
5 changes: 5 additions & 0 deletions dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ def list_relations_without_caching(
# this can be collapsed once Snowflake adds is_iceberg to show objects
columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"]
if self.behavior.enable_iceberg_materializations.no_warn:
# The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like
# is_iceberg which is created by dbt, but it does not affect the case
# of column values in Snowflake's SHOW OBJECTS query! This
# normalization step ensures metadata queries are handled consistently.
schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"})
columns.append("is_iceberg")

return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)]
Expand Down
3 changes: 1 addition & 2 deletions dbt/include/snowflake/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@
{% endmacro %}

{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %}

{%- set max_total_results = max_results_per_iter * max_iter -%}
{%- set sql -%}
{% if schema_relation is string %}
Expand All @@ -147,7 +146,7 @@
{# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the
-- latency penalty. #}
{% if adapter.behavior.enable_iceberg_materializations.no_warn %}
select all_objects.*, is_iceberg as "is_iceberg"
select all_objects.*, is_iceberg
from table(result_scan(last_query_id(-1))) all_objects
left join INFORMATION_SCHEMA.tables as all_tables
on all_tables.table_name = all_objects."name"
Expand Down
60 changes: 49 additions & 11 deletions tests/functional/adapter/list_relations_tests/test_show_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest

from pathlib import Path

from dbt.adapters.factory import get_adapter_by_type
from dbt.adapters.snowflake import SnowflakeRelation

Expand Down Expand Up @@ -41,8 +43,32 @@
"""
)

_MODEL_ICEBERG = """
{{
config(
materialized = "table",
table_format="iceberg",
external_volume="s3_iceberg_snow",
)
}}
select 1
"""


class ShowObjectsBase:
@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations


class TestShowObjects:
class TestShowObjects(ShowObjectsBase):
views: int = 10
tables: int = 10
dynamic_tables: int = 10
Expand All @@ -66,16 +92,6 @@ def setup(self, project):
run_dbt(["seed"])
run_dbt(["run"])

@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations

def test_list_relations_without_caching(self, project):
relations = self.list_relations_without_caching(project)
assert len([relation for relation in relations if relation.is_view]) == self.views
Expand All @@ -87,3 +103,25 @@ def test_list_relations_without_caching(self, project):
len([relation for relation in relations if relation.is_dynamic_table])
== self.dynamic_tables
)


class TestShowIcebergObjects(ShowObjectsBase):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {"enable_iceberg_materializations": True}}

@pytest.fixture(scope="class")
def models(self):
return {"my_model.sql": _MODEL_ICEBERG}

def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project):
"""https://github.com/dbt-labs/dbt-snowflake/issues/1227
The list relations function involves a metadata sub-query. Regardless of
QUOTED_IDENTIFIERS_IGNORE_CASE, this function will fail without proper
normalization within the encapsulating python function after the macro invocation
returns. This test verifies that normalization is working.
"""
run_dbt(["run"])

self.list_relations_without_caching(project)

0 comments on commit f6468f6

Please sign in to comment.