Recover partitions for Spark #136

Open · wants to merge 3 commits into base: main
6 changes: 6 additions & 0 deletions integration_tests/models/plugins/spark/spark_external.yml
@@ -41,6 +41,12 @@ sources:
         columns: *cols-of-the-people
         tests: *equal-to-the-people
 
+      - name: people_csv_partitioned_inferred_using
+        external:
+          <<: *csv-people-using
+          recover_partitions: true
+        tests: *equal-to-the-people
+
       # ----- TODO: hive format
 
       # - name: people_csv_unpartitioned_hive_format
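For context, the '<<: *csv-people-using' line merges a YAML anchor defined earlier in this fixture file. A minimal sketch of the kind of mapping such an anchor provides, with illustrative values (the real fixture's location and options are not visible in this hunk); presumably it supplies 'using' but no explicit 'partitions' list, since this test exercises inferred partitions and only the new 'recover_partitions' flag should trigger recovery:

      # Hypothetical anchor definition elsewhere in the file (values are illustrative):
      - name: people_csv_unpartitioned_using
        external: &csv-people-using
          location: 's3://some-bucket/csv/people/'   # assumed path, not the real fixture
          using: csv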
3 changes: 2 additions & 1 deletion macros/plugins/spark/helpers/recover_partitions.sql
@@ -1,7 +1,8 @@
 {% macro spark__recover_partitions(source_node) %}
 {# https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-table.html #}
 
-{%- if source_node.external.partitions and source_node.external.using and source_node.external.using|lower != 'delta' -%}
+{%- if (source_node.external.partitions or source_node.external.recover_partitions)
+    and source_node.external.using and source_node.external.using|lower != 'delta' -%}
 {% set ddl %}
 ALTER TABLE {{ source(source_node.source_name, source_node.name) }} RECOVER PARTITIONS
 {% endset %}
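After this change, the macro issues the DDL whenever the source either declares explicit 'partitions' or sets 'recover_partitions: true', provided the table has a 'using' format that is not Delta (Delta manages its own partition metadata). For the 'people_csv_partitioned_inferred_using' source above, the rendered statement would be along these lines, with the schema depending on how the source() call resolves (names here are illustrative):

    ALTER TABLE dbt_test_schema.people_csv_partitioned_inferred_using RECOVER PARTITIONS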
12 changes: 12 additions & 0 deletions sample_sources/spark.yml
@@ -37,3 +37,15 @@ sources:
           - name: contexts
             data_type: string
             description: "Contexts attached to event by Tracker"
+
+      - name: event_inferred_schema
+        description: "Snowplow events stored as partitioned parquet files in HDFS with inferred schema"
+        external:
+          # File path can contain partitions such as: hdfs://.../events/my_partition=2022-03-01/events1.parquet
+          # These partitions are excluded from 'location'.
+          location: 'hdfs://.../events/'
+          using: parquet
+
+          # Setting recover_partitions to true causes partitions to be refreshed,
+          # even though partitions are not explicitly specified.
+          recover_partitions: true
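Since the macro paths ('macros/plugins/spark/...') indicate this is the dbt-external-tables package, the new flag takes effect the next time external sources are staged. The standard invocations from the package docs would be:

    dbt run-operation stage_external_sources

    # or, to drop and recreate the external tables from scratch:
    dbt run-operation stage_external_sources --vars "ext_full_refresh: true"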