diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index ac0c05558e1f4..9e7015cb03f64 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -612,6 +612,7 @@ def get_long_description(): "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource", "gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource", "sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource", + "dummy = datahub.ingestion.source.dummy:DummySource", ], "datahub.ingestion.transformer.plugins": [ "simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership", diff --git a/metadata-ingestion/src/datahub/ingestion/source/dummy.py b/metadata-ingestion/src/datahub/ingestion/source/dummy.py new file mode 100644 index 0000000000000..01083a415ac4b --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/dummy.py @@ -0,0 +1,107 @@ +import logging +from dataclasses import dataclass, field as dataclass_field +from typing import Iterable, List, Optional + +import pydantic +from pydantic import Field + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, + StaleEntityRemovalSourceReport, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, + StatefulIngestionSourceBase, +) +from datahub.metadata.schema_classes import StatusClass +from datahub.utilities.urns.dataset_urn import DatasetUrn + +# Logger instance +logger = logging.getLogger(__name__) + +dummy_datasets: List = ["dummy_dataset1", "dummy_dataset2", "dummy_dataset3"] + + +@dataclass +class DummySourceReport(StaleEntityRemovalSourceReport): + datasets_scanned: int = 0 + filtered_datasets: List[str] = dataclass_field(default_factory=list) + + def report_datasets_scanned(self, count: int = 1) -> None: + self.datasets_scanned += count + + def report_datasets_dropped(self, model: str) -> None: + self.filtered_datasets.append(model) + + +class DummySourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): + dataset_patterns: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for datasets to filter in ingestion.", + ) + # Configuration for stateful ingestion + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field( + default=None, description="Dummy source Ingestion Config." + ) + + +class DummySource(StatefulIngestionSourceBase): + """ + This is dummy source which only extract dummy datasets + """ + + source_config: DummySourceConfig + reporter: DummySourceReport + platform: str = "dummy" + + def __init__(self, config: DummySourceConfig, ctx: PipelineContext): + super(DummySource, self).__init__(config, ctx) + self.source_config = config + self.reporter = DummySourceReport() + # Create and register the stateful ingestion use-case handler. + self.stale_entity_removal_handler = StaleEntityRemovalHandler.create( + self, self.source_config, self.ctx + ) + + @classmethod + def create(cls, config_dict, ctx): + config = DummySourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + self.stale_entity_removal_handler.workunit_processor, + ] + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + """ + Datahub Ingestion framework invoke this method + """ + logger.info("Dummy plugin execution is started") + + for dataset in dummy_datasets: + if not self.source_config.dataset_patterns.allowed(dataset): + self.reporter.report_datasets_dropped(dataset) + continue + else: + self.reporter.report_datasets_scanned() + dataset_urn = DatasetUrn.create_from_ids( + platform_id="postgres", + table_name=dataset, + env=DEFAULT_ENV, + ) + yield MetadataChangeProposalWrapper( + entityUrn=str(dataset_urn), + aspect=StatusClass(removed=False), + ).as_workunit() + + def get_report(self) -> SourceReport: + return self.reporter diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json deleted file mode 100644 index b53d5857f1d66..0000000000000 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ /dev/null @@ -1,2164 +0,0 @@ -[ -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "The country", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "City", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "is_latest", - "nullable": false, - "description": "Is latest data", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BooleanType": {} - } - }, - "nativeDataType": "yesno", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "timestamp", - "nullable": false, - "description": "Timestamp of measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "time", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "average_measurement", - "nullable": false, - "description": "My measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "average", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "foo.view.lkml" - }, - "name": "my_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n is_latest,\n country,\n city,\n timestamp,\n measurement\n FROM\n my_table", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_derived_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "The country", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "City", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "timestamp", - "nullable": false, - "description": "Timestamp of measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "time", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "average_measurement", - "nullable": false, - "description": "My measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "average", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "bar.view.lkml" - }, - "name": "my_derived_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.include_able,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "included_view_file.view.lkml" - }, - "name": "include_able_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml" - }, - "name": "looker_events", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "extending_looker_events", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "additional_measure", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml" - }, - "name": "extending_looker_events", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..autodetect_sql_name_based_on_view_name,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml" - }, - "name": "autodetect_sql_name_based_on_view_name", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.include_able,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "view_declarations.view.lkml" - }, - "name": "test_include_external_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..fragment_derived_view,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "fragment_derived_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "aliased_platform", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "globalTags": { - "tags": [] - }, - "isPartOfKey": false - }, - { - "fieldPath": "country", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "globalTags": { - "tags": [] - }, - "isPartOfKey": false - }, - { - "fieldPath": "date", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "unknown", - "recursive": false, - "globalTags": { - "tags": [] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "nested/fragment_derived.view.lkml" - }, - "name": "fragment_derived_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..order,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "liquid.view.lkml" - }, - "name": "customer_facts", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.ecommerce.ability,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.ecommerce.ability,PROD),pk)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "ability", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "pk", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "count", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "ability.view.lkml" - }, - "name": "ability", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD),id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),id)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD),owner_name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),owner_name)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "owners", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": true - }, - { - "fieldPath": "owner_name", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [ - "id" - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "owners.view.lkml" - }, - "name": "owners", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),country)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.city)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "view_derived_explore", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "unique_countries", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "count_distinct", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "derived_col", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "sum", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "native_derived_table.view.lkml" - }, - "name": "view_derived_explore", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.flightstats.accidents,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.flightstats.accidents,PROD),id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD),id)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "flights", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "nullable": false, - "description": "", - "label": "id", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": true - } - ], - "primaryKeys": [ - "id" - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "flights.view.lkml" - }, - "name": "flights", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Dimension", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Dimension" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Measure", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Measure" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Temporal", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Temporal" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/golden_test_state.json b/metadata-ingestion/tests/integration/lookml/golden_test_state.json new file mode 100644 index 0000000000000..c62106ac10089 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/golden_test_state.json @@ -0,0 +1,26 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(file,lookml_stateful,prod),lookml_stale_entity_removal)", + "changeType": "UPSERT", + "aspectName": "datahubIngestionCheckpoint", + "aspect": { + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "pipelineName": "lookml_stateful", + "platformInstanceId": "", + "config": "", + "state": { + "formatVersion": "1.0", + "serde": "base85-bz2-json", + "payload": "LRx4!F+o`-Q(4)<4JiNuUmt)_WdINa0@Mn>@BivB0a-v1sF;Ar&}h&A0K-EjK*+=xnKU%Oib;?JVrrXB7?aRqCarWwpZm8v5Yh+DsN{|c*msMh9%WJXjKPvIPsDn^@g3;DD9Q9kBh?*|=8M4uRW$_0HKn3XhN;RhAcLIBhLnO2%UA@Ykl;h&Xx(^@2;Y9C#d4g3K_2CA-I*M)h{NMA8Nu4C3XjEQYdh{nR--&lfRUsTL}OOkOO435f=1nKzYJ^9)mbBljM0}gaqy26URw1=q<80Eb9y)y?Vl88kG;g~MToq#r%6trK9U`U?k}RS<@^?i@1M1@9*%tk}1N3hRzUaNB" + }, + "runId": "lookml-test" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_golden_deleted_stateful.json deleted file mode 100644 index a323118666940..0000000000000 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_golden_deleted_stateful.json +++ /dev/null @@ -1,650 +0,0 @@ -[ -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..my_table,PROD)", - "type": "VIEW" - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "my_view", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "country", - "nullable": false, - "description": "The country", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "city", - "nullable": false, - "description": "City", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "is_latest", - "nullable": false, - "description": "Is latest data", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BooleanType": {} - } - }, - "nativeDataType": "yesno", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "timestamp", - "nullable": false, - "description": "Timestamp of measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "time", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "average_measurement", - "nullable": false, - "description": "My measurement", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "average", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Measure" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "foo.view.lkml" - }, - "name": "my_view", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "SELECT\n is_latest,\n country,\n city,\n timestamp,\n measurement\n FROM\n my_table", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/prod/looker/lkml_samples/views" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 1586847600000, - "actor": "urn:li:corpuser:datahub" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD),id)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),id)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,..owners,PROD),owner_name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD),owner_name)" - ], - "confidenceScore": 1.0 - } - ] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "owners", - "platform": "urn:li:dataPlatform:looker", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": true - }, - { - "fieldPath": "owner_name", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - } - ], - "primaryKeys": [ - "id" - ] - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "owners.view.lkml" - }, - "name": "owners", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "looker" - }, - { - "id": "lkml_samples" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Dimension", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Dimension" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Measure", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Measure" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Temporal", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Temporal" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index 124b0519f1d30..b1853cfa2b3c0 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -1,6 +1,6 @@ import logging import pathlib -from typing import Any, Dict, List, cast +from typing import Any, List from unittest import mock import pydantic @@ -17,17 +17,13 @@ LookerRefinementResolver, LookMLSourceConfig, ) -from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState from datahub.metadata.schema_classes import ( DatasetSnapshotClass, MetadataChangeEventClass, UpstreamLineageClass, ) from tests.test_helpers import mce_helpers -from tests.test_helpers.state_helpers import ( - get_current_checkpoint_from_pipeline, - validate_all_providers_have_committed_successfully, -) +from tests.test_helpers.state_helpers import get_current_checkpoint_from_pipeline logging.getLogger("lkml").setLevel(logging.INFO) @@ -728,11 +724,10 @@ def test_hive_platform_drops_ids(pytestconfig, tmp_path, mock_time): @freeze_time(FROZEN_TIME) -def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time): +def test_lookml_stateful_ingestion(pytestconfig, tmp_path, mock_time): output_file_name: str = "lookml_mces.json" - golden_file_name: str = "expected_output.json" - output_file_deleted_name: str = "lookml_mces_deleted_stateful.json" - golden_file_deleted_name: str = "lookml_mces_golden_deleted_stateful.json" + state_file_name: str = "lookml_state_mces.json" + golden_file_name: str = "golden_test_state.json" test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" @@ -756,7 +751,7 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time): "state_provider": { "type": "file", "config": { - "filename": f"{tmp_path}/checkpoint_mces.json", + "filename": f"{tmp_path}/{state_file_name}", }, }, }, @@ -764,27 +759,20 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time): }, "sink": { "type": "file", - "config": {}, + "config": { + "filename": f"{tmp_path}/{output_file_name}", + }, }, } - pipeline_run1 = None - pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore - base_pipeline_config # type: ignore - ) - # Set the special properties for this run - pipeline_run1_config["source"]["config"]["emit_reachable_views_only"] = False - pipeline_run1_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name}" - pipeline_run1 = Pipeline.create(pipeline_run1_config) + pipeline_run1 = Pipeline.create(base_pipeline_config) pipeline_run1.run() pipeline_run1.raise_from_status() pipeline_run1.pretty_print_summary() mce_helpers.check_golden_file( pytestconfig, - output_path=tmp_path / output_file_name, + output_path=f"{tmp_path}/{state_file_name}", golden_path=f"{test_resources_dir}/{golden_file_name}", ) @@ -792,60 +780,6 @@ def test_lookml_ingest_stateful(pytestconfig, tmp_path, mock_time): assert checkpoint1 assert checkpoint1.state - pipeline_run2 = None - pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore - # Set the special properties for this run - pipeline_run2_config["source"]["config"]["emit_reachable_views_only"] = True - pipeline_run2_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_deleted_name}" - pipeline_run2 = Pipeline.create(pipeline_run2_config) - pipeline_run2.run() - pipeline_run2.raise_from_status() - pipeline_run2.pretty_print_summary() - - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / output_file_deleted_name, - golden_path=f"{test_resources_dir}/{golden_file_deleted_name}", - ) - checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) - assert checkpoint2 - assert checkpoint2.state - - # Validate that all providers have committed successfully. - validate_all_providers_have_committed_successfully( - pipeline=pipeline_run1, expected_providers=1 - ) - validate_all_providers_have_committed_successfully( - pipeline=pipeline_run2, expected_providers=1 - ) - - # Perform all assertions on the states. The deleted table should not be - # part of the second state - state1 = cast(GenericCheckpointState, checkpoint1.state) - state2 = cast(GenericCheckpointState, checkpoint2.state) - - difference_dataset_urns = list( - state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) - ) - # the difference in dataset urns are all the views that are not reachable from the model file - assert len(difference_dataset_urns) == 11 - deleted_dataset_urns: List[str] = [ - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", - ] - assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns) - def test_lookml_base_folder(): fake_api = { diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_deleted_stateful_ingestion.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_deleted_stateful_ingestion.json new file mode 100644 index 0000000000000..9d6f755374462 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_deleted_stateful_ingestion.json @@ -0,0 +1,50 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion.json new file mode 100644 index 0000000000000..4a77651c93066 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion.json @@ -0,0 +1,50 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py new file mode 100644 index 0000000000000..1d67f18427ba1 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py @@ -0,0 +1,115 @@ +from typing import Any, Dict, List, cast + +from freezegun import freeze_time + +from datahub.ingestion.run.pipeline import Pipeline +from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState +from tests.test_helpers import mce_helpers +from tests.test_helpers.state_helpers import ( + get_current_checkpoint_from_pipeline, + validate_all_providers_have_committed_successfully, +) + +FROZEN_TIME = "2020-04-14 07:00:00" + + +@freeze_time(FROZEN_TIME) +def test_stateful_ingestion(pytestconfig, tmp_path, mock_time): + # test stateful ingestion using dummy source + output_file_name: str = "dummy_mces.json" + golden_file_name: str = "golden_test_stateful_ingestion.json" + output_file_deleted_name: str = "dummy_mces_deleted_stateful.json" + golden_file_deleted_name: str = "golden_test_deleted_stateful_ingestion.json" + + test_resources_dir = pytestconfig.rootpath / "tests/unit/stateful_ingestion/state" + + base_pipeline_config = { + "run_id": "dummy-test-stateful-ingestion", + "pipeline_name": "dummy_stateful", + "source": { + "type": "dummy", + "config": { + "stateful_ingestion": { + "enabled": True, + "remove_stale_metadata": True, + "state_provider": { + "type": "file", + "config": { + "filename": f"{tmp_path}/checkpoint_mces.json", + }, + }, + }, + }, + }, + "sink": { + "type": "file", + "config": {}, + }, + } + + pipeline_run1 = None + pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore + base_pipeline_config # type: ignore + ) + pipeline_run1_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_name}" + pipeline_run1 = Pipeline.create(pipeline_run1_config) + pipeline_run1.run() + pipeline_run1.raise_from_status() + pipeline_run1.pretty_print_summary() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_name, + golden_path=f"{test_resources_dir}/{golden_file_name}", + ) + + checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1) + assert checkpoint1 + assert checkpoint1.state + + pipeline_run2 = None + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config["source"]["config"]["dataset_patterns"] = { + "allow": ["dummy_dataset1", "dummy_dataset2"], + } + pipeline_run2_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_deleted_name}" + pipeline_run2 = Pipeline.create(pipeline_run2_config) + pipeline_run2.run() + pipeline_run2.raise_from_status() + pipeline_run2.pretty_print_summary() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_deleted_name, + golden_path=f"{test_resources_dir}/{golden_file_deleted_name}", + ) + checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) + assert checkpoint2 + assert checkpoint2.state + + # Validate that all providers have committed successfully. + validate_all_providers_have_committed_successfully( + pipeline=pipeline_run1, expected_providers=1 + ) + validate_all_providers_have_committed_successfully( + pipeline=pipeline_run2, expected_providers=1 + ) + + # Perform all assertions on the states. The deleted table should not be + # part of the second state + state1 = cast(GenericCheckpointState, checkpoint1.state) + state2 = cast(GenericCheckpointState, checkpoint2.state) + + difference_dataset_urns = list( + state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) + ) + # the difference in dataset urns are all the views that are not reachable from the model file + assert len(difference_dataset_urns) == 1 + deleted_dataset_urns: List[str] = [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + ] + assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns)