From 5094dabdf4b529841a6e6ee4551a26bf51b8d37e Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:08:15 +0530 Subject: [PATCH] feat(ingest/gx): support gx version 0.18.0 (#11823) --- .github/workflows/gx-plugin.yml | 2 ++ metadata-ingestion-modules/gx-plugin/setup.py | 2 +- .../gx-plugin/src/datahub_gx_plugin/action.py | 20 ++++++++++++++++++- .../unit/test_great_expectations_action.py | 12 +++++------ 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index 99121f81099f2..aa7c3f069c765 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -39,6 +39,8 @@ jobs: extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0" - python-version: "3.11" extraPythonRequirement: "great-expectations~=0.17.0" + - python-version: "3.11" + extraPythonRequirement: "great-expectations~=0.18.0" fail-fast: false steps: - name: Set up JDK 17 diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py index 48b9d2d170d8c..e87bbded96584 100644 --- a/metadata-ingestion-modules/gx-plugin/setup.py +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -38,7 +38,7 @@ def get_long_description(): # GE added handling for higher version of jinja2 in version 0.15.12 # https://github.com/great-expectations/great_expectations/pull/5382/files # TODO: support GX 0.18.0 - "great-expectations>=0.15.12, <0.18.0", + "great-expectations>=0.15.12, <1.0.0", # datahub does not depend on traitlets directly but great expectations does. # https://github.com/ipython/traitlets/issues/741 "traitlets<5.2.2", diff --git a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py index 76e43cf8c2c3d..1cb736bb1ba83 100644 --- a/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py +++ b/metadata-ingestion-modules/gx-plugin/src/datahub_gx_plugin/action.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import datahub.emitter.mce_builder as builder +import packaging.version from datahub.cli.env_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter @@ -59,6 +60,16 @@ from sqlalchemy.engine.base import Connection, Engine from sqlalchemy.engine.url import make_url +# TODO: move this and version check used in tests to some common module +try: + from great_expectations import __version__ as GX_VERSION # type: ignore + + has_name_positional_arg = packaging.version.parse( + GX_VERSION + ) >= packaging.version.Version("0.18.0") +except Exception: + has_name_positional_arg = False + if TYPE_CHECKING: from great_expectations.data_context.types.resource_identifiers import ( GXCloudIdentifier, @@ -78,6 +89,8 @@ class DataHubValidationAction(ValidationAction): def __init__( self, data_context: AbstractDataContext, + # this would capture `name` positional arg added in GX 0.18.0 + *args: Union[str, Any], server_url: str, env: str = builder.DEFAULT_ENV, platform_alias: Optional[str] = None, @@ -94,7 +107,12 @@ def __init__( name: str = "DataHubValidationAction", ): - super().__init__(data_context) + if has_name_positional_arg: + if len(args) >= 1 and isinstance(args[0], str): + name = args[0] + super().__init__(data_context, name) + else: + super().__init__(data_context) self.server_url = server_url self.env = env self.platform_alias = platform_alias diff --git a/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py b/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py index c870a4449abea..fcad17fa4b71d 100644 --- a/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py +++ b/metadata-ingestion-modules/gx-plugin/tests/unit/test_great_expectations_action.py @@ -30,7 +30,7 @@ ) from great_expectations.core.id_dict import IDDict from great_expectations.core.run_identifier import RunIdentifier -from great_expectations.data_context import DataContext, FileDataContext +from great_expectations.data_context import FileDataContext from great_expectations.data_context.types.resource_identifiers import ( ExpectationSuiteIdentifier, ValidationResultIdentifier, @@ -52,7 +52,7 @@ @pytest.fixture(scope="function") -def ge_data_context(tmp_path: str) -> DataContext: +def ge_data_context(tmp_path: str) -> FileDataContext: return FileDataContext.create(tmp_path) @@ -233,7 +233,7 @@ def ge_validation_result_suite_id_pandas() -> ValidationResultIdentifier: @mock.patch("datahub.emitter.rest_emitter.DatahubRestEmitter.emit_mcp", autospec=True) def test_DataHubValidationAction_sqlalchemy( mock_emitter: mock.MagicMock, - ge_data_context: DataContext, + ge_data_context: FileDataContext, ge_validator_sqlalchemy: Validator, ge_validation_result_suite: ExpectationSuiteValidationResult, ge_validation_result_suite_id: ValidationResultIdentifier, @@ -337,7 +337,7 @@ def test_DataHubValidationAction_sqlalchemy( @mock.patch("datahub.emitter.rest_emitter.DatahubRestEmitter.emit_mcp", autospec=True) def test_DataHubValidationAction_pandas( mock_emitter: mock.MagicMock, - ge_data_context: DataContext, + ge_data_context: FileDataContext, ge_validator_pandas: Validator, ge_validation_result_suite_pandas: ExpectationSuiteValidationResult, ge_validation_result_suite_id_pandas: ValidationResultIdentifier, @@ -399,7 +399,7 @@ def test_DataHubValidationAction_pandas( def test_DataHubValidationAction_graceful_failure( - ge_data_context: DataContext, + ge_data_context: FileDataContext, ge_validator_sqlalchemy: Validator, ge_validation_result_suite: ExpectationSuiteValidationResult, ge_validation_result_suite_id: ValidationResultIdentifier, @@ -418,7 +418,7 @@ def test_DataHubValidationAction_graceful_failure( def test_DataHubValidationAction_not_supported( - ge_data_context: DataContext, + ge_data_context: FileDataContext, ge_validator_spark: Validator, ge_validation_result_suite: ExpectationSuiteValidationResult, ge_validation_result_suite_id: ValidationResultIdentifier,