From 63662044583031fc27d98af02f2913d324245db0 Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Wed, 17 Jul 2024 01:22:16 +0200 Subject: [PATCH] openlineage: add method to common.compat to not force hooks to try/except every 2.10 hook lineage call (#40812) Signed-off-by: Maciej Obuchowski --- .pre-commit-config.yaml | 9 ++ .../common/compat/lineage/__init__.py | 5 - .../providers/common/compat/lineage/hook.py | 41 +++++ airflow/providers/openlineage/sqlparser.py | 1 - contributing-docs/08_static_code_checks.rst | 2 + dev/breeze/doc/images/output-commands.svg | 42 ++--- .../doc/images/output_static-checks.svg | 144 +++++++++--------- .../doc/images/output_static-checks.txt | 2 +- .../src/airflow_breeze/pre_commit_ids.py | 1 + .../common/compat/lineage/__init__.py | 16 ++ .../common/compat/lineage/test_hook.py | 24 +++ 11 files changed, 189 insertions(+), 98 deletions(-) rename tests/providers/common/compat/test_empty.py => airflow/providers/common/compat/lineage/__init__.py (91%) create mode 100644 airflow/providers/common/compat/lineage/hook.py create mode 100644 tests/providers/common/compat/lineage/__init__.py create mode 100644 tests/providers/common/compat/lineage/test_hook.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09e8f95c4dde..0ff5abf906ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -675,6 +675,15 @@ repos: (?x) ^airflow/providers/.*\.py$ exclude: ^.*/.*_vendor/ + - id: check-get-lineage-collector-providers + language: pygrep + name: Check providers import hook lineage code from compat + description: Make sure you import from airflow.providers.common.compat.lineage.hook instead of + airflow.lineage.hook.
+ entry: "airflow\\.lineage\\.hook" + pass_filenames: true + files: ^airflow/providers/.*\.py$ + exclude: ^airflow/providers/common/compat/.*\.py$ - id: check-decorated-operator-implements-custom-name name: Check @task decorator implements custom_operator_name language: python diff --git a/tests/providers/common/compat/test_empty.py b/airflow/providers/common/compat/lineage/__init__.py similarity index 91% rename from tests/providers/common/compat/test_empty.py rename to airflow/providers/common/compat/lineage/__init__.py index f91891fb5509..13a83393a912 100644 --- a/tests/providers/common/compat/test_empty.py +++ b/airflow/providers/common/compat/lineage/__init__.py @@ -14,8 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from __future__ import annotations - - -def test_empty(): - assert True diff --git a/airflow/providers/common/compat/lineage/hook.py b/airflow/providers/common/compat/lineage/hook.py new file mode 100644 index 000000000000..2115c992e7a4 --- /dev/null +++ b/airflow/providers/common/compat/lineage/hook.py @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + + +def get_hook_lineage_collector(): + # HookLineageCollector was added in Airflow 2.10; older versions fall back to a no-op collector + try: + from airflow.lineage.hook import get_hook_lineage_collector + + return get_hook_lineage_collector() + except ImportError: + + class NoOpCollector: + """ + NoOpCollector is a hook lineage collector that does nothing. + + It is returned when airflow.lineage.hook cannot be imported (Airflow older than 2.10). + """ + + def add_input_dataset(self, *_, **__): + pass + + def add_output_dataset(self, *_, **__): + pass + + return NoOpCollector() diff --git a/airflow/providers/openlineage/sqlparser.py b/airflow/providers/openlineage/sqlparser.py index a76808fdd73f..9906f3db3cda 100644 --- a/airflow/providers/openlineage/sqlparser.py +++ b/airflow/providers/openlineage/sqlparser.py @@ -160,7 +160,6 @@ def parse_table_schemas( "database": database or database_info.database, "use_flat_cross_db_query": database_info.use_flat_cross_db_query, } - self.log.info("PRE getting schemas for input and output tables") return get_table_schemas( hook, namespace, diff --git a/contributing-docs/08_static_code_checks.rst b/contributing-docs/08_static_code_checks.rst index 125257f64db7..0b8e5e6cd453 100644 --- a/contributing-docs/08_static_code_checks.rst +++ b/contributing-docs/08_static_code_checks.rst @@ -174,6 +174,8 @@ require Breeze Docker image to be built locally.
+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-for-inclusive-language | Check for language that we do not accept as community | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| check-get-lineage-collector-providers | Check providers import hook lineage code from compat | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-google-re2-as-dependency | Check google-re2 is declared as dependency when needed | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-hatch-build-order | Check order of dependencies in hatch_build.py | | diff --git a/dev/breeze/doc/images/output-commands.svg b/dev/breeze/doc/images/output-commands.svg index 08d3dc2a13ee..5888d1fc862e 100644 --- a/dev/breeze/doc/images/output-commands.svg +++ b/dev/breeze/doc/images/output-commands.svg @@ -298,53 +298,53 @@ Usage:breeze[OPTIONSCOMMAND [ARGS]... ╭─ Execution mode ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images. +--python-pPython major/minor version used in Airflow image for images. (>3.8< | 3.9 | 3.10 | 3.11 | 3.12)                           [default: 3.8]                                               ---integrationIntegration(s) to enable when running (can be more than one).                        +--integrationIntegration(s) to enable when running (can be more than one).                        
(all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql  | openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)                ---standalone-dag-processorRun standalone dag processor for start-airflow. ---database-isolationRun airflow in database isolation mode. +--standalone-dag-processorRun standalone dag processor for start-airflow. +--database-isolationRun airflow in database isolation mode. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Docker Compose selection and cleanup ───────────────────────────────────────────────────────────────────────────────╮ ---project-nameName of the docker-compose project to bring down. The `docker-compose` is for legacy breeze        -project name and you can use `breeze down --project-name docker-compose` to stop all containers    +--project-nameName of the docker-compose project to bring down. The `docker-compose` is for legacy breeze        +project name and you can use `breeze down --project-name docker-compose` to stop all containers    belonging to it.                                                                                   (breeze | pre-commit | docker-compose)                                                             [default: breeze]                                                                                  ---docker-hostOptional - docker host to use when running docker commands. When set, the `--builder` option is    +--docker-hostOptional - docker host to use when running docker commands. When set, the `--builder` option is    ignored when building images.                                                                      
(TEXT)                                                                                             ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Database ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---backend-bDatabase backend to use. If 'none' is chosen, Breeze will start with an invalid database     +--backend-bDatabase backend to use. If 'none' is chosen, Breeze will start with an invalid database     configuration, meaning there will be no database available, and any attempts to connect to   the Airflow database will fail.                                                              (>sqlite< | mysql | postgres | none)                                                         [default: sqlite]                                                                            ---postgres-version-PVersion of Postgres used.(>12< | 13 | 14 | 15 | 16)[default: 12] ---mysql-version-MVersion of MySQL used.(>8.0< | 8.4)[default: 8.0] ---db-reset-dReset DB when entering the container. +--postgres-version-PVersion of Postgres used.(>12< | 13 | 14 | 15 | 16)[default: 12] +--mysql-version-MVersion of MySQL used.(>8.0< | 8.4)[default: 8.0] +--db-reset-dReset DB when entering the container. 
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Build CI image (before entering shell) ─────────────────────────────────────────────────────────────────────────────╮ ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT) +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT) [default: autodetect]                                          ---use-uv/--no-use-uvUse uv instead of pip as packaging tool to build the image.[default: use-uv] ---uv-http-timeoutTimeout for requests that UV makes (only used in case of UV builds).(INTEGER RANGE) +--use-uv/--no-use-uvUse uv instead of pip as packaging tool to build the image.[default: use-uv] +--uv-http-timeoutTimeout for requests that UV makes (only used in case of UV builds).(INTEGER RANGE) [default: 300; x>=1]                                                 ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Other options ──────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---forward-credentials-fForward local credentials to container when running. ---max-timeMaximum time that the command should take - if it takes longer, the command will fail. +--forward-credentials-fForward local credentials to container when running. +--max-timeMaximum time that the command should take - if it takes longer, the command will fail. 
(INTEGER RANGE)                                                                        ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---verbose-vPrint verbose information about performed steps. ---help-hShow this message and exit. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--help-hShow this message and exit. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Developer commands ─────────────────────────────────────────────────────────────────────────────────────────────────╮ start-airflow          Enter breeze environment and starts all Airflow components in the tmux session. Compile     diff --git a/dev/breeze/doc/images/output_static-checks.svg b/dev/breeze/doc/images/output_static-checks.svg index e0e4c7a29e51..32d75da56fa7 100644 --- a/dev/breeze/doc/images/output_static-checks.svg +++ b/dev/breeze/doc/images/output_static-checks.svg @@ -1,4 +1,4 @@ - + Run static checks. ╭─ Pre-commit flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮ ---type-tType(s) of the static checks to run.                                              +--type-tType(s) of the static checks to run.                                              
(all | bandit | blacken-docs | check-aiobotocore-optional |                       check-airflow-k8s-not-used | check-airflow-provider-compatibility |               check-airflow-providers-bug-report-template | check-apache-license-rat |          @@ -332,72 +335,73 @@ | check-docstring-param-types | check-example-dags-urls |                         check-executables-have-shebangs | check-extra-packages-references |               check-extras-order | check-fab-migrations | check-for-inclusive-language |        -check-google-re2-as-dependency | check-hatch-build-order | check-hooks-apply |    -check-incorrect-use-of-LoggingMixin | check-init-decorator-arguments |            -check-integrations-list-consistent | check-lazy-logging |                         -check-links-to-example-dags-do-not-use-hardcoded-versions | check-merge-conflict  -| check-newsfragments-are-valid | check-no-airflow-deprecation-in-providers |     -check-no-providers-in-core-examples | check-only-new-session-with-provide-session -| check-persist-credentials-disabled-in-github-workflows |                        -check-pre-commit-information-consistent | check-provide-create-sessions-imports | -check-provider-docs-valid | check-provider-yaml-valid |                           -check-providers-init-file-missing | check-providers-subpackages-init-file-exist | -check-pydevd-left-in-code | check-revision-heads-map |                            -check-safe-filter-usage-in-html | check-sql-dependency-common-data-structure |    -check-start-date-not-used-in-defaults | check-system-tests-present |              -check-system-tests-tocs | check-template-context-variable-in-sync |               -check-tests-in-the-right-folders | check-tests-unittest-testcase |                -check-urlparse-usage-in-code | check-usage-of-re2-over-re | check-xml | codespell -| compile-www-assets | compile-www-assets-dev |                                   -create-missing-init-py-files-tests | debug-statements | 
detect-private-key |      -doctoc | end-of-file-fixer | fix-encoding-pragma | flynt |                        -generate-airflow-diagrams | generate-pypi-readme | identity | insert-license |    -kubeconform | lint-chart-schema | lint-css | lint-dockerfile | lint-helm-chart |  -lint-json-schema | lint-markdown | lint-openapi | mixed-line-ending |             -mypy-airflow | mypy-dev | mypy-docs | mypy-providers | pretty-format-json |       -pylint | python-no-log-warn | replace-bad-characters | rst-backticks | ruff |     -ruff-format | shellcheck | trailing-whitespace | ts-compile-format-lint-www |     -update-black-version | update-breeze-cmd-output |                                 -update-breeze-readme-config-hash | update-build-dependencies |                    -update-chart-dependencies | update-common-sql-api-stubs | update-er-diagram |     -update-extras | update-in-the-wild-to-be-sorted |                                 -update-inlined-dockerfile-scripts | update-installed-providers-to-be-sorted |     -update-installers | update-local-yml-file | update-migration-references |         -update-openapi-spec-tags-to-be-sorted | update-providers-dependencies |           -update-reproducible-source-date-epoch | update-spelling-wordlist-to-be-sorted |   -update-supported-versions | update-vendored-in-k8s-json-schema | update-version | -validate-operators-init | yamllint)                                               ---show-diff-on-failure-sShow diff for files modified by the checks. ---initialize-environmentInitialize environment before running checks. ---max-initialization-attemptsMaximum number of attempts to initialize environment before giving up. 
-(INTEGER RANGE)                                                        -[default: 3; 1<=x<=10]                                                 -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Selecting files to run the checks on ───────────────────────────────────────────────────────────────────────────────╮ ---file-fList of files to run the checks on.(PATH) ---all-files-aRun checks on all files. ---commit-ref-rRun checks for this commit reference only (can be any git commit-ish reference). Mutually     -exclusive with --last-commit.                                                                 -(TEXT)                                                                                        ---last-commit-cRun checks for all files in last commit. Mutually exclusive with --commit-ref. ---only-my-changes-mRun checks for commits belonging to my PR only: for all commits between merge base to `main`  -branch and HEAD of your branch.                                                               -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Building image before running checks ───────────────────────────────────────────────────────────────────────────────╮ ---skip-image-upgrade-checkSkip checking if the CI image is up to date. ---force-buildForce image build no matter if it is determined as needed. ---image-tagTag of the image which is used to run the image (implies --mount-sources=skip). 
-(TEXT)                                                                          -[default: latest]                                                               ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT) -[default: autodetect]                                          -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---verbose-vPrint verbose information about performed steps. ---help-hShow this message and exit. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +check-get-lineage-collector-providers | check-google-re2-as-dependency |          +check-hatch-build-order | check-hooks-apply | check-incorrect-use-of-LoggingMixin +| check-init-decorator-arguments | check-integrations-list-consistent |           +check-lazy-logging | check-links-to-example-dags-do-not-use-hardcoded-versions |  +check-merge-conflict | check-newsfragments-are-valid |                            +check-no-airflow-deprecation-in-providers | check-no-providers-in-core-examples | +check-only-new-session-with-provide-session |                                     +check-persist-credentials-disabled-in-github-workflows |                          +check-pre-commit-information-consistent | check-provide-create-sessions-imports | +check-provider-docs-valid | check-provider-yaml-valid |                           +check-providers-init-file-missing | check-providers-subpackages-init-file-exist | +check-pydevd-left-in-code | check-revision-heads-map |                            +check-safe-filter-usage-in-html | 
check-sql-dependency-common-data-structure |    +check-start-date-not-used-in-defaults | check-system-tests-present |              +check-system-tests-tocs | check-template-context-variable-in-sync |               +check-tests-in-the-right-folders | check-tests-unittest-testcase |                +check-urlparse-usage-in-code | check-usage-of-re2-over-re | check-xml | codespell +| compile-www-assets | compile-www-assets-dev |                                   +create-missing-init-py-files-tests | debug-statements | detect-private-key |      +doctoc | end-of-file-fixer | fix-encoding-pragma | flynt |                        +generate-airflow-diagrams | generate-pypi-readme | identity | insert-license |    +kubeconform | lint-chart-schema | lint-css | lint-dockerfile | lint-helm-chart |  +lint-json-schema | lint-markdown | lint-openapi | mixed-line-ending |             +mypy-airflow | mypy-dev | mypy-docs | mypy-providers | pretty-format-json |       +pylint | python-no-log-warn | replace-bad-characters | rst-backticks | ruff |     +ruff-format | shellcheck | trailing-whitespace | ts-compile-format-lint-www |     +update-black-version | update-breeze-cmd-output |                                 +update-breeze-readme-config-hash | update-build-dependencies |                    +update-chart-dependencies | update-common-sql-api-stubs | update-er-diagram |     +update-extras | update-in-the-wild-to-be-sorted |                                 +update-inlined-dockerfile-scripts | update-installed-providers-to-be-sorted |     +update-installers | update-local-yml-file | update-migration-references |         +update-openapi-spec-tags-to-be-sorted | update-providers-dependencies |           +update-reproducible-source-date-epoch | update-spelling-wordlist-to-be-sorted |   +update-supported-versions | update-vendored-in-k8s-json-schema | update-version | +validate-operators-init | yamllint)                                               +--show-diff-on-failure-sShow diff for 
files modified by the checks. +--initialize-environmentInitialize environment before running checks. +--max-initialization-attemptsMaximum number of attempts to initialize environment before giving up. +(INTEGER RANGE)                                                        +[default: 3; 1<=x<=10]                                                 +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Selecting files to run the checks on ───────────────────────────────────────────────────────────────────────────────╮ +--file-fList of files to run the checks on.(PATH) +--all-files-aRun checks on all files. +--commit-ref-rRun checks for this commit reference only (can be any git commit-ish reference). Mutually     +exclusive with --last-commit.                                                                 +(TEXT)                                                                                        +--last-commit-cRun checks for all files in last commit. Mutually exclusive with --commit-ref. +--only-my-changes-mRun checks for commits belonging to my PR only: for all commits between merge base to `main`  +branch and HEAD of your branch.                                                               +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Building image before running checks ───────────────────────────────────────────────────────────────────────────────╮ +--skip-image-upgrade-checkSkip checking if the CI image is up to date. +--force-buildForce image build no matter if it is determined as needed. +--image-tagTag of the image which is used to run the image (implies --mount-sources=skip). 
+(TEXT)                                                                          +[default: latest]                                                               +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT) +[default: autodetect]                                          +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--help-hShow this message and exit. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_static-checks.txt b/dev/breeze/doc/images/output_static-checks.txt index 0a484fbe70f6..cd2dce871a4b 100644 --- a/dev/breeze/doc/images/output_static-checks.txt +++ b/dev/breeze/doc/images/output_static-checks.txt @@ -1 +1 @@ -7b121e6337aeb2242ab88b8f51ae1907 +9381c6120248c8e22bd10d9f882ef667 diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index b31340be7858..4d699a02de87 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -52,6 +52,7 @@ "check-extras-order", "check-fab-migrations", "check-for-inclusive-language", + "check-get-lineage-collector-providers", "check-google-re2-as-dependency", "check-hatch-build-order", "check-hooks-apply", diff --git a/tests/providers/common/compat/lineage/__init__.py b/tests/providers/common/compat/lineage/__init__.py new file mode 100644 index 000000000000..13a83393a912 --- /dev/null +++ b/tests/providers/common/compat/lineage/__init__.py @@ 
-0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/common/compat/lineage/test_hook.py b/tests/providers/common/compat/lineage/test_hook.py new file mode 100644 index 000000000000..1fd88405bc13 --- /dev/null +++ b/tests/providers/common/compat/lineage/test_hook.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector + + +def test_that_compat_does_not_raise(): + # On compat tests (Airflow older than 2.10) this goes into the ImportError code path + assert get_hook_lineage_collector() is not None