From 12950dd03b8ffea7bfe206dea40a74dab8694593 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 4 Nov 2024 13:01:38 +0100 Subject: [PATCH] Enable back iterative development of latest providers with old airflows (#43617) * Enable back iterative development of latest providers with old airflows The compatibility tests in CI are using providers built as packages from sources, so the compatibility tests run there using "providers/tests" work just fine, because all providers are installed in the airflow.providers site library. However, when we are iterating and debugging backwards compatibility provider tests, we should be able to use local provider sources, rather than installed packages and we have the possibility of mounting both - providers sources and tests to the image. See `contributing-docs/testing/unit_tests.rst` on how to do it by using ``--mount-sources providers-and-tests`` flag connected with `--use-airflow-version`. However, as of #42505 this has been broken, because currently in main we rely on airflow having "pkgutil" namespace package for both - airflow, and airflow.providers packages (previous airflow versions had implicit package for airflow.providers package) - so providers installed locally cannot be used as "another" source of providers. Previously it was working because both "installed" and "sources" `airflow.providers` packages were implicit namespace packages. As explained in https://packaging.python.org/en/latest/guides/packaging-namespace-packages/#native-namespace-packages > Every distribution that uses the namespace package must include such > an `__init__.py`. If any distribution does not, it will cause the > namespace logic to fail and the other sub-packages will not be > importable. Any additional code in __init__.py will be inaccessible. 
So because old airflow uses implicit provider's packages and main airflow from source uses "explicit" provider's package, the only way we can make the "source" providers usable is to mount them or symbolically link them inside the installed distribution of the airflow package (in the site directory), or to dynamically remove the __init__.py from the provider's source directory. We cannot mount the provider package sources inside the installed airflow - because when --use-airflow-version is used, airflow is installed dynamically inside the container - after the container is started. This PR solves the problem by adding an env variable that will make the initialization script remove the installed airflow.providers folder after installing airflow and link the "providers/src/airflow/providers" folder there. This has the added benefit that all providers (including the preinstalled ones) are used from "main" sources rather than from installed packages - which was problematic for the past way of using providers from sources - which used the fact that both "airflow.providers" in the site-library and the one in sources were implicit namespace packages. 
* Update Dockerfile.ci Co-authored-by: GPK * Update scripts/docker/entrypoint_ci.sh Co-authored-by: GPK --------- Co-authored-by: GPK --- Dockerfile.ci | 7 +++++++ scripts/ci/docker-compose/providers-and-tests-sources.yml | 5 +++-- scripts/docker/entrypoint_ci.sh | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Dockerfile.ci b/Dockerfile.ci index 666344e28525..6ddf2f4e1ac4 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -1017,6 +1017,13 @@ function determine_airflow_to_use() { --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt # Some packages might leave legacy typing module which causes test issues pip uninstall -y typing || true + if [[ ${LINK_PROVIDERS_TO_AIRFLOW_PACKAGE=} == "true" ]]; then + echo + echo "${COLOR_BLUE}Linking providers to airflow package as we are using them from mounted sources.${COLOR_RESET}" + echo + rm -rf /usr/local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages/airflow/providers + ln -s "${AIRFLOW_SOURCES}/providers/src/airflow/providers" "/usr/local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages/airflow/providers" + fi fi if [[ "${USE_AIRFLOW_VERSION}" =~ ^2\.2\..*|^2\.1\..*|^2\.0\..* && "${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=}" != "" ]]; then diff --git a/scripts/ci/docker-compose/providers-and-tests-sources.yml b/scripts/ci/docker-compose/providers-and-tests-sources.yml index 29498ae27eb9..8a06f2fcc0d1 100644 --- a/scripts/ci/docker-compose/providers-and-tests-sources.yml +++ b/scripts/ci/docker-compose/providers-and-tests-sources.yml @@ -21,6 +21,7 @@ services: tty: true # docker run -t environment: - AIRFLOW__CORE__PLUGINS_FOLDER=/files/plugins + - LINK_PROVIDERS_TO_AIRFLOW_PACKAGE=true # We only mount tests folder volumes: - ../../../.bash_aliases:/root/.bash_aliases:cached @@ -30,8 +31,8 @@ services: - ../../../empty:/opt/airflow/airflow # but keep tests - ../../../tests/:/opt/airflow/tests:cached - # and providers - 
- ../../../providers/src/airflow/providers:/opt/airflow/airflow/providers:cached + # Mount providers to make sure that we have the latest providers - both tests and sources + - ../../../providers/:/opt/airflow/providers:cached # and entrypoint and in_container scripts for testing - ../../../scripts/docker/entrypoint_ci.sh:/entrypoint - ../../../scripts/in_container/:/opt/airflow/scripts/in_container diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh index cbd7bdce141e..8e864ba83121 100755 --- a/scripts/docker/entrypoint_ci.sh +++ b/scripts/docker/entrypoint_ci.sh @@ -236,6 +236,13 @@ function determine_airflow_to_use() { --constraint https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt # Some packages might leave legacy typing module which causes test issues pip uninstall -y typing || true + if [[ ${LINK_PROVIDERS_TO_AIRFLOW_PACKAGE=} == "true" ]]; then + echo + echo "${COLOR_BLUE}Linking providers to airflow package as we are using them from mounted sources.${COLOR_RESET}" + echo + rm -rf /usr/local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages/airflow/providers + ln -s "${AIRFLOW_SOURCES}/providers/src/airflow/providers" "/usr/local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages/airflow/providers" + fi fi if [[ "${USE_AIRFLOW_VERSION}" =~ ^2\.2\..*|^2\.1\..*|^2\.0\..* && "${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=}" != "" ]]; then