Skip to content

Commit

Permalink
fix(spark-test): upgrade gradle and fix spark smoke test (#8777)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Sep 8, 2023
1 parent 794eb03 commit 68ae3bf
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 140 deletions.
10 changes: 7 additions & 3 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ jobs:
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
publish: ${{ steps.publish.outputs.publish }}
python_release_version: ${{ steps.tag.outputs.python_release_version }}
steps:
- name: Checkout
uses: actions/checkout@v3
Expand All @@ -58,6 +59,7 @@ jobs:
echo "unique_tag=$(get_unique_tag)" >> $GITHUB_OUTPUT
echo "unique_slim_tag=$(get_unique_tag)-slim" >> $GITHUB_OUTPUT
echo "unique_full_tag=$(get_unique_tag)-full" >> $GITHUB_OUTPUT
echo "python_release_version=$(get_python_docker_release_v)" >> $GITHUB_OUTPUT
- name: Check whether publishing enabled
id: publish
env:
Expand Down Expand Up @@ -573,7 +575,7 @@ jobs:
with:
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
- name: Build and push Slim Image
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
uses: ./.github/actions/docker-custom-build-and-push
with:
target: final
Expand All @@ -582,6 +584,7 @@ jobs:
build-args: |
BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_slim_tag || 'head' }}
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
APP_ENV=slim
tags: ${{ needs.setup.outputs.slim_tag }}
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
Expand Down Expand Up @@ -655,7 +658,7 @@ jobs:
with:
image: ${{ env.DATAHUB_INGESTION_BASE_IMAGE }}:${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
- name: Build and push Full Image
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' }}
if: ${{ steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true' || needs.setup.outputs.publish }}
uses: ./.github/actions/docker-custom-build-and-push
with:
target: final
Expand All @@ -664,6 +667,7 @@ jobs:
build-args: |
BASE_IMAGE=${{ env.DATAHUB_INGESTION_BASE_IMAGE }}
DOCKER_VERSION=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}
RELEASE_VERSION=${{ needs.setup.outputs.python_release_version }}
tags: ${{ needs.setup.outputs.unique_full_tag }}
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
Expand All @@ -673,7 +677,7 @@ jobs:
platforms: linux/amd64,linux/arm64/v8
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
run: echo "tag=${{ (steps.filter.outputs.datahub-ingestion-base == 'true' || steps.filter.outputs.datahub-ingestion == 'true') && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT
datahub_ingestion_full_scan:
permissions:
contents: read # for actions/checkout to fetch code
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/spark-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 800
fetch-tags: true
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
Expand Down
6 changes: 5 additions & 1 deletion docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ FROM $BASE_IMAGE:$DOCKER_VERSION as base
USER 0

COPY ./metadata-ingestion /datahub-ingestion
COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plugin

ARG RELEASE_VERSION
WORKDIR /datahub-ingestion
RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \
cat src/datahub/__init__.py && \
chown -R datahub /datahub-ingestion

Expand All @@ -21,7 +23,9 @@ FROM base as slim-install
RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM base as full-install
RUN pip install --no-cache --user ".[all]"
RUN pip install --no-cache --user ".[base]" && \
pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \
pip install --no-cache --user ".[all]"

FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
Expand Down
25 changes: 25 additions & 0 deletions docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Slim-only DataHub ingestion image: installs metadata-ingestion with a fixed
# set of connector extras on top of the ingestion base image.
#
# Build args:
#   BASE_IMAGE       repository of the base image to build from
#   DOCKER_VERSION   tag of the base image
#   RELEASE_VERSION  version string stamped into src/datahub/__init__.py

# Defining environment
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=latest

FROM $BASE_IMAGE:$DOCKER_VERSION AS base
# Switch to root (uid 0) for the copy and the chown below.
USER 0

COPY ./metadata-ingestion /datahub-ingestion

ARG RELEASE_VERSION
WORKDIR /datahub-ingestion
# Stamp the release version over the "0.0.0.dev0" placeholder. If
# RELEASE_VERSION is unset or empty, fall back to the placeholder itself so
# __version__ is never rewritten to an empty string. The cat echoes the stamped
# file into the build log; chown hands the tree to the datahub user.
RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"${RELEASE_VERSION:-0.0.0.dev0}\"/" src/datahub/__init__.py && \
    cat src/datahub/__init__.py && \
    chown -R datahub /datahub-ingestion

# Run everything from here on as the unprivileged datahub user.
USER datahub
# Expose scripts installed by `pip install --user`.
# NOTE(review): presumably the datahub user's home is /datahub-ingestion in the
# base image - confirm.
ENV PATH="/datahub-ingestion/.local/bin:$PATH"

FROM base AS slim-install
# --no-cache-dir is the documented spelling of the option that was previously
# abbreviated as --no-cache.
RUN pip install --no-cache-dir --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]"

FROM slim-install AS final

# Restated so the final stage is explicitly non-root. PATH is inherited from
# the base stage; setting it again would only prepend the same directory twice.
USER datahub
9 changes: 5 additions & 4 deletions docker/datahub-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,19 @@ dependencies {
docker {
name "${docker_registry}/${docker_repo}:v${version}-slim"
version "v${version}-slim"
dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile")
dockerfile file("${rootProject.projectDir}/docker/${docker_dir}/Dockerfile-slim-only")
files fileTree(rootProject.projectDir) {
include "docker/${docker_dir}/*"
include "metadata-ingestion/**"
include "metadata-ingestion-modules/**"
}.exclude {
i -> i.file.isHidden() ||
i.file == buildDir ||
i.file == project(':metadata-ingestion').buildDir
i.file == project(':metadata-ingestion').buildDir ||
i.file == project(':metadata-ingestion-modules').buildDir
}
buildArgs([DOCKER_VERSION: version,
RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', ''),
APP_ENV: 'slim'])
RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace('-slim', '')])
}
tasks.getByName('docker').dependsOn(['build',
':docker:datahub-ingestion-base:docker',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.9.2-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.6.2-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Loading

0 comments on commit 68ae3bf

Please sign in to comment.