Skip to content

Use new lakefs python SDK #284

Use new lakefs python SDK

Use new lakefs python SDK #284

Workflow file for this run

# CI workflow for the lakeFS Airflow provider.
#
# On every pull request and on pushes to main it:
#   1. starts a lakeFS server as a service container,
#   2. builds the provider package and installs it into an Astro dev project,
#   3. runs the example lakeFS DAG and checks its final state,
#   4. verifies the artifacts the DAG is expected to leave in lakeFS.
name: Provider

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  build-provider:
    name: build latest provider
    runs-on: ubuntu-latest
    env:
      # Credentials shared by the lakeFS service, the Airflow connection and
      # the AWS CLI calls below. NOTE(review): these look like placeholder
      # example keys (EXAMPLE suffix) for the throwaway CI instance — confirm
      # they are never reused for a real deployment.
      KEY: "AKIAIOSFODNN7EXAMPLE"
      SECRET: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
    services:
      lakefs:
        image: treeverse/lakefs:latest
        env:
          LAKEFS_AUTH_ENCRYPT_SECRET_KEY: "some random secret string"
          LAKEFS_DATABASE_TYPE: local
          LAKEFS_BLOCKSTORE_TYPE: local
          LAKEFS_GATEWAYS_S3_DOMAIN_NAME: s3.local.lakefs.io:8000
          LAKEFS_LOGGING_LEVEL: TRACE
          # Environment values are strings; quote so no YAML parser retypes it.
          LAKEFS_STATS_ENABLED: "false"
          LAKEFS_INSTALLATION_USER_NAME: docker
          LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }}
          LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }}
          LAKECTL_SERVER_ENDPOINT_URL: http://localhost:8000
          # Must be well-formed `${{ ... }}` expressions so they expand to the
          # same credentials as the installation keys above.
          LAKECTL_CREDENTIALS_ACCESS_KEY_ID: ${{ env.KEY }}
          LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: ${{ env.SECRET }}
        ports:
          - "8000:8000"
        # `--name lakefs` gives the container a fixed name so the
        # `docker exec lakefs ...` step further down can address it.
        options: >-
          --name lakefs
          --health-cmd "curl --fail -LI http://localhost:8000/_health"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install requirements
        run: pip install -r requirements.txt

      - name: Build package
        run: |
          python3 -m pip install build
          python3 -m build

      - name: Install astro
        run: curl -sSL https://install.astronomer.io | sudo bash -s -- 0.28.1

      - name: Create astro dev env
        run: mkdir astro && cd astro && astro dev init

      # Make the freshly built distribution and the example DAG available
      # inside the Astro project directory.
      - name: Copy files
        run: |
          cp -R ./dist/ astro/
          cp ./lakefs_provider/example_dags/lakefs-dag.py astro/dags/

      # Append a RUN instruction so the Astro image installs the built wheel.
      - name: Insert Dockerfile pip install
        run: printf "\nRUN pip install --user dist/airflow_provider_lakefs-*-py3-none-any.whl" >> astro/Dockerfile

      - name: Start astro
        working-directory: astro
        run: astro dev start

      - name: Create test repo
        run: |
          curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' 'http://localhost:8000/api/v1/repositories'

      # NOTE(review): 172.17.0.1 is presumably the Docker bridge gateway, used
      # so the Airflow containers can reach lakeFS on the host's published
      # port — confirm for the runner image in use.
      - name: Run lakeFS DAG
        working-directory: astro
        run: |
          astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://172.17.0.1:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}"
          astro dev run dags unpause lakeFS_workflow
          astro dev run dags trigger lakeFS_workflow
          sleep 30

      - name: Run DAG state check script
        id: dag_status_id
        run: python3 dag_status.py

      # Retry reading the _SUCCESS marker on main through the lakeFS S3
      # gateway until it exists or the attempts are exhausted.
      - name: Wait until Airflow makes output file available on main
        env:
          # To avoid the lack of region - see https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
          AWS_EC2_METADATA_DISABLED: "true"
        uses: nick-invision/retry@v2
        with:
          timeout_minutes: 3
          max_attempts: 30
          # Folded scalar: the lines below are joined with single spaces into
          # one shell command.
          command: >-
            AWS_ACCESS_KEY_ID=${{ env.KEY }}
            AWS_SECRET_ACCESS_KEY=${{ env.SECRET }}
            aws s3 cp --endpoint-url=http://s3.local.lakefs.io:8000
            s3://example-repo/main/path/to/_SUCCESS -

      # Retry listing the symlink file inside the lakeFS service container.
      - name: Wait symlink file creation and validate it
        env:
          AWS_EC2_METADATA_DISABLED: "true"
        uses: nick-invision/retry@v2
        with:
          timeout_minutes: 3
          max_attempts: 30
          command: docker exec lakefs ls lakefs/data/block/data/symlinks/example-repo/example-branch/path/to/symlink.txt

      # Log dumps run even when earlier steps failed, to aid debugging.
      - name: airflow scheduler logs
        if: ${{ always() }}
        working-directory: astro
        run: astro dev logs --scheduler

      - name: airflow triggerer logs
        if: ${{ always() }}
        working-directory: astro
        run: astro dev logs --triggerer