From 3e2a2575f2ebfaa313479c8e5016cd9dd9fcace3 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 08:48:09 +0300 Subject: [PATCH 01/12] Use github services instead of docker compose - setup lakefs using env - no need to maintain another docker-compose for lakefs - use curl basic auth (consider replace with lakectl) --- .github/workflows/provider.yaml | 46 +++++++++++++++++---------------- ops/docker-compose.yaml | 24 ----------------- 2 files changed, 24 insertions(+), 46 deletions(-) delete mode 100644 ops/docker-compose.yaml diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 1e30c3c..863d206 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -12,6 +12,21 @@ jobs: env: KEY: "AKIAIOSFODNN7EXAMPLE" SECRET: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + + services: + lakefs: + image: treeverse/lakefs:latest + env: + LAKEFS_AUTH_ENCRYPT_SECRET_KEY: "some random secret string" + LAKEFS_DATABASE_TYPE: local + LAKEFS_BLOCKSTORE_TYPE: local + LAKEFS_GATEWAYS_S3_DOMAIN_NAME: s3.local.lakefs.io:8000 + LAKEFS_LOGGING_LEVEL: TRACE + LAKEFS_STATS_ENABLED: false + LAKEFS_INSTALLATION_USER_NAME: docker + LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }} + LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }} + steps: - name: Checkout uses: actions/checkout@v4 @@ -41,24 +56,15 @@ jobs: run: printf "\nRUN pip install --user dist/airflow_provider_lakefs-*-py3-none-any.whl" >> astro/Dockerfile - name: Start astro - run: | - cd astro && astro dev start - - - name: spin up lakeFS - run: docker-compose -f ops/docker-compose.yaml up --quiet-pull -d && sleep 30 - - - name: Setup lakeFS - run: | - curl localhost:8000/api/v1/setup_lakefs -H "Content-Type: application/json" --request POST --data '{"username":"test","key":{"access_key_id":"${{ env.KEY }}","secret_access_key":"${{ env.SECRET }}"}}' + working-directory: astro + run: astro dev start - name: Create test repo - run: | - export BASIC_AUTH=$(echo -n 
"${{ env.KEY }}:${{ env.SECRET }}" | base64) - curl localhost:8000/api/v1/repositories -H "Content-Type: application/json" -H "Authorization: Basic $(echo $BASIC_AUTH | tr -d ' ')" --request POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' + run: curl -u "${{ env.KEY }}:${{ env.SECRET }}" http://localhost:8000/api/v1/repositories -H "Content-Type: application/json" --request POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' - name: Run lakeFS DAG + working-directory: astro run: | - cd astro astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://172.17.0.1:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}" astro dev run dags unpause lakeFS_workflow astro dev run dags trigger lakeFS_workflow @@ -66,9 +72,7 @@ jobs: - name : Run DAG state check script id : dag_status_id - run: | - chmod +x dag_status.py - python3 dag_status.py + run: python3 dag_status.py - name: Wait until Airflow makes output file available on main env: @@ -95,12 +99,10 @@ jobs: - name: airflow scheduler logs if: ${{ always() }} - run: | - cd astro - astro dev logs --scheduler + working-directory: astro + run: astro dev logs --scheduler - name: airflow triggerer logs if: ${{ always() }} - run: | - cd astro - astro dev logs --triggerer + working-directory: astro + run: astro dev logs --triggerer diff --git a/ops/docker-compose.yaml b/ops/docker-compose.yaml deleted file mode 100644 index bd47412..0000000 --- a/ops/docker-compose.yaml +++ /dev/null @@ -1,24 +0,0 @@ - -version: '3' -services: - lakefs: - image: "treeverse/lakefs:latest" - ports: - - "8000:8000" - environment: - - LAKEFS_AUTH_ENCRYPT_SECRET_KEY="some random secret string" - - LAKEFS_DATABASE_TYPE=local - - LAKEFS_BLOCKSTORE_TYPE=local - - LAKEFS_GATEWAYS_S3_DOMAIN_NAME=s3.local.lakefs.io:8000 - - LAKEFS_LOGGING_LEVEL=TRACE - - LAKEFS_STATS_ENABLED=false - - LAKECTL_SERVER_ENDPOINT_URL=http://localhost:8000 - - 
LAKECTL_CREDENTIALS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE - - LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - entrypoint: ["/bin/sh", "-c"] - command: - - | - lakefs setup --local-settings --user-name docker --access-key-id AKIAIOSFODNN7EXAMPLE --secret-access-key wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY || true - lakefs run --local-settings & - wait-for -t 60 lakefs:8000 -- lakectl repo create lakefs://example s3://example || true - wait From 1546839658a3aff8e961577dbbb96ec24b4a7225 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:08:44 +0300 Subject: [PATCH 02/12] use lakectl to create repository --- .github/workflows/provider.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 863d206..e2a1c6e 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -26,6 +26,12 @@ jobs: LAKEFS_INSTALLATION_USER_NAME: docker LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }} LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }} + LAKEFS_INSTALLATION_USER_NAME: docker + LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }} + LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }} + LAKECTL_SERVER_ENDPOINT_URL: http://localhost:8000 + LAKECTL_CREDENTIALS_ACCESS_KEY_ID: $${ env.Key }} + LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: $${ env.SECRET }} steps: - name: Checkout @@ -60,7 +66,7 @@ jobs: run: astro dev start - name: Create test repo - run: curl -u "${{ env.KEY }}:${{ env.SECRET }}" http://localhost:8000/api/v1/repositories -H "Content-Type: application/json" --request POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' + run: docker exec lakectl repo create example-repo local://data/example-repo - name: Run lakeFS DAG working-directory: astro From fdc1d259450794aa7ac1f4e26e66c0eabd0f6f28 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:22:00 +0300 Subject: [PATCH 
03/12] remove dups --- .github/workflows/provider.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index e2a1c6e..7cc955f 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -26,9 +26,6 @@ jobs: LAKEFS_INSTALLATION_USER_NAME: docker LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }} LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }} - LAKEFS_INSTALLATION_USER_NAME: docker - LAKEFS_INSTALLATION_ACCESS_KEY_ID: ${{ env.KEY }} - LAKEFS_INSTALLATION_SECRET_ACCESS_KEY: ${{ env.SECRET }} LAKECTL_SERVER_ENDPOINT_URL: http://localhost:8000 LAKECTL_CREDENTIALS_ACCESS_KEY_ID: $${ env.Key }} LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: $${ env.SECRET }} From 7895d86ebcd662dfbbcc2a232e355be45ec198a2 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:26:20 +0300 Subject: [PATCH 04/12] specify the container name --- .github/workflows/provider.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 7cc955f..8ef3d2b 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -63,7 +63,7 @@ jobs: run: astro dev start - name: Create test repo - run: docker exec lakectl repo create example-repo local://data/example-repo + run: docker exec lakefs lakectl repo create example-repo local://data/example-repo - name: Run lakeFS DAG working-directory: astro From 671ccba07743a0e8dc45fc4ce659fe3bcdb21e70 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:37:19 +0300 Subject: [PATCH 05/12] wait for lakefs and back to use curl --- .github/workflows/provider.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 8ef3d2b..9f0cbef 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -29,7 +29,13 @@ jobs: 
LAKECTL_SERVER_ENDPOINT_URL: http://localhost:8000 LAKECTL_CREDENTIALS_ACCESS_KEY_ID: $${ env.Key }} LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: $${ env.SECRET }} - + ports: + 8000:8000 + options: >- + --health-cmd curl --head --retry 5 --retry-connrefused --retry-delay 1 http://localhost:8000/_health + --health-interval 10s + --health-timeout 5s + --health-retries 5 steps: - name: Checkout uses: actions/checkout@v4 @@ -63,7 +69,7 @@ jobs: run: astro dev start - name: Create test repo - run: docker exec lakefs lakectl repo create example-repo local://data/example-repo + run: curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' http://localhost:8000/api/v1/repositories - name: Run lakeFS DAG working-directory: astro From 242467edfdc62afe7e54321edde3d5d6c40e7659 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:42:30 +0300 Subject: [PATCH 06/12] fix yaml syntax --- .github/workflows/provider.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 9f0cbef..c3f77f7 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -69,7 +69,8 @@ jobs: run: astro dev start - name: Create test repo - run: curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' http://localhost:8000/api/v1/repositories + run: | + curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' http://localhost:8000/api/v1/repositories - name: Run lakeFS DAG working-directory: astro From 7a85323360a15f56d6b45e9c27799aaa41f1faf8 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 12:47:03 +0300 Subject: [PATCH 07/12] fix port mapping --- .github/workflows/provider.yaml | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index c3f77f7..eb5bb13 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -30,7 +30,7 @@ jobs: LAKECTL_CREDENTIALS_ACCESS_KEY_ID: $${ env.Key }} LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY: $${ env.SECRET }} ports: - 8000:8000 + - 8000:8000 options: >- --health-cmd curl --head --retry 5 --retry-connrefused --retry-delay 1 http://localhost:8000/_health --health-interval 10s @@ -70,7 +70,7 @@ jobs: - name: Create test repo run: | - curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' http://localhost:8000/api/v1/repositories + curl -u '${{ env.KEY }}:${{ env.SECRET }}' -H 'Content-Type: application/json' -X POST --data '{"name":"example-repo","storage_namespace":"local://data/"}' 'http://localhost:8000/api/v1/repositories' - name: Run lakeFS DAG working-directory: astro From 5f7bb5c638904c45610e8d8ff28fcca6725b076a Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 14:09:50 +0300 Subject: [PATCH 08/12] fix curl flags --- .github/workflows/provider.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index eb5bb13..221eae7 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -32,7 +32,7 @@ jobs: ports: - 8000:8000 options: >- - --health-cmd curl --head --retry 5 --retry-connrefused --retry-delay 1 http://localhost:8000/_health + --health-cmd curl --fail -LI http://localhost:8000/_health --health-interval 10s --health-timeout 5s --health-retries 5 From 45869dde120b78865ba30f6dd14816a4d079971e Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 14:17:22 +0300 Subject: [PATCH 09/12] quote health cmd --- .github/workflows/provider.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 221eae7..0bda30d 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -32,7 +32,7 @@ jobs: ports: - 8000:8000 options: >- - --health-cmd curl --fail -LI http://localhost:8000/_health + --health-cmd "curl --fail -LI http://localhost:8000/_health" --health-interval 10s --health-timeout 5s --health-retries 5 From 8739cbe8448117db3dd013d7d47ac5b3385b5980 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Sun, 15 Oct 2023 14:34:53 +0300 Subject: [PATCH 10/12] use docker exec to list symlink --- .github/workflows/provider.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 0bda30d..fcbceb6 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -75,7 +75,7 @@ jobs: - name: Run lakeFS DAG working-directory: astro run: | - astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://172.17.0.1:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}" + astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://localhost:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}" astro dev run dags unpause lakeFS_workflow astro dev run dags trigger lakeFS_workflow sleep 30 @@ -101,11 +101,7 @@ jobs: with: timeout_minutes: 3 max_attempts: 30 - command: docker-compose -f ops/docker-compose.yaml exec -T lakefs ls lakefs/data/block/data/symlinks/example-repo/example-branch/path/to/symlink.txt - - - name: lakeFS logs - if: ${{ always() }} - run: docker-compose -f ops/docker-compose.yaml logs --tail=1000 lakefs + command: docker exec lakefs ls lakefs/data/block/data/symlinks/example-repo/example-branch/path/to/symlink.txt - name: airflow scheduler logs if: ${{ always() }} From b8fd2582b60294886eb4aa44ceacd7ed72f30fed Mon Sep 17 00:00:00 2001 From: Barak 
Amar Date: Mon, 16 Oct 2023 00:54:03 +0300 Subject: [PATCH 11/12] revert lakefs host for astro conn --- .github/workflows/provider.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index fcbceb6..42daf03 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -75,7 +75,7 @@ jobs: - name: Run lakeFS DAG working-directory: astro run: | - astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://localhost:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}" + astro dev run connections add conn_lakefs --conn-type=HTTP --conn-host=http://172.17.0.1:8000 --conn-login="${{ env.KEY }}" --conn-password="${{ env.SECRET }}" astro dev run dags unpause lakeFS_workflow astro dev run dags trigger lakeFS_workflow sleep 30 From 207c9a98544d8c77c9742e1cdac154e27ea0a938 Mon Sep 17 00:00:00 2001 From: Barak Amar Date: Mon, 16 Oct 2023 15:16:23 +0300 Subject: [PATCH 12/12] name the service --- .github/workflows/provider.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/provider.yaml b/.github/workflows/provider.yaml index 42daf03..11bf787 100644 --- a/.github/workflows/provider.yaml +++ b/.github/workflows/provider.yaml @@ -32,6 +32,7 @@ jobs: ports: - 8000:8000 options: >- + --name lakefs --health-cmd "curl --fail -LI http://localhost:8000/_health" --health-interval 10s --health-timeout 5s