Review notes for this patch (free text placed ahead of the "diff --git" header):
- The patch is re-expanded to one diff line per row. YAML indentation was
  reconstructed as 2-space with indented sequences -- TODO confirm against the
  target file, since context lines must match it exactly.
- fetch-tools-stepwise and merge-fetch-filter now declare `needs:` so the
  artifacts they download have been uploaded before they start.
- The single-artifact download now targets the directory sources/data/ because
  actions/download-artifact uses `path` as the destination directory.
- The duplicated `path` input of "Download All Artifacts" is reduced to the
  directory that the "Merge all tools" step reads from.
- fetch-servers no longer reuses the display name of the merge job.
- Hunk headers are recomputed for the changed line counts; the post-image blob
  id on the "index" line predates these review changes.

diff --git a/.github/workflows/fetch_filter_resources.yaml b/.github/workflows/fetch_filter_resources.yaml
index a46f60ef..4d85d2ed 100644
--- a/.github/workflows/fetch_filter_resources.yaml
+++ b/.github/workflows/fetch_filter_resources.yaml
@@ -13,9 +13,31 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  fetch-filter:
+  fetch-servers:
     runs-on: ubuntu-20.04
-    name: Fetch tool stepwise and merge, fetch tutorials and filter the resources for communities
+    name: Fetch available servers
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}  # NOTE(review): this job defines no matrix, so this expands to an empty string - confirm the intended version
+      - name: Install requirement
+        run: |
+          python -m pip install -r requirements.txt
+          sudo apt-get install jq
+      - name: Fetch list of all available servers
+        run: |
+          python sources/bin/get_public_galaxy_servers.py -o sources/data/available_public_servers.csv
+      - name: Archive available servers
+        uses: actions/upload-artifact@v4
+        with:
+          name: available-servers
+          path: sources/data/available_public_servers.csv
+  fetch-tools-stepwise:
+    runs-on: ubuntu-20.04
+    name: Fetch tool stepwise
+    needs: fetch-servers  # the available-servers artifact is downloaded in this job
     strategy:
       max-parallel: 1 #need to run one after another, since otherwise there is a chance, that mulitple jobs want to push to the results branch at the same time (which fails due to merge)
       matrix:
@@ -32,17 +54,42 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install requirement
-        run: |
-          python -m pip install -r requirements.txt
-          sudo apt-get install jq
-      - name: Fetch list of all available servers
-        run: |
-          python sources/bin/get_public_galaxy_servers.py -o sources/data/available_public_servers.csv
+        run: python -m pip install -r requirements.txt
+      - name: Download a single artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: available-servers
+          path: sources/data/  # destination directory; the artifact holds available_public_servers.csv
       - name: Fetch all tool stepwise
         run: |
           bash sources/bin/extract_all_tools.sh "${{ matrix.subset }}"
         env:
           GITHUB_API_KEY: ${{ secrets.GH_API_TOKEN }}
+      - name: Archive tool sublists production artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: tools-${{ matrix.subset }}
+          path: communities/all/resources/repositories${{ matrix.subset }}.list_tools.tsv
+  merge-fetch-filter:
+    runs-on: ubuntu-20.04
+    name: Merge tools, fetch tutorials and filter the resources for communities
+    needs: fetch-tools-stepwise  # every tools-* artifact must exist before merging
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}  # NOTE(review): this job defines no matrix, so this expands to an empty string - confirm the intended version
+      - name: Install requirement
+        run: |
+          python -m pip install -r requirements.txt
+          sudo apt-get install jq
+      - name: Download All Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: tools-*
+          merge-multiple: true
+          path: communities/all/resources/  # single destination; "Merge all tools" globs this directory
       - name: Merge all tools
         run: | #merge files with only one header -> https://stackoverflow.com/questions/16890582/unixmerge-multiple-csv-files-with-same-header-by-keeping-the-header-of-the-firs; map(.[]) -> https://stackoverflow.com/questions/42011086/merge-arrays-of-json (get flat array, one tool per entry)
           awk 'FNR==1 && NR!=1{next;}{print}' communities/all/resources/repositories*.list_tools.tsv > communities/all/resources/tools.tsv