diff --git a/.github/workflows/check_links.yml b/.github/workflows/check_links.yml
index 5d2e1f1..8f56fb2 100644
--- a/.github/workflows/check_links.yml
+++ b/.github/workflows/check_links.yml
@@ -39,13 +39,42 @@ jobs:
           key: cache-lychee-${{ github.sha }}
           restore-keys: cache-lychee-
 
-      # Extract and check URLs directly from token files
+      # Extract and check URLs directly from token files (checking dumped file did not work)
       - name: Check token art URLs
         uses: lycheeverse/lychee-action@v2
         with:
           args: '${{env.lychee_args}} -- tokens.xml'
           jobSummary: true
 
+      # Extract picture URLs from tokens.xml
+      - name: Extract URLs
+        id: tokens_pic_urls
+        uses: lycheeverse/lychee-action@v2
+        with:
+          args: '--dump --exclude www.w3.org -- tokens.xml'
+          output: lychee/out.md
+          jobSummary: false
+
+      # Analyse extracted links (1/2)
+      - name: List duplicated image links
+        if: steps.tokens_pic_urls.outcome == 'success'
+        shell: bash
+        # Drop empty lines | strip the "?..." suffix after .jpg (trailing integers, Scryfall) | sort | count and list duplicates
+        # Filtering uses sed rather than grep: "shell: bash" enables pipefail and grep exits 1 when no line matches
+        run: |
+          echo "🪞 **Duplicated Image Links**" >> "$GITHUB_STEP_SUMMARY"
+          sed -e '/^$/d' -e 's/\.jpg?.*/.jpg/' lychee/out.md | sort | uniq -cd >> "$GITHUB_STEP_SUMMARY"
+
+      # Analyse extracted links (2/2)
+      - name: List image hosting sources
+        if: steps.tokens_pic_urls.outcome == 'success'
+        shell: bash
+        # Extract non-empty 3rd field (domain name) | sort | count duplicates and list with numbers | sort descending
+        # Empty fields are skipped inside awk rather than with grep: "shell: bash" enables pipefail and grep exits 1 when no line matches
+        run: |
+          echo "📊 **Image Hosting Statistics**" >> "$GITHUB_STEP_SUMMARY"
+          awk -F/ '$3 != "" {print $3}' lychee/out.md | sort | uniq -c | sort -nr >> "$GITHUB_STEP_SUMMARY"
+
       # Always save cache
       - name: Save lychee cache
         uses: actions/cache/save@v4