
CI: Show useful data about token URLs #192

Open · wants to merge 12 commits into base: master
29 changes: 28 additions & 1 deletion .github/workflows/check_links.yml
@@ -39,13 +39,40 @@ jobs:
key: cache-lychee-${{ github.sha }}
restore-keys: cache-lychee-

# Extract and check URLs directly from token files
# Extract and check URLs directly from token files (checking dumped file did not work)
- name: Check token art URLs
uses: lycheeverse/lychee-action@v2
with:
args: '${{env.lychee_args}} -- tokens.xml'
jobSummary: true

# Extract picture URLs from tokens.xml
- name: Extract URLs
id: tokens_pic_urls
uses: lycheeverse/lychee-action@v2
with:
args: '--dump --exclude www.w3.org -- tokens.xml'
output: lychee/out.md
jobSummary: false

# Analyse extracted links (1/2)
- name: List duplicated image links
if: steps.tokens_pic_urls.outcome == 'success'
shell: bash
# Remove empty lines | strip query strings after .jpg (Scryfall cache keys) | sort | count and list duplicates
run: |
echo "🪞 **Duplicated Image Links**" >> $GITHUB_STEP_SUMMARY
grep . lychee/out.md | sed 's/\.jpg?.*/.jpg/' | sort | uniq -cd >> $GITHUB_STEP_SUMMARY

# Analyse extracted links (2/2)
- name: List image hosting sources
if: steps.tokens_pic_urls.outcome == 'success'
shell: bash
# Extract 3rd field (domain name) | remove empty lines | sort | count duplicates and list with numbers | sort descending
run: |
echo "📊 **Image Hosting Statistics**" >> $GITHUB_STEP_SUMMARY
awk -F/ '{print $3}' lychee/out.md | grep . | sort | uniq -c | sort -nr >> $GITHUB_STEP_SUMMARY
Member:

Why is grep . here? Are there empty lines? Why does it run sort twice?

Member Author (@tooomm), Mar 27, 2024:

> Why is grep . here? Are there empty lines?

There used to be empty lines in the output when we had tokens or sets added without image links. That is currently not the case, but since we do not validate for empty/missing links, it could happen again.

I can also remove it, though. I am actually not sure whether empty lines would even mess with the result or not.
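A quick sketch suggests they would: without grep ., blank lines sort together and uniq -cd reports them as one extra, bogus duplicate entry. The sample file below is made up and only mimics the shape of lychee's dump output.

```shell
#!/usr/bin/env sh
# Made-up stand-in for lychee/out.md: one real duplicate plus two blank lines.
printf 'https://example.com/a.jpg\n\nhttps://example.com/a.jpg\n\nhttps://example.com/b.jpg\n' > out.md

echo '--- without grep . : the blank lines surface as a bogus duplicate'
sort out.md | uniq -cd

echo '--- with grep . : blank lines are dropped before counting'
grep . out.md | sort | uniq -cd
```

So keeping grep . is a cheap guard in case empty links ever reappear.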


> Why does it run sort twice?

You mean sorting descending directly before the count + list step? I simply did not think about combining them.

Member:

I guess the comment on it could be more explicit: it takes the third column from the file (the domain), removes empty lines, sorts, then counts the duplicates, listing them with their count before the URL, and finally sorts again, this time numerically descending by that count.
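A toy run of that pipeline makes each stage visible; the domains below are invented for illustration:

```shell
#!/usr/bin/env sh
# Made-up dump file; splitting "https://host/path" on "/" puts the host in field 3.
printf 'https://cards.scryfall.io/x.jpg\nhttps://i.imgur.com/y.jpg\nhttps://cards.scryfall.io/z.jpg\n' > out.md

# column 3 | drop blanks | group | count per domain | order by count, descending
awk -F/ '{print $3}' out.md | grep . | sort | uniq -c | sort -nr
```

This prints cards.scryfall.io with count 2 on the first line, then i.imgur.com with count 1.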

Member Author:

Updated the wording of the comment.

The sorting needs to be split: the first sort brings identical domain names next to each other so that the counting works (uniq only collapses adjacent lines). Only after the entries are combined and given their counts can the list be sorted descending by count.
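The adjacency requirement is easy to demonstrate: uniq only collapses neighbouring lines, and the counts that sort -nr orders by do not exist until uniq -c has added them.

```shell
#!/usr/bin/env sh
# Unsorted: the two "b" lines are not adjacent, so uniq -c counts them separately.
printf 'b\na\nb\n' | uniq -c    # three output lines: 1 b, 1 a, 1 b

# Sorted first: the duplicates become adjacent and collapse into one line with count 2.
printf 'b\na\nb\n' | sort | uniq -c    # two output lines: 1 a, 2 b
```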


# Always save cache
- name: Save lychee cache
uses: actions/cache/save@v4