From 927c2cd7daf63d3fbac4de2a160ef9d64ec792df Mon Sep 17 00:00:00 2001 From: rjun Date: Fri, 26 Apr 2024 20:47:18 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=85build:=20add=20features=20to=20check?= =?UTF-8?q?=20links=20status=20and=20workflow=20to=20generate=20link=20sta?= =?UTF-8?q?tus=20page?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/link-status-report.yml | 36 ++++++ docs/index.md | 3 + utils/README.md | 17 +++ utils/link_status_report.py | 137 +++++++++++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 .github/workflows/link-status-report.yml create mode 100644 utils/README.md create mode 100644 utils/link_status_report.py diff --git a/.github/workflows/link-status-report.yml b/.github/workflows/link-status-report.yml new file mode 100644 index 0000000..0417df7 --- /dev/null +++ b/.github/workflows/link-status-report.yml @@ -0,0 +1,36 @@ +name: Generate Link Status Report + +on: + push: + branches: + - main + +jobs: + generate_report: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install aiohttp + + - name: Generate link status report + run: | + python utils/link_status_report.py + + - name: Deploy report + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GH_TOKEN }} + publish_dir: ./ + destination_dir: link-status-report + keep_files: true \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 49a343e..97a4060 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,3 +7,6 @@ > 作為自己學習 Computer Science 的紀錄與未來規劃,為自己在這茫茫碼海中留下一點痕跡 > 期許自己的血液裡都流著程式碼的那天 + + +> [**Link Status Report**](https://xxrjun.github.io/cs-resources/link-status-report/links_report.html) \ No newline at end of file diff --git a/utils/README.md 
"""Check every Markdown link under ``docs/`` and write an HTML status report.

Usage::

    pip install aiohttp
    python utils/link_status_report.py

The script walks a folder for ``*.md`` files, probes every ``http(s)`` link
found in them and writes one colour-coded HTML table per file.
"""

import asyncio
import html
import os
import re
import webbrowser

import aiohttp

# Regular expression to find Markdown links: group 1 is the link text,
# group 2 the URL.  The URL stops at the first unbalanced ")" so that two
# links written back to back (no whitespace in between) are not merged into
# one, while one level of balanced parentheses inside the URL is still
# accepted (e.g. ".../wiki/Foo_(bar)").
markdown_link_re = r"\[([^\]]+)\]\((https?://(?:[^\s()]|\([^\s()]*\))+)\)"

# Upper bound on simultaneous requests issued for a single Markdown file.
MAX_CONCURRENT_REQUESTS = 30

# Total time budget, in seconds, for one request (redirects included).
REQUEST_TIMEOUT_SECONDS = 5

# Status codes meaning "this server refuses HEAD"; such links are re-probed
# with GET before being reported as broken.
_HEAD_UNSUPPORTED_STATUSES = frozenset({405, 501})

_REPORT_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Link Status Report</title>
<style>
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 4px 8px; text-align: left; }
</style>
</head>
<body>
<h1>Link Status Report</h1>
"""

_REPORT_TAIL = "</body>\n</html>\n"


async def is_link_active(url, session, semaphore):
    """Probe *url* and return a ``(status_text, css_color)`` pair.

    Args:
        url: Absolute ``http``/``https`` URL to probe.
        session: Open ``aiohttp.ClientSession`` used to send the request.
        semaphore: ``asyncio.Semaphore`` bounding concurrent requests.

    Returns:
        ``("✓", "green")`` when the final response status is 200,
        ``("<status>", "red")`` for any other status,
        ``("✕", "orange")`` on timeout and ``("✕", "red")`` on any other
        client-side failure (DNS, TLS, connection reset, invalid URL, ...).
    """
    timeout = aiohttp.ClientTimeout(total=REQUEST_TIMEOUT_SECONDS)
    async with semaphore:  # Limit the number of concurrent requests
        try:
            async with session.head(
                url, allow_redirects=True, timeout=timeout
            ) as response:
                status = response.status
            if status in _HEAD_UNSUPPORTED_STATUSES:
                # The server rejects HEAD itself, not the resource: retry
                # with GET so a live page is not reported as dead.
                async with session.get(
                    url, allow_redirects=True, timeout=timeout
                ) as response:
                    status = response.status
        except asyncio.TimeoutError:
            return ("✕", "orange")
        except aiohttp.ClientError:
            return ("✕", "red")
    return ("✓", "green") if status == 200 else (f"{status}", "red")


async def check_links_in_markdown(file_path):
    """Check every Markdown link found in *file_path*.

    Args:
        file_path: Path of a UTF-8 encoded Markdown file.

    Returns:
        A list of ``(text, status, url, color)`` tuples, one per link in
        document order.  ``text`` has Markdown bold markers (``**``)
        removed; ``status`` and ``color`` come from :func:`is_link_active`.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()

    links = re.findall(markdown_link_re, content)
    if not links:
        return []

    # Probe each distinct URL once, even when a file links to it repeatedly.
    unique_urls = list(dict.fromkeys(url for _, url in links))
    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)

    async with aiohttp.ClientSession() as session:
        outcomes = await asyncio.gather(
            *(is_link_active(url, session, semaphore) for url in unique_urls)
        )
    outcome_by_url = dict(zip(unique_urls, outcomes))

    results = []
    for text, url in links:
        status, color = outcome_by_url[url]
        results.append((text.replace("**", ""), status, url, color))
    return results


def _render_file_section(file_path, results):
    """Return the HTML heading and table for one Markdown file."""
    rows = [
        "<tr>"
        f"<td>{html.escape(text)}</td>"
        f'<td style="color: {color};">{html.escape(status)}</td>'
        f'<td><a href="{html.escape(url)}">{html.escape(url)}</a></td>'
        "</tr>\n"
        for text, status, url, color in results
    ]
    return (
        f"<h2>{html.escape(file_path)}</h2>\n"
        "<table>\n"
        "<tr><th>Link Text</th><th>Status</th><th>URL</th></tr>\n"
        + "".join(rows)
        + "</table>\n"
    )


async def generate_report(folder_path, report_path):
    """Write an HTML link-status report for all Markdown files in a folder.

    Args:
        folder_path: Folder searched recursively for ``*.md`` files.
        report_path: Destination path of the HTML report (overwritten).

    Returns:
        The ``file://`` URL of the written report.

    The report is also opened in the default web browser, except when the
    ``CI`` environment variable is set (as GitHub Actions does), where no
    browser is available.
    """
    markdown_files = []
    for root, _dirs, files in os.walk(folder_path):
        for name in files:
            if name.endswith(".md"):
                markdown_files.append(os.path.join(root, name))
    markdown_files.sort()  # os.walk order is arbitrary; keep reports stable

    sections = [_REPORT_HEAD]
    for markdown_file in markdown_files:
        results = await check_links_in_markdown(markdown_file)
        sections.append(_render_file_section(markdown_file, results))
    sections.append(_REPORT_TAIL)

    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write("".join(sections))

    report_url = "file://" + os.path.realpath(report_path)
    if not os.environ.get("CI"):
        webbrowser.open(report_url)
    return report_url


if __name__ == "__main__":
    # Resolve paths from this file's location rather than the current
    # working directory: the workflow starts the script from the repository
    # root, while a developer may start it from inside utils/.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    markdown_folder_path = os.path.join(repo_root, "docs")
    # The workflow publishes the repository root under link-status-report/
    # and docs/index.md links to link-status-report/links_report.html, so
    # the report must be written to <repo root>/links_report.html.
    report_path = os.path.join(repo_root, "links_report.html")
    report_path = asyncio.run(generate_report(markdown_folder_path, report_path))
    print(
        f"Link status report generated for markdown files in "
        f"{markdown_folder_path} at {report_path}"
    )