diff --git a/.github/workflows/update-pages.yml b/.github/workflows/update-pages.yml
index b257b67f..01accd19 100644
--- a/.github/workflows/update-pages.yml
+++ b/.github/workflows/update-pages.yml
@@ -61,6 +61,20 @@ jobs:
           # copy dependencies
           cp -f ./node_modules/plotly.js/dist/plotly.min.js ./gh-pages/plotly.js
 
+      - name: Update
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_API_TOKEN }}
+          CROWDIN_TOKEN: ${{ secrets.CROWDIN_TOKEN }}
+          DISCORD_INVITE: ${{ secrets.DISCORD_INVITE }}
+          FACEBOOK_GROUP_ID: ${{ secrets.FACEBOOK_GROUP_ID }}
+          FACEBOOK_PAGE_ID: ${{ secrets.FACEBOOK_PAGE_ID }}
+          FACEBOOK_TOKEN: ${{ secrets.FACEBOOK_ACCESS_TOKEN }}
+          GITHUB_REPOSITORY_OWNER: ${{ secrets.GH_ORG_NAME }}
+          GITHUB_TOKEN: ${{ secrets.GH_BOT_TOKEN || secrets.GITHUB_TOKEN }}
+          READTHEDOCS_TOKEN: ${{ secrets.READTHEDOCS_TOKEN }}
+        run: |
+          python -u ./src/update.py
+
       - name: Convert notebook
         env:
           GITHUB_TOKEN: ${{ secrets.GH_BOT_TOKEN || secrets.GITHUB_TOKEN }}
diff --git a/notebook/dashboard.ipynb b/notebook/dashboard.ipynb
index f2945811..1de60e3d 100644
--- a/notebook/dashboard.ipynb
+++ b/notebook/dashboard.ipynb
@@ -78,19 +78,20 @@
     "# Imports\n",
     "\n",
     "# standard imports\n",
+    "import json\n",
     "import os\n",
-    "import numpy as np\n",
+    "import time\n",
     "\n",
     "# lib imports\n",
     "from dotenv import load_dotenv\n",
     "from github import Github, UnknownObjectException\n",
     "from IPython.display import HTML, display\n",
     "from itables import init_notebook_mode, show\n",
+    "import numpy as np\n",
     "import pandas as pd\n",
     "import plotly.express as px\n",
     "import plotly.graph_objects as go\n",
-    "import plotly.io as pio\n",
-    "import requests"
+    "import plotly.io as pio"
    ]
   },
   {
@@ -119,15 +120,14 @@
     "\n",
     "# constants\n",
     "text_template = '%{text}'\n",
-    "uno_base_url = 'https://app.lizardbyte.dev/uno'\n",
+    "data_dir = os.path.join(os.path.dirname(os.getcwd()), 'gh-pages')\n",
     "\n",
     "# all readthedocs projects\n",
     "# readthedocs data\n",
-    "readthedocs_data_url = f'{uno_base_url}/readthedocs/projects.json'\n",
-    "readthedocs_response = requests.get(readthedocs_data_url)\n",
-    "if not readthedocs_response.ok:\n",
-    "    raise LookupError(\"Failed to fetch data from uno\")\n",
-    "readthedocs_data = readthedocs_response.json()"
+    "readthedocs_path = os.path.join(data_dir, 'readthedocs', 'projects.json')\n",
+    "\n",
+    "with open(readthedocs_path, 'r') as f:\n",
+    "    readthedocs_data = json.load(f)"
    ]
   },
   {
@@ -149,15 +149,13 @@
     "    open_issues = [issue for issue in open_issues if issue.pull_request is None]\n",
     "\n",
     "    # coverage data\n",
-    "    coverage_url = f'{uno_base_url}/codecov/{repo.name}.json'\n",
-    "    coverage_response = requests.get(coverage_url)\n",
     "    coverage = 0\n",
-    "    if coverage_response.status_code == 200:\n",
-    "        try:\n",
-    "            coverage_data = coverage_response.json()\n",
-    "            coverage = coverage_data['totals']['coverage']\n",
-    "        except Exception:\n",
-    "            pass\n",
+    "    try:\n",
+    "        with open(os.path.join(data_dir, 'codecov', f'{repo.name}.json')) as f:\n",
+    "            coverage_data = json.load(f)\n",
+    "        coverage = coverage_data['totals']['coverage']\n",
+    "    except Exception:\n",
+    "        pass\n",
     "\n",
     "    # readthedocs data\n",
     "    readthedocs_project = None\n",
@@ -222,6 +220,7 @@
    "outputs": [],
    "source": [
     "# Initial Results\n",
+    "print(f'Last Updated: {time.strftime(\"%Y-%m-%d %H:%M:%S\", time.gmtime())} UTC')\n",
     "print(f'Total Repositories: {len(repo_data)}')\n",
     "print(f'Archived Repositories: {df[\"archived\"].sum()}')\n",
     "print(f'Forked Repositories: {df[\"fork\"].sum()}')\n",
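
Note: the notebook cells above now read JSON that the new "Update" step caches into the
local gh-pages checkout, instead of fetching it from app.lizardbyte.dev. Based on the
paths used in the notebook and in src/update.py below, the expected layout is roughly
(illustrative, not exhaustive):

    gh-pages/
        codecov/<repo_name>.json
        github/repos.json
        readthedocs/projects.json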
diff --git a/requirements.txt b/requirements.txt
index 8ac99981..a86e9ba4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,14 @@
+cloudscraper==1.2.71
+crowdin-api-client==1.17.0
 ipython==8.27.0
 itables==2.1.4
 notebook==7.2.2
 pandas==2.2.2
+pillow==10.4.0
 plotly==5.24.0
 pygithub==2.4.0
 python-dotenv==1.0.1
 requests==2.32.3
+svgwrite==1.4.3
+tqdm==4.66.5
+unhandled_exit==1.0.0
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/update.py b/src/update.py
new file mode 100644
index 00000000..b595333e
--- /dev/null
+++ b/src/update.py
@@ -0,0 +1,542 @@
+# standard imports
+import argparse
+import json
+import os
+import pathlib
+from threading import Thread
+from typing import Any, Union
+
+# lib imports
+import cloudscraper
+from crowdin_api import CrowdinClient
+from dotenv import load_dotenv
+from PIL import Image
+import requests
+from requests.adapters import HTTPAdapter
+import svgwrite
+from tqdm import tqdm
+import unhandled_exit
+
+# setup environment if running locally
+load_dotenv()
+
+# setup threading exception handling
+unhandled_exit.activate()
+
+# setup requests session
+s = cloudscraper.CloudScraper()  # CloudScraper inherits from requests.Session
+retry_adapter = HTTPAdapter(max_retries=5)
+s.mount('https://', retry_adapter)
+
+# constants
+BASE_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'gh-pages')
+
+
+# custom print, only visible when GitHub Actions debug logging is enabled
+def debug_print(
+        *values: object,
+        sep: Union[str, None] = ' ',
+        end: Union[str, None] = '\n',
+):
+    if os.getenv('ACTIONS_RUNNER_DEBUG') or os.getenv('ACTIONS_STEP_DEBUG'):
+        print(*values, sep=sep, end=end)
+
+
+def save_image_from_url(file_path: str, file_extension: str, image_url: str, size_x: int = 0, size_y: int = 0):
+    """
+    Write image data to file. If ``size_x`` and ``size_y`` are both supplied, a resized image will also be saved.
+
+    Parameters
+    ----------
+    file_path : str
+        The file path to save the file at.
+    file_extension : str
+        The extension of the file name.
+    image_url : str
+        The image url.
+    size_x : int
+        The ``x`` dimension to resize the image to. If used, ``size_y`` must also be defined.
+    size_y : int
+        The ``y`` dimension to resize the image to. If used, ``size_x`` must also be defined.
+    """
+    debug_print(f'Saving image from {image_url}')
+    # determine the directory
+    directory = os.path.dirname(file_path)
+
+    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
+
+    og_img_data = s.get(url=image_url).content
+
+    file_name_with_ext = f'{file_path}.{file_extension}'
+    with open(file_name_with_ext, 'wb') as handler:
+        handler.write(og_img_data)
+
+    # resize the image
+    if size_x and size_y:
+        pil_img_data = Image.open(file_name_with_ext)
+        resized_img_data = pil_img_data.resize((size_x, size_y))
+        resized_img_data.save(fp=f'{file_path}_{size_x}x{size_y}.{file_extension}')
+
+
+def write_json_files(file_path: str, data: Any):
+    """
+    Write dictionary to json file.
+
+    Parameters
+    ----------
+    file_path : str
+        The file path to save the file at, excluding the file extension which will be `.json`.
+    data : Any
+        The dictionary data to write in the json file.
+    """
+    debug_print(f'Writing json file at {file_path}')
+    # determine the directory
+    directory = os.path.dirname(file_path)
+
+    pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
+
+    with open(f'{file_path}.json', 'w') as f:
+        json.dump(obj=data, fp=f, indent=args.indent)
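+
+
+# The two helpers above are the common write path for every updater below; an
+# illustrative example of the pattern (the URL is hypothetical, not part of the real flow):
+#
+#     data = s.get(url='https://example.com/api.json').json()
+#     write_json_files(file_path=os.path.join(BASE_DIR, 'example', 'data'), data=data)
+#
+# which writes gh-pages/example/data.json, creating the directory if needed.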
+ """ + aur_base_url = 'https://aur.archlinux.org/rpc?v=5&type=info&arg=' + aur_repos = ['sunshine'] + + for repo in tqdm( + iterable=aur_repos, + desc='Updating AUR data', + ): + url = f'{aur_base_url}{repo}' + response = s.get(url=url) + data = response.json() + + file_path = os.path.join(BASE_DIR, 'aur', repo) + write_json_files(file_path=file_path, data=data) + + +def update_codecov(): + """ + Get code coverage data from Codecov API. + """ + headers = dict( + Accept='application/json', + Authorization=f'bearer {args.codecov_token}', + ) + base_url = f'https://codecov.io/api/v2/gh/{args.github_repository_owner}' + + url = f'{base_url}/repos?page_size=500' + + response = s.get(url=url, headers=headers) + data = response.json() + assert data['next'] is None, 'More than 500 repos found, need to implement pagination.' + + for repo in tqdm( + iterable=data['results'], + desc='Updating Codecov data', + ): + url = f'{base_url}/repos/{repo["name"]}' + response = s.get(url=url, headers=headers) + data = response.json() + + file_path = os.path.join(BASE_DIR, 'codecov', repo['name']) + write_json_files(file_path=file_path, data=data) + + +def update_crowdin(): + """ + Cache and update data from Crowdin API, and generate completion graph. + """ + client = CrowdinClient(token=args.crowdin_token) + + # automatically collect crowdin projects + project_data = client.projects.list_projects()['data'] + + for project in tqdm( + iterable=project_data, + desc='Updating Crowdin data', + ): + project_name = project['data']['name'] + project_id = project['data']['id'] + data = client.translation_status.get_project_progress(projectId=project_id)['data'] + file_path = os.path.join(BASE_DIR, 'crowdin', project_name.replace(' ', '_')) + write_json_files(file_path=file_path, data=data) + + # sort data by approval progress first, then translation progress, then alphabetically + data.sort(key=lambda x: ( + -x['data']['approvalProgress'], + -x['data']['translationProgress'], + x['data']['language']['name'] + ), reverse=False) + + # ensure "en" is first, if it exists + try: + en_index = [x['data']['language']['id'] for x in data].index('en') + except ValueError: + pass + else: + data.insert(0, data.pop(en_index)) + + # generate translation and approval completion graph + line_height = 32 + bar_height = 16 + svg_width = 500 + label_width = 200 + progress_width = 160 + insert = 12 + bar_corner_radius = 0 + + dwg = svgwrite.Drawing(filename=f'{file_path}_graph.svg', size=(svg_width, len(data) * line_height)) + + # load css font + dwg.embed_stylesheet(""" + @import url(https://fonts.googleapis.com/css?family=Open+Sans); + .svg-font { + font-family: "Open Sans"; + font-size: 12px; + fill: #999; + } + """) + for lang_base in tqdm( + iterable=data, + desc=f'Generating Crowdin graph for project: {project_name}', + ): + language = lang_base['data'] + g = dwg.add(dwg.g( + class_="svg-font", + transform='translate(0,{})'.format(data.index(lang_base) * line_height) + )) + g.add(dwg.text( + f"{language['language']['name']} ({language['language']['id']})", + insert=(label_width, 18), + style='text-anchor:end;') + ) + + translation_progress = language['translationProgress'] / 100.0 + approval_progress = language['approvalProgress'] / 100.0 + + progress_insert = (label_width + insert, 6) + if translation_progress < 100: + g.add(dwg.rect( + insert=progress_insert, + size=(progress_width, bar_height), + rx=bar_corner_radius, + ry=bar_corner_radius, + fill='#999', + style='filter:opacity(30%);') + ) + if translation_progress > 0 and 
+
+
+def update_crowdin():
+    """
+    Cache and update data from the Crowdin API, and generate a completion graph per project.
+    """
+    client = CrowdinClient(token=args.crowdin_token)
+
+    # automatically collect crowdin projects
+    project_data = client.projects.list_projects()['data']
+
+    for project in tqdm(
+            iterable=project_data,
+            desc='Updating Crowdin data',
+    ):
+        project_name = project['data']['name']
+        project_id = project['data']['id']
+        data = client.translation_status.get_project_progress(projectId=project_id)['data']
+        file_path = os.path.join(BASE_DIR, 'crowdin', project_name.replace(' ', '_'))
+        write_json_files(file_path=file_path, data=data)
+
+        # sort data by approval progress first, then translation progress, then alphabetically
+        data.sort(key=lambda x: (
+            -x['data']['approvalProgress'],
+            -x['data']['translationProgress'],
+            x['data']['language']['name']
+        ), reverse=False)
+
+        # ensure "en" is first, if it exists
+        try:
+            en_index = [x['data']['language']['id'] for x in data].index('en')
+        except ValueError:
+            pass
+        else:
+            data.insert(0, data.pop(en_index))
+
+        # generate translation and approval completion graph
+        line_height = 32
+        bar_height = 16
+        svg_width = 500
+        label_width = 200
+        progress_width = 160
+        insert = 12
+        bar_corner_radius = 0
+
+        dwg = svgwrite.Drawing(filename=f'{file_path}_graph.svg', size=(svg_width, len(data) * line_height))
+
+        # load css font
+        dwg.embed_stylesheet("""
+        @import url(https://fonts.googleapis.com/css?family=Open+Sans);
+        .svg-font {
+            font-family: "Open Sans";
+            font-size: 12px;
+            fill: #999;
+        }
+        """)
+        for lang_base in tqdm(
+                iterable=data,
+                desc=f'Generating Crowdin graph for project: {project_name}',
+        ):
+            language = lang_base['data']
+            g = dwg.add(dwg.g(
+                class_="svg-font",
+                transform='translate(0,{})'.format(data.index(lang_base) * line_height)
+            ))
+            g.add(dwg.text(
+                f"{language['language']['name']} ({language['language']['id']})",
+                insert=(label_width, 18),
+                style='text-anchor:end;')
+            )
+
+            # progress values become fractions of 1 after this division
+            translation_progress = language['translationProgress'] / 100.0
+            approval_progress = language['approvalProgress'] / 100.0
+
+            progress_insert = (label_width + insert, 6)
+            if translation_progress < 1:
+                g.add(dwg.rect(
+                    insert=progress_insert,
+                    size=(progress_width, bar_height),
+                    rx=bar_corner_radius,
+                    ry=bar_corner_radius,
+                    fill='#999',
+                    style='filter:opacity(30%);')
+                )
+            if translation_progress > 0 and approval_progress < 1:
+                g.add(dwg.rect(
+                    insert=progress_insert,
+                    size=(progress_width * translation_progress, bar_height),
+                    rx=bar_corner_radius,
+                    ry=bar_corner_radius,
+                    fill='#5D89C3')
+                )
+            if approval_progress > 0:
+                g.add(dwg.rect(
+                    insert=progress_insert,
+                    size=(progress_width * approval_progress, bar_height),
+                    rx=bar_corner_radius,
+                    ry=bar_corner_radius,
+                    fill='#71C277')
+                )
+
+            g.add(dwg.text('{}%'.format(language['translationProgress']),
+                           insert=(progress_insert[0] + progress_width + insert, bar_height)))
+
+        # write the svg file
+        dwg.save(pretty=True)
+
+
+def update_discord():
+    """
+    Cache and update data from the Discord API.
+    """
+    discord_urls = [
+        f'https://discordapp.com/api/invites/{args.discord_invite}?with_counts=true',
+    ]
+
+    for discord_url in tqdm(
+            iterable=discord_urls,
+            desc='Updating Discord data',
+    ):
+        response = s.get(url=discord_url)
+        data = response.json()
+
+        file_path = os.path.join(BASE_DIR, 'discord', 'invite')
+        write_json_files(file_path=file_path, data=data)
+
+
+def update_fb():
+    """
+    Get the number of Facebook page likes and group members.
+    """
+    fb_base_url = 'https://graph.facebook.com'
+
+    fb_endpoints = dict(
+        group=f'{args.facebook_group_id}?fields=member_count,name,description&access_token={args.facebook_token}',
+        page=f'{args.facebook_page_id}/insights?metric=page_fans&access_token={args.facebook_token}'
+    )
+
+    for key, value in tqdm(
+            iterable=fb_endpoints.items(),
+            desc='Updating Facebook data',
+    ):
+        url = f'{fb_base_url}/{value}'
+        response = requests.get(url=url)
+
+        data = response.json()
+        try:
+            data['paging']
+        except KeyError:
+            pass
+        else:
+            # remove facebook token from data
+            del data['paging']
+
+        file_path = os.path.join(BASE_DIR, 'facebook', key)
+        write_json_files(file_path=file_path, data=data)
+
+
+def update_github():
+    """
+    Cache and update GitHub repo data, language stats, and banner images.
+    """
+    url = f'https://api.github.com/users/{args.github_repository_owner}/repos'
+    per_page = 100
+    repos = []
+
+    headers = dict(
+        accept='application/vnd.github.v3+json',
+    )
+
+    query_params = dict(
+        per_page=per_page,
+        page=1,
+    )
+
+    while True:
+        response = s.get(
+            url=url,
+            headers=headers,
+            params=query_params,
+        )
+        response_data = response.json()
+        repos.extend(response_data)
+
+        if len(response_data) < per_page:
+            break
+
+        query_params['page'] += 1
+
+    file_path = os.path.join(BASE_DIR, 'github', 'repos')
+    write_json_files(file_path=file_path, data=repos)
+
+    headers = dict(
+        Authorization=f'token {args.github_token}'
+    )
+    url = 'https://api.github.com/graphql'
+
+    for repo in tqdm(
+            iterable=repos,
+            desc='Updating GitHub data',
+    ):
+        # languages
+        response = s.get(url=repo['languages_url'], headers=headers)
+        # a TypeError here likely means the API rate limit was exceeded, or that there is
+        # an issue with the GitHub API, see https://www.githubstatus.com/
+        # intentionally not handled; it is better for the workflow to fail
+        languages = response.json()
+
+        file_path = os.path.join(BASE_DIR, 'github', 'languages', repo['name'])
+        write_json_files(file_path=file_path, data=languages)
+
+        # openGraphImages
+        query = """
+        {
+          repository(owner: "%s", name: "%s") {
+            openGraphImageUrl
+          }
+        }
+        """ % (repo['owner']['login'], repo['name'])
+
+        response = s.post(url=url, json={'query': query}, headers=headers)
+        repo_data = response.json()
+        try:
+            image_url = repo_data['data']['repository']['openGraphImageUrl']
+        except KeyError:
+            raise SystemExit('"GITHUB_TOKEN" is invalid.')
+        if 'avatars' not in image_url:
+            file_path = os.path.join(BASE_DIR, 'github', 'openGraphImages', repo['name'])
+            save_image_from_url(file_path=file_path, file_extension='png', image_url=image_url, size_x=624, size_y=312)
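+
+
+# NOTE: the repo listing above treats a page shorter than `per_page` as the last page, so
+# no Link-header parsing is needed. For reference, the cached languages files hold bytes
+# of code per language, e.g. (illustrative values):
+#
+#     {"Python": 53244, "Dockerfile": 1321}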
+
+
+def update_patreon():
+    """
+    Get patron count from Patreon.
+
+    The Patreon campaign id can be obtained in the browser developer console using the following JavaScript:
+    `window.patreon.bootstrap.campaign.data.id`
+    """
+    patreon_urls = [
+        'https://www.patreon.com/api/campaigns/6131567',
+    ]
+
+    for patreon_url in tqdm(
+            iterable=patreon_urls,
+            desc='Updating Patreon data',
+    ):
+        response = s.get(url=patreon_url)
+
+        data = response.json()['data']['attributes']
+
+        file_path = os.path.join(BASE_DIR, 'patreon', 'LizardByte')
+        write_json_files(file_path=file_path, data=data)
+
+
+def readthedocs_loop(url: str, file_path: str) -> list:
+    """
+    Walk a paginated Readthedocs API endpoint, collecting all results and caching them as json.
+    """
+    headers = {
+        'Authorization': f'token {args.readthedocs_token}',
+        'Accept': 'application/json'
+    }
+
+    results = []
+
+    while True:
+        response = s.get(url=url, headers=headers)
+        try:
+            data = response.json()
+        except requests.exceptions.JSONDecodeError:
+            break
+
+        try:
+            results.extend(data['results'])
+        except KeyError:
+            pass
+
+        try:
+            url = data['next']
+        except KeyError:
+            url = None
+
+        if not url:
+            break
+
+    if results:
+        write_json_files(file_path=file_path, data=results)
+
+    return results
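+
+
+# readthedocs_loop() follows the API v3 `next` links until exhausted. An illustrative
+# call (placeholders, not part of the real flow), caching all versions of one project:
+#
+#     readthedocs_loop(
+#         url='https://readthedocs.org/api/v3/projects/<slug>/versions/',
+#         file_path=os.path.join(BASE_DIR, 'readthedocs', 'versions', '<repo_name>'),
+#     )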
+ """ + url_base = 'https://readthedocs.org' + url = f'{url_base}/api/v3/projects/' + + file_path = os.path.join(BASE_DIR, 'readthedocs', 'projects') + projects = readthedocs_loop(url=url, file_path=file_path) + + for project in tqdm( + iterable=projects, + desc='Updating Readthedocs data', + ): + git_url = project['repository']['url'] + repo_name = git_url.rsplit('/', 1)[-1].rsplit('.git', 1)[0] + + for link in project['_links']: + if link == 'builds': + continue # skip builds, too much data and too slow + + file_path = os.path.join(BASE_DIR, 'readthedocs', link, repo_name) + + url = project['_links'][link] + readthedocs_loop(url=url, file_path=file_path) + + +def missing_arg(): + parser.print_help() + raise SystemExit(1) + + +if __name__ == '__main__': + # setup arguments using argparse + parser = argparse.ArgumentParser(description="Update github pages.") + parser.add_argument('--codecov_token', type=str, required=False, default=os.getenv('CODECOV_TOKEN'), + help='Codecov API token.') + parser.add_argument('--crowdin_token', type=str, required=False, default=os.getenv('CROWDIN_TOKEN'), + help='Crowdin API token.') + parser.add_argument('--discord_invite', type=str, required=False, default=os.getenv('DISCORD_INVITE'), + help='Discord invite code.') + parser.add_argument('--facebook_group_id', type=str, required=False, default=os.getenv('FACEBOOK_GROUP_ID'), + help='Facebook group ID.') + parser.add_argument('--facebook_page_id', type=str, required=False, default=os.getenv('FACEBOOK_PAGE_ID'), + help='Facebook page ID.') + parser.add_argument('--facebook_token', type=str, required=False, default=os.getenv('FACEBOOK_TOKEN'), + help='Facebook Token, requires `groups_access_member_info`, `read_insights`, and ' + '`pages_read_engagement`. Must be a `page` token, not a `user` token. Token owner must be ' + 'admin of the group.') + parser.add_argument('--github_repository_owner', type=str, required=False, + default=os.getenv('GITHUB_REPOSITORY_OWNER'), + help='GitHub Username. Can use environment variable "GITHUB_REPOSITORY_OWNER"') + parser.add_argument('--github_token', type=str, required=False, default=os.getenv('GITHUB_TOKEN'), + help='GitHub Token, no scope selection is necessary. Can use environment variable ' + '"GITHUB_TOKEN"') + parser.add_argument('--readthedocs_token', type=str, required=False, default=os.getenv('READTHEDOCS_TOKEN'), + help='Readthedocs API token. Can use environment variable "READTHEDOCS_TOKEN"') + parser.add_argument('-i', '--indent_json', action='store_true', help='Indent json files.') + + args = parser.parse_args() + args.indent = 4 if args.indent_json else None + + if not args.codecov_token or not args.discord_invite or not args.facebook_group_id or not args.facebook_page_id or \ + not args.facebook_token or not args.github_repository_owner or not args.github_token or \ + not args.readthedocs_token: + missing_arg() + + threads = [ + Thread( + name='aur', + target=update_aur, + ), + Thread( + name='codecov', + target=update_codecov, + ), + Thread( + name='crowdin', + target=update_crowdin, + ), + Thread( + name='discord', + target=update_discord, + ), + Thread( + name='facebook', + target=update_fb, + ), + Thread( + name='github', + target=update_github, + ), + Thread( + name='patreon', + target=update_patreon, + ), + Thread( + name='readthedocs', + target=update_readthedocs, + ), + ] + + for thread in tqdm( + iterable=threads, + desc='Starting threads', + ): + thread.start()