From 5366a3158fd089691f5e2528c90d436b1ac2fb79 Mon Sep 17 00:00:00 2001 From: ReenigneArcher <42013603+ReenigneArcher@users.noreply.github.com> Date: Tue, 3 Sep 2024 14:37:13 -0400 Subject: [PATCH] refactor(notebook): move code to standard python --- .github/workflows/update-pages.yml | 10 +- README.md | 5 + notebook/dashboard.ipynb | 537 ++-------------------------- src/__init__.py | 10 + src/dashboard.py | 554 +++++++++++++++++++++++++++++ src/helpers.py | 86 +++++ src/update.py | 542 ---------------------------- src/updater.py | 487 +++++++++++++++++++++++++ 8 files changed, 1179 insertions(+), 1052 deletions(-) create mode 100644 src/dashboard.py create mode 100644 src/helpers.py delete mode 100644 src/update.py create mode 100644 src/updater.py diff --git a/.github/workflows/update-pages.yml b/.github/workflows/update-pages.yml index c8c67f423..65768b954 100644 --- a/.github/workflows/update-pages.yml +++ b/.github/workflows/update-pages.yml @@ -64,8 +64,9 @@ jobs: # copy dependencies cp -f ./node_modules/plotly.js/dist/plotly.min.js ./gh-pages/plotly.js - - name: Update + - name: Convert notebook env: + DASHBOARD_AUR_REPOS: sunshine,sunshine-bin,sunshine-git CODECOV_TOKEN: ${{ secrets.CODECOV_API_TOKEN }} CROWDIN_TOKEN: ${{ secrets.CROWDIN_TOKEN }} DISCORD_INVITE: ${{ secrets.DISCORD_INVITE }} @@ -74,13 +75,8 @@ jobs: FACEBOOK_TOKEN: ${{ secrets.FACEBOOK_ACCESS_TOKEN }} GITHUB_REPOSITORY_OWNER: ${{ secrets.GH_ORG_NAME }} GITHUB_TOKEN: ${{ secrets.GH_BOT_TOKEN || secrets.GITHUB_TOKEN }} + PATREON_CAMPAIGN_ID: 6131567 READTHEDOCS_TOKEN: ${{ secrets.READTHEDOCS_TOKEN }} - run: | - python -u ./src/update.py - - - name: Convert notebook - env: - GITHUB_TOKEN: ${{ secrets.GH_BOT_TOKEN || secrets.GITHUB_TOKEN }} run: | jupyter nbconvert \ --debug \ diff --git a/README.md b/README.md index f7c27a3a0..d9d76cd65 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,11 @@ A Jupyter notebook that creates a dashboard for viewing LizardByte repository da ```bash find . -name '*.ipynb' -exec nb-clean clean {} \; ``` + + Or for a single notebook: + ```bash + nb-clean clean ./notebook/dashboard.ipynb + ``` 2. 
You can create a preview of the notebook in html by running the following commands: ```bash diff --git a/notebook/dashboard.ipynb b/notebook/dashboard.ipynb index 1de60e3d0..a1b81cc1c 100644 --- a/notebook/dashboard.ipynb +++ b/notebook/dashboard.ipynb @@ -41,10 +41,13 @@ " \n", " \"GitHub\n", " \n", - " \n", + " \n", + " \"NPM\"\n", + " \n", + " \n", " \"PyPI\"\n", " \n", - " \n", + " \n", " \"ReadTheDocs\"\n", " \n", " \n", @@ -68,32 +71,6 @@ "## Repository Data" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "957d644a7520fbd", - "metadata": {}, - "outputs": [], - "source": [ - "# Imports\n", - "\n", - "# standard imports\n", - "import json\n", - "import os\n", - "import time\n", - "\n", - "# lib imports\n", - "from dotenv import load_dotenv\n", - "from github import Github, UnknownObjectException\n", - "from IPython.display import HTML, display\n", - "from itables import init_notebook_mode, show\n", - "import numpy as np\n", - "import pandas as pd\n", - "import plotly.express as px\n", - "import plotly.graph_objects as go\n", - "import plotly.io as pio" - ] - }, { "cell_type": "code", "execution_count": null, @@ -102,94 +79,18 @@ "outputs": [], "source": [ "# Setup the environment\n", + "import os\n", + "import sys\n", + "import time\n", "\n", - "# Load environment variables from .env file\n", - "load_dotenv()\n", - "\n", - "# Authenticate with GitHub\n", - "token = os.getenv(\"GITHUB_TOKEN\")\n", - "g = Github(token)\n", - "\n", - "# set the default plotly template\n", - "pio.templates.default = \"plotly_dark\"\n", - "\n", - "# Fetch repository data\n", - "org_name = \"LizardByte\"\n", - "org = g.get_organization(org_name)\n", - "repos = org.get_repos()\n", - "\n", - "# constants\n", - "text_template = '%{text}'\n", - "data_dir = os.path.join(os.path.dirname(os.getcwd()), 'gh-pages')\n", - "\n", - "# all readthedocs projects\n", - "# readthedocs data\n", - "readthedocs_path = os.path.join(data_dir, 'readthedocs', 'projects.json')\n", - "\n", - "with open(readthedocs_path, 'r') as f:\n", - " readthedocs_data = json.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d804270c89d8280", - "metadata": {}, - "outputs": [], - "source": [ - "# Get Repo Data\n", - "repo_data = []\n", - "for repo in repos:\n", - " # get license\n", - " license_name = repo.license.name if repo.license else \"No License\"\n", - "\n", - " # split open issues and PRs\n", - " open_issues = repo.get_issues(state='open')\n", - " open_prs = [issue for issue in open_issues if issue.pull_request is not None]\n", - " open_issues = [issue for issue in open_issues if issue.pull_request is None]\n", - "\n", - " # coverage data\n", - " coverage = 0\n", - " try:\n", - " with open(os.path.join(data_dir, 'codecov', f'{repo.name}.json')) as f:\n", - " coverage_data = json.load(f)\n", - " coverage = coverage_data['totals']['coverage']\n", - " except Exception:\n", - " pass\n", - "\n", - " # readthedocs data\n", - " readthedocs_project = None\n", - " for project in readthedocs_data:\n", - " if project['repository']['url'] == repo.clone_url:\n", - " readthedocs_project = project\n", + "# Add the src directory to the path\n", + "sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), os.pardir)))\n", "\n", - " # has README.md or README.rst\n", - " # check if the repo has a README.md or README.rst\n", - " readme_file = None\n", - " try:\n", - " readme_file = repo.get_readme()\n", - " except UnknownObjectException:\n", - " pass\n", + "from src import dashboard # noqa: E402\n", "\n", 
- " repo_data.append({\n", - " \"repo\": repo.name,\n", - " \"stars\": repo.stargazers_count,\n", - " \"archived\": repo.archived,\n", - " \"fork\": repo.fork,\n", - " \"forks\": repo.forks_count,\n", - " \"issues\": open_issues,\n", - " \"topics\": repo.get_topics(),\n", - " \"languages\": repo.get_languages(),\n", - " \"license\": license_name,\n", - " \"prs\": open_prs,\n", - " \"created_at\": repo.created_at,\n", - " \"updated_at\": repo.updated_at,\n", - " \"coverage\": coverage,\n", - " \"readthedocs\": readthedocs_project,\n", - " \"has_readthedocs\": readthedocs_project is not None,\n", - " \"has_readme\": readme_file is not None,\n", - " \"_repo\": repo,\n", - " })" + "# disable TQDM output if not in debug mode\n", + "if not os.getenv('ACTIONS_RUNNER_DEBUG') and not os.getenv('ACTIONS_STEP_DEBUG'):\n", + " os.environ['TQDM_DISABLE'] = '1'" ] }, { @@ -199,17 +100,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Initial data frames\n", - "df = pd.DataFrame(repo_data)\n", - "df_repos = df[\n", - " (~df['archived']) &\n", - " (~df['topics'].apply(lambda topics: 'package-manager' in topics))\n", - "]\n", - "df_original_repos = df[\n", - " (~df['archived']) &\n", - " (~df['fork']) &\n", - " (~df['topics'].apply(lambda topics: 'package-manager' in topics))\n", - "]" + "# get repo data\n", + "dashboard.init()\n", + "df_all_repos = dashboard.get_repo_data()\n", + "df_repos = dashboard.get_df_repos()" ] }, { @@ -221,11 +115,11 @@ "source": [ "# Initial Results\n", "print(f'Last Updated: {time.strftime(\"%Y-%m-%d %H:%M:%S\", time.gmtime())} UTC')\n", - "print(f'Total Repositories: {len(repo_data)}')\n", - "print(f'Archived Repositories: {df[\"archived\"].sum()}')\n", - "print(f'Forked Repositories: {df[\"fork\"].sum()}')\n", - "print(f'Total Open Issues: {df[\"issues\"].apply(len).sum()}')\n", - "print(f'Total Open PRs: {df[\"prs\"].apply(len).sum()}')\n", + "print(f'Total Repositories: {len(df_all_repos)}')\n", + "print(f'Archived Repositories: {df_all_repos[\"archived\"].sum()}')\n", + "print(f'Forked Repositories: {df_all_repos[\"fork\"].sum()}')\n", + "print(f'Total Open Issues: {df_all_repos[\"issues\"].apply(len).sum()}')\n", + "print(f'Total Open PRs: {df_all_repos[\"prs\"].apply(len).sum()}')\n", "print(f'Open issues in active repositories: {df_repos[\"issues\"].apply(len).sum()}')\n", "print(f'Open PRs in active repositories: {df_repos[\"prs\"].apply(len).sum()}')" ] @@ -246,45 +140,7 @@ "outputs": [], "source": [ "# Stars\n", - "df_stars = df_repos.sort_values(\n", - " by='stars',\n", - " ascending=False,\n", - ")\n", - "df_stars['log_stars'] = np.log1p(df_stars['stars'])\n", - "fig = px.bar(\n", - " df_stars,\n", - " x='repo',\n", - " y='log_stars',\n", - " title='Stars',\n", - " text='stars',\n", - ")\n", - "fig.update_traces(\n", - " texttemplate=text_template,\n", - " textposition='inside',\n", - ")\n", - "fig.update_layout(\n", - " yaxis_title=None,\n", - " yaxis_showticklabels=False,\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ce69f164706dacf", - "metadata": {}, - "outputs": [], - "source": [ - "# Star History Data\n", - "stargazer_data = []\n", - "for repo in df_repos.to_dict('records'):\n", - " stargazers = repo['_repo'].get_stargazers_with_dates()\n", - " for stargazer in stargazers:\n", - " stargazer_data.append({\n", - " \"repo\": repo['repo'],\n", - " \"date\": stargazer.starred_at,\n", - " })" + "dashboard.show_star_gazers()" ] }, { @@ -295,19 +151,7 @@ "outputs": [], "source": [ "# Star History Visuals\n", 
- "df_stargazers = pd.DataFrame(stargazer_data)\n", - "df_stargazers = df_stargazers.sort_values(by=\"date\")\n", - "df_stargazers[\"cumulative_stars\"] = df_stargazers.groupby(\"repo\").cumcount() + 1\n", - "\n", - "fig = px.line(\n", - " df_stargazers,\n", - " x=\"date\",\n", - " y=\"cumulative_stars\",\n", - " color=\"repo\",\n", - " title=\"Star History\",\n", - " labels={\"date\": \"Date\", \"cumulative_stars\": \"Cumulative Stars\"},\n", - ")\n", - "fig.show()" + "dashboard.show_star_history()" ] }, { @@ -326,27 +170,7 @@ "outputs": [], "source": [ "# Forks\n", - "df_forks = df_repos.sort_values(\n", - " by='forks',\n", - " ascending=False,\n", - ")\n", - "df_forks['log_forks'] = np.log1p(df_forks['forks'])\n", - "fig = px.bar(\n", - " df_forks,\n", - " x='repo',\n", - " y='log_forks',\n", - " title='Forks',\n", - " text='forks',\n", - ")\n", - "fig.update_traces(\n", - " texttemplate=text_template,\n", - " textposition='inside',\n", - ")\n", - "fig.update_layout(\n", - " yaxis_title=None,\n", - " yaxis_showticklabels=False,\n", - ")\n", - "fig.show()" + "dashboard.show_forks()" ] }, { @@ -365,26 +189,7 @@ "outputs": [], "source": [ "# Open Issues\n", - "df_issues = df_repos.copy()\n", - "df_issues['issue_count'] = df_issues['issues'].apply(len)\n", - "df_issues = df_issues.sort_values(by='issue_count', ascending=False)\n", - "df_issues['log_issues'] = np.log1p(df_issues['issue_count'])\n", - "fig = px.bar(\n", - " df_issues,\n", - " x='repo',\n", - " y='log_issues',\n", - " title='Open Issues',\n", - " text='issue_count',\n", - ")\n", - "fig.update_traces(\n", - " texttemplate=text_template,\n", - " textposition='inside',\n", - ")\n", - "fig.update_layout(\n", - " yaxis_title=None,\n", - " yaxis_showticklabels=False,\n", - ")\n", - "fig.show()" + "dashboard.show_issues()" ] }, { @@ -395,41 +200,6 @@ "### Open PRs" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "3bde60fbd0b7e540", - "metadata": {}, - "outputs": [], - "source": [ - "# PR Data\n", - "pr_data = []\n", - "for repo in df_repos.to_dict('records'):\n", - " for pr in repo['prs']:\n", - " pr_details = repo['_repo'].get_pull(pr.number)\n", - "\n", - " # Check if the PR has been approved\n", - " reviews = pr_details.get_reviews()\n", - " approved = any(review.state == 'APPROVED' for review in reviews)\n", - "\n", - " # Get the milestone\n", - " milestone = pr_details.milestone.title if pr_details.milestone else None\n", - "\n", - " pr_data.append({\n", - " \"repo\": repo['repo'],\n", - " \"number\": pr_details.number,\n", - " \"title\": pr_details.title,\n", - " \"author\": pr_details.user.login,\n", - " \"labels\": [label.name for label in pr_details.labels],\n", - " \"assignees\": [assignee.login for assignee in pr_details.assignees],\n", - " \"created_at\": pr_details.created_at,\n", - " \"last_activity\": pr_details.updated_at,\n", - " \"status\": \"Draft\" if pr_details.draft else \"Ready\",\n", - " \"approved\": approved,\n", - " \"milestone\": milestone,\n", - " })" - ] - }, { "cell_type": "code", "execution_count": null, @@ -437,37 +207,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Create DataFrame for PR details\n", - "df_pr_details = pd.DataFrame(pr_data)\n", - "\n", - "# Group by repository and status to get the count of PRs\n", - "df_pr_counts = df_pr_details.groupby(['repo', 'status']).size().reset_index(name='pr_count')\n", - "\n", - "# Sort repositories by total PR count\n", - "df_pr_counts['total_prs'] = df_pr_counts.groupby('repo')['pr_count'].transform('sum')\n", - "df_pr_counts = 
df_pr_counts.sort_values(by='total_prs', ascending=False)\n", - "\n", - "# Create Stacked Bar Chart\n", - "fig_bar = px.bar(\n", - " df_pr_counts,\n", - " x='repo',\n", - " y='pr_count',\n", - " color='status',\n", - " title='Open Pull Requests',\n", - " labels={'pr_count': 'Count of PRs', 'repo': 'Repository', 'status': 'PR Status'},\n", - " text='pr_count',\n", - " category_orders={'repo': df_pr_counts['repo'].tolist()},\n", - ")\n", - "\n", - "fig_bar.update_layout(\n", - " yaxis_title='Open PRs',\n", - " xaxis_title='Repository',\n", - ")\n", - "fig_bar.update_traces(\n", - " texttemplate=text_template,\n", - " textposition='inside',\n", - ")\n", - "fig_bar.show()" + "# PR Graph\n", + "dashboard.show_pr_graph()" ] }, { @@ -478,38 +219,7 @@ "outputs": [], "source": [ "# PR Table\n", - "\n", - "# darken the column filter inputs\n", - "css = \"\"\"\n", - ".dt-column-title input[type=\"text\"] {\n", - " background-color: var(--jp-layout-color0);\n", - " border-color: rgb(64,67,70);\n", - " border-width: 1px;\n", - " color: var(--jp-ui-font-color1);\n", - "}\n", - "\"\"\"\n", - "display(HTML(f\"\"))\n", - "\n", - "init_notebook_mode(\n", - " all_interactive=True,\n", - " connected=False,\n", - ")\n", - "\n", - "# Display the DataFrame as an interactive table using itables\n", - "table_download_name = \"LizardByte-Pull-Requests\"\n", - "show(\n", - " df_pr_details,\n", - " buttons=[\n", - " \"pageLength\",\n", - " \"copyHtml5\",\n", - " {\"extend\": \"csvHtml5\", \"title\": table_download_name},\n", - " {\"extend\": \"excelHtml5\", \"title\": table_download_name},\n", - " ],\n", - " classes=\"display compact\",\n", - " column_filters=\"header\",\n", - " header=True,\n", - " layout={\"topEnd\": None},\n", - ")" + "dashboard.show_pr_table()" ] }, { @@ -528,16 +238,7 @@ "outputs": [], "source": [ "# License distribution\n", - "license_counts = df_repos.groupby(['license', 'repo']).size().reset_index(name='count')\n", - "\n", - "fig_treemap = px.treemap(\n", - " license_counts,\n", - " path=['license', 'repo'],\n", - " values='count',\n", - " title='License Distribution',\n", - " hover_data={'repo': True, 'count': False},\n", - ")\n", - "fig_treemap.show()" + "dashboard.show_license_distribution()" ] }, { @@ -556,28 +257,7 @@ "outputs": [], "source": [ "# Coverage\n", - "df_coverage = df_repos.sort_values(\n", - " by='coverage',\n", - " ascending=False,\n", - ")\n", - "\n", - "# inverse marker size, so higher coverage has smaller markers\n", - "df_coverage['marker_size'] = df_coverage['coverage'].apply(lambda x: 110 - x if x > 0 else 0)\n", - "\n", - "fig_scatter = px.scatter(\n", - " df_coverage,\n", - " x='repo',\n", - " y='coverage',\n", - " title='Coverage Percentage',\n", - " size='marker_size',\n", - " color='coverage',\n", - " color_continuous_scale=['red', 'yellow', 'green'], # red is low, green is high\n", - ")\n", - "fig_scatter.update_layout(\n", - " yaxis_title='Coverage Percentage',\n", - " xaxis_title='Repository',\n", - ")\n", - "fig_scatter.show()" + "dashboard.show_coverage()" ] }, { @@ -588,24 +268,6 @@ "### Programming Languages" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cf8ff484b1639d", - "metadata": {}, - "outputs": [], - "source": [ - "# Programming language data\n", - "language_data = []\n", - "for repo in df_repos.to_dict('records'):\n", - " for language, bytes_of_code in repo['languages'].items():\n", - " language_data.append({\n", - " \"repo\": repo['repo'],\n", - " \"language\": language,\n", - " \"bytes_of_code\": bytes_of_code,\n", - " })" 
- ] - }, { "cell_type": "code", "execution_count": null, @@ -614,68 +276,7 @@ "outputs": [], "source": [ "# Programming Languages\n", - "df_languages = pd.DataFrame(language_data)\n", - "\n", - "# Aggregate data by language and repo\n", - "language_counts_bytes = df_languages.groupby(['language', 'repo']).agg({\n", - " 'bytes_of_code': 'sum'\n", - "}).reset_index()\n", - "language_counts_repos = df_languages.groupby(['language', 'repo']).size().reset_index(name='repo_count')\n", - "\n", - "def create_language_figures(counts: pd.DataFrame, path_key: str, value_key: str):\n", - " _fig_treemap = px.treemap(\n", - " counts,\n", - " path=[path_key, 'repo'],\n", - " values=value_key,\n", - " )\n", - " _fig_sunburst = px.sunburst(\n", - " counts,\n", - " path=[path_key, 'repo'],\n", - " values=value_key,\n", - " )\n", - " return _fig_treemap, _fig_sunburst\n", - "\n", - "# List of tuples containing the data and titles for each figure\n", - "figures_data = [\n", - " (language_counts_bytes, 'language', 'bytes_of_code', 'Programming Languages by Bytes of Code'),\n", - " (language_counts_repos, 'language', 'repo_count', 'Programming Languages by Repo Count')\n", - "]\n", - "\n", - "# Loop through the list to create figures and add traces\n", - "for _counts, _path_key, value_key, title in figures_data:\n", - " fig_treemap, fig_sunburst = create_language_figures(counts=_counts, path_key=_path_key, value_key=value_key)\n", - "\n", - " fig = go.Figure()\n", - " fig.add_trace(fig_treemap.data[0])\n", - " fig.add_trace(fig_sunburst.data[0])\n", - " fig.data[1].visible = False\n", - "\n", - " fig.update_layout(\n", - " title=title,\n", - " updatemenus=[\n", - " {\n", - " \"buttons\": [\n", - " {\n", - " \"label\": \"Treemap\",\n", - " \"method\": \"update\",\n", - " \"args\": [\n", - " {\"visible\": [True, False]},\n", - " ],\n", - " },\n", - " {\n", - " \"label\": \"Sunburst\",\n", - " \"method\": \"update\",\n", - " \"args\": [\n", - " {\"visible\": [False, True]},\n", - " ],\n", - " },\n", - " ],\n", - " \"direction\": \"down\",\n", - " \"showactive\": True,\n", - " }\n", - " ]\n", - " )\n", - " fig.show()" + "dashboard.show_language_data()" ] }, { @@ -686,23 +287,6 @@ "### Documentation" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7ba218e5863deb7", - "metadata": {}, - "outputs": [], - "source": [ - "# Docs data\n", - "docs_data = []\n", - "for repo in df_repos.to_dict('records'):\n", - " docs_data.append({\n", - " \"repo\": repo['repo'],\n", - " \"has_readme\": repo['has_readme'],\n", - " \"has_readthedocs\": repo['has_readthedocs'],\n", - " })" - ] - }, { "cell_type": "code", "execution_count": null, @@ -711,60 +295,7 @@ "outputs": [], "source": [ "# Docs\n", - "df_docs = pd.DataFrame(docs_data)\n", - "readme_counts = df_docs.groupby(['has_readme', 'repo']).size().reset_index(name='repo_count')\n", - "readthedocs_counts = df_docs.groupby(['has_readthedocs', 'repo']).size().reset_index(name='repo_count')\n", - "\n", - "def create_figures(counts: pd.DataFrame, path_key: str):\n", - " _fig_treemap = px.treemap(\n", - " counts,\n", - " path=[path_key, 'repo'],\n", - " values='repo_count',\n", - " )\n", - " _fig_sunburst = px.sunburst(\n", - " counts,\n", - " path=[path_key, 'repo'],\n", - " values='repo_count',\n", - " )\n", - " return _fig_treemap, _fig_sunburst\n", - "\n", - "# List of tuples containing the data and titles for each figure\n", - "figures_data = [\n", - " (readme_counts, 'has_readme', 'Has README file'),\n", - " (readthedocs_counts, 'has_readthedocs', 'Uses 
ReadTheDocs')\n", - "]\n", - "\n", - "# Loop through the list to create figures and add traces\n", - "for _counts, _path_key, title in figures_data:\n", - " fig_treemap, fig_sunburst = create_figures(counts=_counts, path_key=_path_key)\n", - "\n", - " fig = go.Figure()\n", - " fig.add_trace(fig_treemap.data[0])\n", - " fig.add_trace(fig_sunburst.data[0])\n", - " fig.data[1].visible = False\n", - "\n", - " fig.update_layout(\n", - " title=title,\n", - " updatemenus=[\n", - " {\n", - " \"buttons\": [\n", - " {\n", - " \"label\": \"Treemap\",\n", - " \"method\": \"update\",\n", - " \"args\": [{\"visible\": [True, False]}],\n", - " },\n", - " {\n", - " \"label\": \"Sunburst\",\n", - " \"method\": \"update\",\n", - " \"args\": [{\"visible\": [False, True]}],\n", - " },\n", - " ],\n", - " \"direction\": \"down\",\n", - " \"showactive\": True,\n", - " }\n", - " ]\n", - " )\n", - " fig.show()" + "dashboard.show_docs_data()" ] } ], diff --git a/src/__init__.py b/src/__init__.py index e69de29bb..8ef445a3b 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -0,0 +1,10 @@ +# standard imports +import os + +# lib imports +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +BASE_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'gh-pages') diff --git a/src/dashboard.py b/src/dashboard.py new file mode 100644 index 000000000..59ab9b6a1 --- /dev/null +++ b/src/dashboard.py @@ -0,0 +1,554 @@ +# standard imports +import json +import os + +# lib imports +from github import Github, PaginatedList, UnknownObjectException +from IPython.display import HTML, display +from itables import init_notebook_mode, show +import numpy as np +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import plotly.io as pio + +# local imports +from src import BASE_DIR +from src import updater + +# Authenticate with GitHub +token = os.getenv("GITHUB_TOKEN") +g = Github(token) + +# set the default plotly template +pio.templates.default = "plotly_dark" + +# Fetch repository data +org_name = "LizardByte" +org = g.get_organization(org_name) + +# constants +text_template = '%{text}' + + +# globals +updated = False +repos = [] +df_all_repos = pd.DataFrame() +df_repos = pd.DataFrame() +df_pr_details = pd.DataFrame() +df_language_data = pd.DataFrame() +df_docs_data = pd.DataFrame() + + +def init(): + global updated + if not updated: + updater.update() + updated = True + + +def get_repos() -> PaginatedList: + global repos + repos = org.get_repos() + return repos + + +def get_repo_data() -> pd.DataFrame: + global df_all_repos + + if not repos: + get_repos() + + # all readthedocs projects + readthedocs_path = os.path.join(BASE_DIR, 'readthedocs', 'projects.json') + with open(readthedocs_path, 'r') as f: + readthedocs_data = json.load(f) + + repo_data = [] + for repo in repos: + # get license + license_name = repo.license.name if repo.license else "No License" + + # split open issues and PRs + open_issues = repo.get_issues(state='open') + open_prs = [issue for issue in open_issues if issue.pull_request is not None] + open_issues = [issue for issue in open_issues if issue.pull_request is None] + + # coverage data + coverage = 0 + try: + with open(os.path.join(BASE_DIR, 'codecov', f'{repo.name}.json')) as f: + coverage_data = json.load(f) + coverage = coverage_data['totals']['coverage'] + except Exception: + pass + + # readthedocs data + readthedocs_project = None + for project in readthedocs_data: + if project['repository']['url'] == 
repo.clone_url: + readthedocs_project = project + + # has README.md or README.rst + # check if the repo has a README.md or README.rst + readme_file = None + try: + readme_file = repo.get_readme() + except UnknownObjectException: + pass + + repo_data.append({ + "repo": repo.name, + "stars": repo.stargazers_count, + "archived": repo.archived, + "fork": repo.fork, + "forks": repo.forks_count, + "issues": open_issues, + "topics": repo.get_topics(), + "languages": repo.get_languages(), + "license": license_name, + "prs": open_prs, + "created_at": repo.created_at, + "updated_at": repo.updated_at, + "coverage": coverage, + "readthedocs": readthedocs_project, + "has_readthedocs": readthedocs_project is not None, + "has_readme": readme_file is not None, + "_repo": repo, + }) + + df_all_repos = pd.DataFrame(repo_data) + return df_all_repos + + +def get_df_repos() -> pd.DataFrame: + global df_repos + + if df_all_repos.empty: + get_repo_data() + + df_repos = df_all_repos[ + (~df_all_repos['archived']) & + (~df_all_repos['topics'].apply(lambda topics: 'package-manager' in topics)) + ] + return df_repos + + +def show_star_gazers(): + if df_repos.empty: + get_df_repos() + + df_stars = df_repos.sort_values( + by='stars', + ascending=False, + ) + df_stars['log_stars'] = np.log1p(df_stars['stars']) + fig = px.bar( + df_stars, + x='repo', + y='log_stars', + title='Stars', + text='stars', + ) + fig.update_traces( + texttemplate=text_template, + textposition='inside', + ) + fig.update_layout( + yaxis_title=None, + yaxis_showticklabels=False, + ) + fig.show() + + +def get_stargazer_data() -> list: + if df_repos.empty: + get_df_repos() + + stargazer_data = [] + for repo in df_repos.to_dict('records'): + stargazers = repo['_repo'].get_stargazers_with_dates() + for stargazer in stargazers: + stargazer_data.append({ + "repo": repo['repo'], + "date": stargazer.starred_at, + }) + + return stargazer_data + + +def show_star_history(): + df_stargazers = pd.DataFrame(get_stargazer_data()) + df_stargazers = df_stargazers.sort_values(by="date") + df_stargazers["cumulative_stars"] = df_stargazers.groupby("repo").cumcount() + 1 + + fig = px.line( + df_stargazers, + x="date", + y="cumulative_stars", + color="repo", + title="Star History", + labels={"date": "Date", "cumulative_stars": "Cumulative Stars"}, + ) + fig.show() + + +def show_forks(): + if df_repos.empty: + get_df_repos() + + df_forks = df_repos.sort_values( + by='forks', + ascending=False, + ) + df_forks['log_forks'] = np.log1p(df_forks['forks']) + fig = px.bar( + df_forks, + x='repo', + y='log_forks', + title='Forks', + text='forks', + ) + fig.update_traces( + texttemplate=text_template, + textposition='inside', + ) + fig.update_layout( + yaxis_title=None, + yaxis_showticklabels=False, + ) + fig.show() + + +def show_issues(): + if df_repos.empty: + get_df_repos() + + df_issues = df_repos.copy() + df_issues['issue_count'] = df_issues['issues'].apply(len) + df_issues = df_issues.sort_values(by='issue_count', ascending=False) + df_issues['log_issues'] = np.log1p(df_issues['issue_count']) + fig = px.bar( + df_issues, + x='repo', + y='log_issues', + title='Open Issues', + text='issue_count', + ) + fig.update_traces( + texttemplate=text_template, + textposition='inside', + ) + fig.update_layout( + yaxis_title=None, + yaxis_showticklabels=False, + ) + fig.show() + + +def get_pr_data() -> pd.DataFrame: + global df_pr_details + + if df_repos.empty: + get_df_repos() + + pr_data = [] + for repo in df_repos.to_dict('records'): + for pr in repo['prs']: + pr_details = 
repo['_repo'].get_pull(pr.number) + + # Check if the PR has been approved + reviews = pr_details.get_reviews() + approved = any(review.state == 'APPROVED' for review in reviews) + + # Get the milestone + milestone = pr_details.milestone.title if pr_details.milestone else None + + pr_data.append({ + "repo": repo['repo'], + "number": pr_details.number, + "title": pr_details.title, + "author": pr_details.user.login, + "labels": [label.name for label in pr_details.labels], + "assignees": [assignee.login for assignee in pr_details.assignees], + "created_at": pr_details.created_at, + "last_activity": pr_details.updated_at, + "status": "Draft" if pr_details.draft else "Ready", + "approved": approved, + "milestone": milestone, + }) + + df_pr_details = pd.DataFrame(pr_data) + return df_pr_details + + +def show_pr_graph(): + if df_pr_details.empty: + get_pr_data() + + # Group by repository and status to get the count of PRs + df_pr_counts = df_pr_details.groupby(['repo', 'status']).size().reset_index(name='pr_count') + + # Sort repositories by total PR count + df_pr_counts['total_prs'] = df_pr_counts.groupby('repo')['pr_count'].transform('sum') + df_pr_counts = df_pr_counts.sort_values(by='total_prs', ascending=False) + + # Create Stacked Bar Chart + fig_bar = px.bar( + df_pr_counts, + x='repo', + y='pr_count', + color='status', + title='Open Pull Requests', + labels={'pr_count': 'Count of PRs', 'repo': 'Repository', 'status': 'PR Status'}, + text='pr_count', + category_orders={'repo': df_pr_counts['repo'].tolist()}, + ) + + fig_bar.update_layout( + yaxis_title='Open PRs', + xaxis_title='Repository', + ) + fig_bar.update_traces( + texttemplate=text_template, + textposition='inside', + ) + fig_bar.show() + + +def show_pr_table(): + if df_pr_details.empty: + get_pr_data() + + # darken the column filter inputs + css = """ + .dt-column-title input[type="text"] { + background-color: var(--jp-layout-color0); + border-color: rgb(64,67,70); + border-width: 1px; + color: var(--jp-ui-font-color1); + } + """ + display(HTML(f"")) + + init_notebook_mode( + all_interactive=True, + connected=False, + ) + + # Display the DataFrame as an interactive table using itables + table_download_name = "LizardByte-Pull-Requests" + show( + df_pr_details, + buttons=[ + "pageLength", + "copyHtml5", + {"extend": "csvHtml5", "title": table_download_name}, + {"extend": "excelHtml5", "title": table_download_name}, + ], + classes="display compact", + column_filters="header", + header=True, + layout={"topEnd": None}, + ) + + +def show_license_distribution(): + if df_repos.empty: + get_df_repos() + + license_counts = df_repos.groupby(['license', 'repo']).size().reset_index(name='count') + + fig_treemap = px.treemap( + license_counts, + path=['license', 'repo'], + values='count', + title='License Distribution', + hover_data={'repo': True, 'count': False}, + ) + fig_treemap.show() + + +def show_coverage(): + if df_repos.empty: + get_df_repos() + + df_coverage = df_repos.sort_values( + by='coverage', + ascending=False, + ) + + # inverse marker size, so higher coverage has smaller markers + df_coverage['marker_size'] = df_coverage['coverage'].apply(lambda x: 110 - x if x > 0 else 0) + + fig_scatter = px.scatter( + df_coverage, + x='repo', + y='coverage', + title='Coverage Percentage', + size='marker_size', + color='coverage', + color_continuous_scale=['red', 'yellow', 'green'], # red is low, green is high + ) + fig_scatter.update_layout( + yaxis_title='Coverage Percentage', + xaxis_title='Repository', + ) + fig_scatter.show() + + +def 
get_language_data(): + global df_language_data + + language_data = [] + for repo in df_repos.to_dict('records'): + for language, bytes_of_code in repo['languages'].items(): + language_data.append({ + "repo": repo['repo'], + "language": language, + "bytes_of_code": bytes_of_code, + }) + + df_language_data = pd.DataFrame(language_data) + return df_language_data + + +def show_language_data(): + if df_language_data.empty: + get_language_data() + + # Aggregate data by language and repo + language_counts_bytes = df_language_data.groupby(['language', 'repo']).agg({ + 'bytes_of_code': 'sum' + }).reset_index() + language_counts_repos = df_language_data.groupby(['language', 'repo']).size().reset_index(name='repo_count') + + def create_language_figures(counts: pd.DataFrame, path_key: str, value_key: str): + _fig_treemap = px.treemap( + counts, + path=[path_key, 'repo'], + values=value_key, + ) + _fig_sunburst = px.sunburst( + counts, + path=[path_key, 'repo'], + values=value_key, + ) + return _fig_treemap, _fig_sunburst + + # List of tuples containing the data and titles for each figure + figures_data = [ + (language_counts_bytes, 'language', 'bytes_of_code', 'Programming Languages by Bytes of Code'), + (language_counts_repos, 'language', 'repo_count', 'Programming Languages by Repo Count') + ] + + # Loop through the list to create figures and add traces + for _counts, _path_key, value_key, title in figures_data: + fig_treemap, fig_sunburst = create_language_figures(counts=_counts, path_key=_path_key, value_key=value_key) + + fig = go.Figure() + fig.add_trace(fig_treemap.data[0]) + fig.add_trace(fig_sunburst.data[0]) + fig.data[1].visible = False + + fig.update_layout( + title=title, + updatemenus=[ + { + "buttons": [ + { + "label": "Treemap", + "method": "update", + "args": [ + {"visible": [True, False]}, + ], + }, + { + "label": "Sunburst", + "method": "update", + "args": [ + {"visible": [False, True]}, + ], + }, + ], + "direction": "down", + "showactive": True, + } + ] + ) + fig.show() + + +def get_docs_data(): + global df_docs_data + + docs_data = [] + for repo in df_repos.to_dict('records'): + docs_data.append({ + "repo": repo['repo'], + "has_readme": repo['has_readme'], + "has_readthedocs": repo['has_readthedocs'], + }) + + df_docs_data = pd.DataFrame(docs_data) + return df_docs_data + + +def show_docs_data(): + if df_docs_data.empty: + get_docs_data() + + readme_counts = df_docs_data.groupby(['has_readme', 'repo']).size().reset_index(name='repo_count') + readthedocs_counts = df_docs_data.groupby(['has_readthedocs', 'repo']).size().reset_index(name='repo_count') + + def create_figures(counts: pd.DataFrame, path_key: str): + _fig_treemap = px.treemap( + counts, + path=[path_key, 'repo'], + values='repo_count', + ) + _fig_sunburst = px.sunburst( + counts, + path=[path_key, 'repo'], + values='repo_count', + ) + return _fig_treemap, _fig_sunburst + + # List of tuples containing the data and titles for each figure + figures_data = [ + (readme_counts, 'has_readme', 'Has README file'), + (readthedocs_counts, 'has_readthedocs', 'Uses ReadTheDocs') + ] + + # Loop through the list to create figures and add traces + for _counts, _path_key, title in figures_data: + fig_treemap, fig_sunburst = create_figures(counts=_counts, path_key=_path_key) + + fig = go.Figure() + fig.add_trace(fig_treemap.data[0]) + fig.add_trace(fig_sunburst.data[0]) + fig.data[1].visible = False + + fig.update_layout( + title=title, + updatemenus=[ + { + "buttons": [ + { + "label": "Treemap", + "method": "update", + "args": 
[{"visible": [True, False]}], + }, + { + "label": "Sunburst", + "method": "update", + "args": [{"visible": [False, True]}], + }, + ], + "direction": "down", + "showactive": True, + } + ] + ) + fig.show() diff --git a/src/helpers.py b/src/helpers.py new file mode 100644 index 000000000..9dba3ee5a --- /dev/null +++ b/src/helpers.py @@ -0,0 +1,86 @@ +# standard imports +import json +import os +import pathlib +from typing import Union + +# lib imports +import cloudscraper +from PIL import Image +from requests.adapters import HTTPAdapter + + +# setup requests session +s = cloudscraper.CloudScraper() # CloudScraper inherits from requests.Session +retry_adapter = HTTPAdapter(max_retries=5) +s.mount('https://', retry_adapter) + + +def debug_print( + *values: object, + sep: Union[str, None] = ' ', + end: Union[str, None] = '\n', +): + if os.getenv('ACTIONS_RUNNER_DEBUG') or os.getenv('ACTIONS_STEP_DEBUG'): + print(*values, sep=sep, end=end) + + +def save_image_from_url(file_path: str, file_extension: str, image_url: str, size_x: int = 0, size_y: int = 0): + """ + Write image data to file. If ``size_x`` and ``size_y`` are both supplied, a resized image will also be saved. + + Parameters + ---------- + file_path : str + The file path to save the file at. + file_extension : str + The extension of the file name. + image_url : str + The image url. + size_x : int + The ``x`` dimension to resize the image to. If used, ``size_y`` must also be defined. + size_y : int + The ``y`` dimension to resize the image to. If used, ``size_x`` must also be defined. + """ + debug_print(f'Saving image from {image_url}') + # determine the directory + directory = os.path.dirname(file_path) + + pathlib.Path(directory).mkdir(parents=True, exist_ok=True) + + og_img_data = s.get(url=image_url).content + + file_name_with_ext = f'{file_path}.{file_extension}' + with open(file_name_with_ext, 'wb') as handler: + handler.write(og_img_data) + + # resize the image + if size_x and size_y: + pil_img_data = Image.open(file_name_with_ext) + resized_img_data = pil_img_data.resize((size_x, size_y)) + resized_img_data.save(fp=f'{file_path}_{size_x}x{size_y}.{file_extension}') + + +def write_json_files(file_path: str, data: any): + """ + Write dictionary to JSON file. + + Parameters + ---------- + file_path : str + The file path to save the file at, excluding the file extension which will be `.json` + data + The dictionary data to write in the JSON file. 
+ """ + debug_print(f'Writing json file at {file_path}') + # determine the directory + directory = os.path.dirname(file_path) + + pathlib.Path(directory).mkdir(parents=True, exist_ok=True) + + with open(f'{file_path}.json', 'w') as f: + json.dump( + obj=data, + fp=f, + indent=4 if os.getenv('ACTIONS_RUNNER_DEBUG') or os.getenv('ACTIONS_STEP_DEBUG') else None, + ) diff --git a/src/update.py b/src/update.py deleted file mode 100644 index b595333e9..000000000 --- a/src/update.py +++ /dev/null @@ -1,542 +0,0 @@ -# standard imports -import argparse -import json -import os -import pathlib -from threading import Thread -from typing import Union - -# lib imports -import cloudscraper -from crowdin_api import CrowdinClient -from dotenv import load_dotenv -from PIL import Image -import requests -from requests.adapters import HTTPAdapter -import svgwrite -from tqdm import tqdm -import unhandled_exit - -# setup environment if running locally -load_dotenv() - -# setup threading exception handling -unhandled_exit.activate() - -# setup requests session -s = cloudscraper.CloudScraper() # CloudScraper inherits from requests.Session -retry_adapter = HTTPAdapter(max_retries=5) -s.mount('https://', retry_adapter) - -# constants -BASE_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'gh-pages') - - -# custom print -def debug_print( - *values: object, - sep: Union[str, None] = ' ', - end: Union[str, None] = '\n', -): - if os.getenv('ACTIONS_RUNNER_DEBUG') or os.getenv('ACTIONS_STEP_DEBUG'): - print(*values, sep=sep, end=end) - - -def save_image_from_url(file_path: str, file_extension: str, image_url: str, size_x: int = 0, size_y: int = 0): - """ - Write image data to file. If ``size_x`` and ``size_y`` are both supplied, a resized image will also be saved. - - Parameters - ---------- - file_path : str - The file path to save the file at. - file_extension : str - The extension of the file name. - image_url : str - The image url. - size_x : int - The ``x`` dimension to resize the image to. If used, ``size_y`` must also be defined. - size_y : int - The ``y`` dimension to resize the image to. If used, ``size_x`` must also be defined. - """ - debug_print(f'Saving image from {image_url}') - # determine the directory - directory = os.path.dirname(file_path) - - pathlib.Path(directory).mkdir(parents=True, exist_ok=True) - - og_img_data = s.get(url=image_url).content - - file_name_with_ext = f'{file_path}.{file_extension}' - with open(file_name_with_ext, 'wb') as handler: - handler.write(og_img_data) - - # resize the image - if size_x and size_y: - pil_img_data = Image.open(file_name_with_ext) - resized_img_data = pil_img_data.resize((size_x, size_y)) - resized_img_data.save(fp=f'{file_path}_{size_x}x{size_y}.{file_extension}') - - -def write_json_files(file_path: str, data: any): - """ - Write dictionary to json file. - - Parameters - ---------- - file_path : str - The file path to save the file at, excluding the file extension which will be `.json` - data - The dictionary data to write in the json file. - """ - debug_print(f'Writing json file at {file_path}') - # determine the directory - directory = os.path.dirname(file_path) - - pathlib.Path(directory).mkdir(parents=True, exist_ok=True) - - with open(f'{file_path}.json', 'w') as f: - json.dump(obj=data, fp=f, indent=args.indent) - - -def update_aur(): - """ - Cache and update data from aur API. 
- """ - aur_base_url = 'https://aur.archlinux.org/rpc?v=5&type=info&arg=' - aur_repos = ['sunshine'] - - for repo in tqdm( - iterable=aur_repos, - desc='Updating AUR data', - ): - url = f'{aur_base_url}{repo}' - response = s.get(url=url) - data = response.json() - - file_path = os.path.join(BASE_DIR, 'aur', repo) - write_json_files(file_path=file_path, data=data) - - -def update_codecov(): - """ - Get code coverage data from Codecov API. - """ - headers = dict( - Accept='application/json', - Authorization=f'bearer {args.codecov_token}', - ) - base_url = f'https://codecov.io/api/v2/gh/{args.github_repository_owner}' - - url = f'{base_url}/repos?page_size=500' - - response = s.get(url=url, headers=headers) - data = response.json() - assert data['next'] is None, 'More than 500 repos found, need to implement pagination.' - - for repo in tqdm( - iterable=data['results'], - desc='Updating Codecov data', - ): - url = f'{base_url}/repos/{repo["name"]}' - response = s.get(url=url, headers=headers) - data = response.json() - - file_path = os.path.join(BASE_DIR, 'codecov', repo['name']) - write_json_files(file_path=file_path, data=data) - - -def update_crowdin(): - """ - Cache and update data from Crowdin API, and generate completion graph. - """ - client = CrowdinClient(token=args.crowdin_token) - - # automatically collect crowdin projects - project_data = client.projects.list_projects()['data'] - - for project in tqdm( - iterable=project_data, - desc='Updating Crowdin data', - ): - project_name = project['data']['name'] - project_id = project['data']['id'] - data = client.translation_status.get_project_progress(projectId=project_id)['data'] - file_path = os.path.join(BASE_DIR, 'crowdin', project_name.replace(' ', '_')) - write_json_files(file_path=file_path, data=data) - - # sort data by approval progress first, then translation progress, then alphabetically - data.sort(key=lambda x: ( - -x['data']['approvalProgress'], - -x['data']['translationProgress'], - x['data']['language']['name'] - ), reverse=False) - - # ensure "en" is first, if it exists - try: - en_index = [x['data']['language']['id'] for x in data].index('en') - except ValueError: - pass - else: - data.insert(0, data.pop(en_index)) - - # generate translation and approval completion graph - line_height = 32 - bar_height = 16 - svg_width = 500 - label_width = 200 - progress_width = 160 - insert = 12 - bar_corner_radius = 0 - - dwg = svgwrite.Drawing(filename=f'{file_path}_graph.svg', size=(svg_width, len(data) * line_height)) - - # load css font - dwg.embed_stylesheet(""" - @import url(https://fonts.googleapis.com/css?family=Open+Sans); - .svg-font { - font-family: "Open Sans"; - font-size: 12px; - fill: #999; - } - """) - for lang_base in tqdm( - iterable=data, - desc=f'Generating Crowdin graph for project: {project_name}', - ): - language = lang_base['data'] - g = dwg.add(dwg.g( - class_="svg-font", - transform='translate(0,{})'.format(data.index(lang_base) * line_height) - )) - g.add(dwg.text( - f"{language['language']['name']} ({language['language']['id']})", - insert=(label_width, 18), - style='text-anchor:end;') - ) - - translation_progress = language['translationProgress'] / 100.0 - approval_progress = language['approvalProgress'] / 100.0 - - progress_insert = (label_width + insert, 6) - if translation_progress < 100: - g.add(dwg.rect( - insert=progress_insert, - size=(progress_width, bar_height), - rx=bar_corner_radius, - ry=bar_corner_radius, - fill='#999', - style='filter:opacity(30%);') - ) - if translation_progress > 0 and 
approval_progress < 100: - g.add(dwg.rect( - insert=progress_insert, - size=(progress_width * translation_progress, bar_height), - rx=bar_corner_radius, - ry=bar_corner_radius, - fill='#5D89C3') - ) - if approval_progress > 0: - g.add(dwg.rect( - insert=progress_insert, - size=(progress_width * approval_progress, bar_height), - rx=bar_corner_radius, - ry=bar_corner_radius, - fill='#71C277') - ) - - g.add(dwg.text('{}%'.format(language['translationProgress']), - insert=(progress_insert[0] + progress_width + insert, bar_height))) - - # write the svg file - dwg.save(pretty=True) - - -def update_discord(): - """ - Cache and update data from Discord API. - """ - discord_urls = [ - f'https://discordapp.com/api/invites/{args.discord_invite}?with_counts=true', - ] - - for discord_url in tqdm( - iterable=discord_urls, - desc='Updating Discord data', - ): - response = s.get(url=discord_url) - data = response.json() - - file_path = os.path.join(BASE_DIR, 'discord', 'invite') - write_json_files(file_path=file_path, data=data) - - -def update_fb(): - """ - Get number of Facebook page likes and group members. - """ - fb_base_url = 'https://graph.facebook.com/' - - fb_endpoints = dict( - group=f'{args.facebook_group_id}?fields=member_count,name,description&access_token={args.facebook_token}', - page=f'{args.facebook_page_id}/insights?metric=page_fans&access_token={args.facebook_token}' - ) - - for key, value in tqdm( - iterable=fb_endpoints.items(), - desc='Updating Facebook data', - ): - url = f'{fb_base_url}/{value}' - response = requests.get(url=url) - - data = response.json() - try: - data['paging'] - except KeyError: - pass - else: - # remove facebook token from data - del data['paging'] - - file_path = os.path.join(BASE_DIR, 'facebook', key) - write_json_files(file_path=file_path, data=data) - - -def update_github(): - """ - Cache and update GitHub Repo banners. - """ - url = f'https://api.github.com/users/{args.github_repository_owner}/repos' - per_page = 100 - repos = [] - - headers = dict( - accept='application/vnd.github.v3+json', - ) - - query_params = dict( - per_page=per_page, - page=1, - ) - - while True: - response = s.get( - url=url, - headers=headers, - params=query_params, - ) - response_data = response.json() - repos.extend(response_data) - - if len(response_data) < per_page: - break - - query_params['page'] += 1 - - file_path = os.path.join(BASE_DIR, 'github', 'repos') - write_json_files(file_path=file_path, data=repos) - - headers = dict( - Authorization=f'token {args.github_token}' - ) - url = 'https://api.github.com/graphql' - - for repo in tqdm( - iterable=repos, - desc='Updating GitHub data', - ): - # languages - response = s.get(url=repo['languages_url'], headers=headers) - # if TypeError, API limit has likely been exceeded or possible issue with GitHub API... 
- # https://www.githubstatus.com/ - # do not error handle, better that workflow fails - - languages = response.json() - - file_path = os.path.join(BASE_DIR, 'github', 'languages', repo['name']) - write_json_files(file_path=file_path, data=languages) - - # openGraphImages - query = """ - { - repository(owner: "%s", name: "%s") { - openGraphImageUrl - } - } - """ % (repo['owner']['login'], repo['name']) - - response = s.post(url=url, json={'query': query}, headers=headers) - repo_data = response.json() - try: - image_url = repo_data['data']['repository']['openGraphImageUrl'] - except KeyError: - raise SystemExit('"GITHUB_TOKEN" is invalid.') - if 'avatars' not in image_url: - file_path = os.path.join(BASE_DIR, 'github', 'openGraphImages', repo['name']) - save_image_from_url(file_path=file_path, file_extension='png', image_url=image_url, size_x=624, size_y=312) - - -def update_patreon(): - """ - Get patron count from Patreon. - - Patreon id can be obtained in browser developer console using the folliwng javascript: - `window.patreon.bootstrap.campaign.data.id` - """ - patreon_urls = [ - 'https://www.patreon.com/api/campaigns/6131567', - ] - - for patreon_url in tqdm( - iterable=patreon_urls, - desc='Updating Patreon data', - ): - response = s.get(url=patreon_url) - - data = response.json()['data']['attributes'] - - file_path = os.path.join(BASE_DIR, 'patreon', 'LizardByte') - write_json_files(file_path=file_path, data=data) - - -def readthedocs_loop(url: str, file_path: str) -> list: - headers = { - 'Authorization': f'token {args.readthedocs_token}', - 'Accept': 'application/json' - } - - results = [] - - while True: - response = s.get(url=url, headers=headers) - try: - data = response.json() - except requests.exceptions.JSONDecodeError: - break - - try: - results.extend(data['results']) - except KeyError: - pass - - try: - url = data['next'] - except KeyError: - url = None - - if not url: - break - - if results: - write_json_files(file_path=file_path, data=results) - - return results - - -def update_readthedocs(): - """ - Cache and update readthedocs info. 
- """ - url_base = 'https://readthedocs.org' - url = f'{url_base}/api/v3/projects/' - - file_path = os.path.join(BASE_DIR, 'readthedocs', 'projects') - projects = readthedocs_loop(url=url, file_path=file_path) - - for project in tqdm( - iterable=projects, - desc='Updating Readthedocs data', - ): - git_url = project['repository']['url'] - repo_name = git_url.rsplit('/', 1)[-1].rsplit('.git', 1)[0] - - for link in project['_links']: - if link == 'builds': - continue # skip builds, too much data and too slow - - file_path = os.path.join(BASE_DIR, 'readthedocs', link, repo_name) - - url = project['_links'][link] - readthedocs_loop(url=url, file_path=file_path) - - -def missing_arg(): - parser.print_help() - raise SystemExit(1) - - -if __name__ == '__main__': - # setup arguments using argparse - parser = argparse.ArgumentParser(description="Update github pages.") - parser.add_argument('--codecov_token', type=str, required=False, default=os.getenv('CODECOV_TOKEN'), - help='Codecov API token.') - parser.add_argument('--crowdin_token', type=str, required=False, default=os.getenv('CROWDIN_TOKEN'), - help='Crowdin API token.') - parser.add_argument('--discord_invite', type=str, required=False, default=os.getenv('DISCORD_INVITE'), - help='Discord invite code.') - parser.add_argument('--facebook_group_id', type=str, required=False, default=os.getenv('FACEBOOK_GROUP_ID'), - help='Facebook group ID.') - parser.add_argument('--facebook_page_id', type=str, required=False, default=os.getenv('FACEBOOK_PAGE_ID'), - help='Facebook page ID.') - parser.add_argument('--facebook_token', type=str, required=False, default=os.getenv('FACEBOOK_TOKEN'), - help='Facebook Token, requires `groups_access_member_info`, `read_insights`, and ' - '`pages_read_engagement`. Must be a `page` token, not a `user` token. Token owner must be ' - 'admin of the group.') - parser.add_argument('--github_repository_owner', type=str, required=False, - default=os.getenv('GITHUB_REPOSITORY_OWNER'), - help='GitHub Username. Can use environment variable "GITHUB_REPOSITORY_OWNER"') - parser.add_argument('--github_token', type=str, required=False, default=os.getenv('GITHUB_TOKEN'), - help='GitHub Token, no scope selection is necessary. Can use environment variable ' - '"GITHUB_TOKEN"') - parser.add_argument('--readthedocs_token', type=str, required=False, default=os.getenv('READTHEDOCS_TOKEN'), - help='Readthedocs API token. 
Can use environment variable "READTHEDOCS_TOKEN"') - parser.add_argument('-i', '--indent_json', action='store_true', help='Indent json files.') - - args = parser.parse_args() - args.indent = 4 if args.indent_json else None - - if not args.codecov_token or not args.discord_invite or not args.facebook_group_id or not args.facebook_page_id or \ - not args.facebook_token or not args.github_repository_owner or not args.github_token or \ - not args.readthedocs_token: - missing_arg() - - threads = [ - Thread( - name='aur', - target=update_aur, - ), - Thread( - name='codecov', - target=update_codecov, - ), - Thread( - name='crowdin', - target=update_crowdin, - ), - Thread( - name='discord', - target=update_discord, - ), - Thread( - name='facebook', - target=update_fb, - ), - Thread( - name='github', - target=update_github, - ), - Thread( - name='patreon', - target=update_patreon, - ), - Thread( - name='readthedocs', - target=update_readthedocs, - ), - ] - - for thread in tqdm( - iterable=threads, - desc='Starting threads', - ): - thread.start() diff --git a/src/updater.py b/src/updater.py new file mode 100644 index 000000000..40a0ed995 --- /dev/null +++ b/src/updater.py @@ -0,0 +1,487 @@ +# standard imports +import os +from threading import Thread + +# lib imports +from crowdin_api import CrowdinClient +import requests +import svgwrite +from tqdm import tqdm +import unhandled_exit + +# local imports +from src import BASE_DIR +from src import helpers + + +def update_aur(aur_repos: list): + """ + Cache and update data from aur API. + """ + aur_base_url = 'https://aur.archlinux.org/rpc?v=5&type=info&arg=' + + for repo in tqdm( + iterable=aur_repos, + desc='Updating AUR data', + ): + url = f'{aur_base_url}{repo}' + response = helpers.s.get(url=url) + data = response.json() + + file_path = os.path.join(BASE_DIR, 'aur', repo) + helpers.write_json_files(file_path=file_path, data=data) + + +def update_codecov(): + """ + Get code coverage data from Codecov API. + """ + headers = dict( + Accept='application/json', + Authorization=f'bearer {os.environ["CODECOV_TOKEN"]}', + ) + base_url = f'https://codecov.io/api/v2/gh/{os.environ["GITHUB_REPOSITORY_OWNER"]}' + + url = f'{base_url}/repos?page_size=500' + + response = helpers.s.get(url=url, headers=headers) + data = response.json() + + if response.status_code != 200: + raise requests.exceptions.HTTPError(f'Error: {data["detail"]}') + + assert data['next'] is None, 'More than 500 repos found, need to implement pagination.' 
+ + for repo in tqdm( + iterable=data['results'], + desc='Updating Codecov data', + ): + url = f'{base_url}/repos/{repo["name"]}' + response = helpers.s.get(url=url, headers=headers) + data = response.json() + + file_path = os.path.join(BASE_DIR, 'codecov', repo['name']) + helpers.write_json_files(file_path=file_path, data=data) + + +def sort_crowdin_data(data): + data.sort(key=lambda x: ( + -x['data']['approvalProgress'], + -x['data']['translationProgress'], + x['data']['language']['name'] + ), reverse=False) + + try: + en_index = [x['data']['language']['id'] for x in data].index('en') + except ValueError: + pass + else: + data.insert(0, data.pop(en_index)) + + +def generate_crowdin_svg_graph(data, file_path, project_name): + line_height = 32 + bar_height = 16 + svg_width = 500 + label_width = 200 + progress_width = 160 + insert = 12 + bar_corner_radius = 0 + + dwg = svgwrite.Drawing(filename=f'{file_path}_graph.svg', size=(svg_width, len(data) * line_height)) + + dwg.embed_stylesheet(""" + @import url(https://fonts.googleapis.com/css?family=Open+Sans); + .svg-font { + font-family: "Open Sans"; + font-size: 12px; + fill: #999; + } + """) + + for lang_base in tqdm( + iterable=data, + desc=f'Generating Crowdin graph for project: {project_name}', + ): + language = lang_base['data'] + g = dwg.add(dwg.g( + class_="svg-font", + transform='translate(0,{})'.format(data.index(lang_base) * line_height) + )) + g.add(dwg.text( + f"{language['language']['name']} ({language['language']['id']})", + insert=(label_width, 18), + style='text-anchor:end;') + ) + + translation_progress = language['translationProgress'] / 100.0 + approval_progress = language['approvalProgress'] / 100.0 + + progress_insert = (label_width + insert, 6) + if translation_progress < 100: + g.add(dwg.rect( + insert=progress_insert, + size=(progress_width, bar_height), + rx=bar_corner_radius, + ry=bar_corner_radius, + fill='#999', + style='filter:opacity(30%);') + ) + if translation_progress > 0 and approval_progress < 100: + g.add(dwg.rect( + insert=progress_insert, + size=(progress_width * translation_progress, bar_height), + rx=bar_corner_radius, + ry=bar_corner_radius, + fill='#5D89C3') + ) + if approval_progress > 0: + g.add(dwg.rect( + insert=progress_insert, + size=(progress_width * approval_progress, bar_height), + rx=bar_corner_radius, + ry=bar_corner_radius, + fill='#71C277') + ) + + g.add(dwg.text('{}%'.format(language['translationProgress']), + insert=(progress_insert[0] + progress_width + insert, bar_height))) + + dwg.save(pretty=True) + + +def update_crowdin(): + """ + Cache and update data from Crowdin API, and generate completion graph. + """ + client = CrowdinClient(token=os.environ['CROWDIN_TOKEN']) + + project_data = client.projects.list_projects()['data'] + + for project in tqdm( + iterable=project_data, + desc='Updating Crowdin data', + ): + project_name = project['data']['name'] + project_id = project['data']['id'] + data = client.translation_status.get_project_progress(projectId=project_id)['data'] + file_path = os.path.join(BASE_DIR, 'crowdin', project_name.replace(' ', '_')) + helpers.write_json_files(file_path=file_path, data=data) + + sort_crowdin_data(data) + generate_crowdin_svg_graph(data, file_path, project_name) + + +def update_discord(): + """ + Cache and update data from Discord API. 
+ """ + discord_urls = [ + f'https://discordapp.com/api/invites/{os.environ["DISCORD_INVITE"]}?with_counts=true', + ] + + for discord_url in tqdm( + iterable=discord_urls, + desc='Updating Discord data', + ): + response = helpers.s.get(url=discord_url) + data = response.json() + + file_path = os.path.join(BASE_DIR, 'discord', 'invite') + helpers.write_json_files(file_path=file_path, data=data) + + +def update_fb(): + """ + Get the number of Facebook page likes and group members. + """ + fb_base_url = 'https://graph.facebook.com/' + + fb_endpoints = dict() + + if os.getenv('FACEBOOK_GROUP_ID'): + fb_endpoints['group'] = (f'{os.environ["FACEBOOK_GROUP_ID"]}?' + f'fields=member_count,name,description&access_token={os.environ["FACEBOOK_TOKEN"]}') + if os.getenv('FACEBOOK_PAGE_ID'): + fb_endpoints['page'] = (f'{os.environ["FACEBOOK_PAGE_ID"]}/' + f'insights?metric=page_fans&access_token={os.environ["FACEBOOK_TOKEN"]}') + + for key, value in tqdm( + iterable=fb_endpoints.items(), + desc='Updating Facebook data', + ): + url = f'{fb_base_url}/{value}' + response = requests.get(url=url) + + data = response.json() + try: + data['paging'] + except KeyError: + pass + else: + # remove facebook token from data + del data['paging'] + + file_path = os.path.join(BASE_DIR, 'facebook', key) + helpers.write_json_files(file_path=file_path, data=data) + + +def update_github(): + """ + Cache and update GitHub Repo banners and data. + """ + url = f'https://api.github.com/users/{os.environ["GITHUB_REPOSITORY_OWNER"]}/repos' + per_page = 100 + repos = [] + + headers = dict( + accept='application/vnd.github.v3+json', + ) + + query_params = dict( + per_page=per_page, + page=1, + ) + + while True: + response = helpers.s.get( + url=url, + headers=headers, + params=query_params, + ) + response_data = response.json() + repos.extend(response_data) + + if len(response_data) < per_page: + break + + query_params['page'] += 1 + + file_path = os.path.join(BASE_DIR, 'github', 'repos') + helpers.write_json_files(file_path=file_path, data=repos) + + headers = dict( + Authorization=f'token {os.environ["GITHUB_TOKEN"]}', + ) + url = 'https://api.github.com/graphql' + + for repo in tqdm( + iterable=repos, + desc='Updating GitHub data', + ): + # languages + response = helpers.s.get(url=repo['languages_url'], headers=headers) + # if TypeError, API limit has likely been exceeded or possible issue with GitHub API... + # https://www.githubstatus.com/ + # do not error handle, better that workflow fails + + languages = response.json() + + file_path = os.path.join(BASE_DIR, 'github', 'languages', repo['name']) + helpers.write_json_files(file_path=file_path, data=languages) + + # openGraphImages + query = """ + { + repository(owner: "%s", name: "%s") { + openGraphImageUrl + } + } + """ % (repo['owner']['login'], repo['name']) + + response = helpers.s.post(url=url, json={'query': query}, headers=headers) + repo_data = response.json() + try: + image_url = repo_data['data']['repository']['openGraphImageUrl'] + except KeyError: + raise SystemExit('"GITHUB_TOKEN" is invalid.') + if 'avatars' not in image_url: + file_path = os.path.join(BASE_DIR, 'github', 'openGraphImages', repo['name']) + helpers.save_image_from_url( + file_path=file_path, + file_extension='png', + image_url=image_url, + size_x=624, + size_y=312, + ) + + +def update_patreon(): + """ + Get patron count from Patreon. 
+ + Patreon id can be obtained in browser developer console using the following javascript: + `window.patreon.bootstrap.campaign.data.id` + """ + patreon_urls = [ + F'https://www.patreon.com/api/campaigns/{os.environ["PATREON_CAMPAIGN_ID"]}', + ] + + for patreon_url in tqdm( + iterable=patreon_urls, + desc='Updating Patreon data', + ): + response = helpers.s.get(url=patreon_url) + + data = response.json()['data']['attributes'] + + file_path = os.path.join(BASE_DIR, 'patreon', 'LizardByte') + helpers.write_json_files(file_path=file_path, data=data) + + +def readthedocs_loop(url: str, file_path: str) -> list: + headers = { + 'Authorization': f'token {os.environ["READTHEDOCS_TOKEN"]}', + 'Accept': 'application/json' + } + + results = [] + + while True: + response = helpers.s.get(url=url, headers=headers) + try: + data = response.json() + except requests.exceptions.JSONDecodeError: + break + + try: + results.extend(data['results']) + except KeyError: + pass + + try: + url = data['next'] + except KeyError: + url = None + + if not url: + break + + if results: + helpers.write_json_files(file_path=file_path, data=results) + + return results + + +def update_readthedocs(): + """ + Cache and update readthedocs info. + """ + url_base = 'https://readthedocs.org' + url = f'{url_base}/api/v3/projects/' + + file_path = os.path.join(BASE_DIR, 'readthedocs', 'projects') + projects = readthedocs_loop(url=url, file_path=file_path) + + for project in tqdm( + iterable=projects, + desc='Updating Readthedocs data', + ): + git_url = project['repository']['url'] + repo_name = git_url.rsplit('/', 1)[-1].rsplit('.git', 1)[0] + + for link in project['_links']: + if link == 'builds': + continue # skip builds, too much data and too slow + + file_path = os.path.join(BASE_DIR, 'readthedocs', link, repo_name) + + url = project['_links'][link] + readthedocs_loop(url=url, file_path=file_path) + + +def update(): + threads = [] + + # AUR repo data + if os.getenv('DASHBOARD_AUR_REPOS'): + aur_repos = os.getenv('DASHBOARD_AUR_REPOS').split(',') + threads.append( + Thread( + name='aur', + target=update_aur, + kwargs=dict(aur_repos=aur_repos), + ) + ) + + # Codecov data + if os.getenv('CODECOV_TOKEN') and os.getenv('GITHUB_REPOSITORY_OWNER'): + threads.append( + Thread( + name='codecov', + target=update_codecov, + ) + ) + + # Crowdin data + if os.getenv('CROWDIN_TOKEN'): + threads.append( + Thread( + name='crowdin', + target=update_crowdin, + ) + ) + + # Discord data + if os.getenv('DISCORD_INVITE'): + threads.append( + Thread( + name='discord', + target=update_discord, + ) + ) + + # Facebook data + if os.getenv('FACEBOOK_TOKEN') and (os.getenv('FACEBOOK_GROUP_ID') or os.getenv('FACEBOOK_PAGE_ID')): + threads.append( + Thread( + name='facebook', + target=update_fb, + ) + ) + + # GitHub data + if os.getenv('GITHUB_TOKEN') and os.getenv('GITHUB_REPOSITORY_OWNER'): + threads.append( + Thread( + name='github', + target=update_github, + ) + ) + + # Patreon data + if os.getenv('PATREON_CAMPAIGN_ID'): + threads.append( + Thread( + name='patreon', + target=update_patreon, + ) + ) + + # Readthedocs data + if os.getenv('READTHEDOCS_TOKEN'): + threads.append( + Thread( + name='readthedocs', + target=update_readthedocs, + ) + ) + + # setup threading exception handling + unhandled_exit.activate() + + for thread in tqdm( + iterable=threads, + desc='Starting threads', + ): + thread.start() + + # wait for all threads to finish + for thread in tqdm( + iterable=threads, + desc='Waiting for threads to finish', + ): + thread.join() + + # deactivate 
threading exception handling + unhandled_exit.deactivate()
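
With this refactor the notebook becomes a thin wrapper around `src.dashboard`, so the same data collection and figures can be driven outside Jupyter. Below is a minimal sketch (a hypothetical `run_dashboard.py`, not part of this patch), assuming the relevant tokens -- at minimum `GITHUB_TOKEN`, plus `READTHEDOCS_TOKEN` and any other services to refresh -- are available in the environment or a `.env` file, that a `gh-pages` checkout exists at the repository root (where `src/__init__.py` resolves `BASE_DIR`), and that the script is run from the repository root so the `src` package is importable:

```python
# run_dashboard.py (hypothetical) -- minimal sketch of driving the refactored
# module from a plain Python script instead of the notebook.
from src import dashboard

dashboard.init()            # runs updater.update() once to refresh the cached API data
dashboard.get_repo_data()   # builds the module-level DataFrame of all repositories
dashboard.get_df_repos()    # filters out archived and package-manager repositories

# each show_* helper builds and displays its own Plotly figure
dashboard.show_star_gazers()
dashboard.show_pr_graph()
dashboard.show_coverage()
```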