Skip to content

Commit

Permalink
use github token for api calls; fix auto pr (#4)
Browse files: browse the repository at this point in the history
* add auth to session header

* update push branch name

* minor change

* change back to init

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug
  • Loading branch information
RemyLau authored Jan 31, 2024
1 parent 33f935a commit 0ce3146
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 29 deletions.
29 changes: 15 additions & 14 deletions .github/workflows/update_software_info.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,34 @@ jobs:
with:
python-version: '3.10'

- name: Install dependencies
run: |
pip install pip -U
pip install -r requirements.txt
- name: Checkout a new branch
- name: Setup branch name and gh config
run: |
TIMESTAMP=$(date -u "+%Y-%m-%d")
BRANCH_NAME="update-software-info-${TIMESTAMP}"
git checkout -b $BRANCH_NAME
- name: Obtain latest software info summary
run: python src/software_info.py
# Export to env so that they can be accessed later
echo "TIMESTAMP=$TIMESTAMP" >> $GITHUB_ENV
echo "BRANCH_NAME=$BRANCH_NAME" >> $GITHUB_ENV
- name: Commit and push changes
run: |
git config --global user.name 'github-actions'
git config --global user.email '[email protected]'
git add software_info
git commit -m "[${TIMESTAMP}] update software info"
git push origin HEAD
- name: Install dependencies
run: |
pip install pip -U
pip install -r requirements.txt
- name: Obtain latest software info summary
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: python src/software_info.py

- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
branch: ${{ env.BRANCH_NAME }}
commit-message: "[${{ env.TIMESTAMP }}] update software info"
title: "Automated software info update"
body: "This is an auto-generated PR with updates to the software info."
draft: false
30 changes: 18 additions & 12 deletions src/software_info.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import base64
import os
import re
import warnings
from typing import Dict, Union

import jinja2
import pandas as pd
import requests
from tqdm import tqdm

from config import GITHUB_API_URL, GITHUB_ORG, HOMEDIR, PYPISTATS_API_URL
Expand All @@ -19,12 +21,12 @@ def get_basic_info(repo) -> Dict[str, Union[str, float]]:
}


def get_packge_info(repo_contents) -> Dict[str, Union[str, int]]:
def get_packge_info(repo_contents, session=None) -> Dict[str, Union[str, int]]:
for content in repo_contents:
if content["name"] not in ("setup.cfg", "pyproject.toml"):
continue

file_b64 = try_get_json(content["git_url"])["content"]
file_b64 = try_get_json(content["git_url"], session=session)["content"]
file_text = base64.b64decode(file_b64).decode()

# Try to extract package name
Expand All @@ -41,6 +43,7 @@ def get_packge_info(repo_contents) -> Dict[str, Union[str, int]]:

# Get PyPI stats given the package name
pypi_stats = try_get_json(join_url(PYPISTATS_API_URL, "packages", pkg_name, "recent"))["data"]

return {
"Package name": pkg_name,
"Weekly downloads": int(pypi_stats["last_week"]),
Expand All @@ -50,12 +53,12 @@ def get_packge_info(repo_contents) -> Dict[str, Union[str, int]]:
return {}


def get_zenodo_info(repo_contents) -> Dict[str, str]:
def get_zenodo_info(repo_contents, session=None) -> Dict[str, str]:
for content in repo_contents:
if content["name"] != "README.md":
continue

file_b64 = try_get_json(content["git_url"])["content"]
file_b64 = try_get_json(content["git_url"], session=session)["content"]
file_text = base64.b64decode(file_b64).decode()

pattern = re.compile(r"\[!\[DOI\]\(https://zenodo.org/badge/DOI/[\w.//]*\)\]\([\w.://]*\)")
Expand All @@ -66,17 +69,20 @@ def get_zenodo_info(repo_contents) -> Dict[str, str]:


def get_software_info_summary() -> pd.DataFrame:
repos = try_get_json(join_url(GITHUB_API_URL, "orgs", GITHUB_ORG, "repos"))
with requests.Session() as s:
if (gh_token := os.environ.get("GH_TOKEN")):
s.headers.update({"Authorization": f"Bearer {gh_token}"})
repos = try_get_json(join_url(GITHUB_API_URL, "orgs", GITHUB_ORG, "repos"), s)

stats_list = []
for repo in tqdm(repos):
repo_contents = try_get_json(join_url(repo["url"], "contents"))
stats_list = []
for repo in tqdm(repos):
repo_contents = try_get_json(join_url(repo["url"], "contents"), s)

repo_stats = get_basic_info(repo)
repo_stats.update(get_packge_info(repo_contents))
repo_stats.update(get_zenodo_info(repo_contents))
repo_stats = get_basic_info(repo)
repo_stats.update(get_packge_info(repo_contents, session=s))
repo_stats.update(get_zenodo_info(repo_contents, session=s))

stats_list.append(repo_stats)
stats_list.append(repo_stats)

return (
pd
Expand Down
7 changes: 4 additions & 3 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from datetime import datetime
from typing import Tuple
from typing import Optional, Tuple

import requests
from requests.exceptions import RequestException
Expand All @@ -10,8 +10,9 @@ def join_url(*parts: Tuple[str]) -> str:
return "/".join(parts)


def try_get_json(url: str):
with requests.get(url) as r:
def try_get_json(url: str, session: Optional[requests.Session] = None):
get = requests.get if session is None else session.get
with get(url) as r:
if not r.ok:
raise RequestException(f"{r!r}: {url}\n{r.text}")
content = r.json()
Expand Down

0 comments on commit 0ce3146

Please sign in to comment.