Skip to content

Commit

Permalink
Support Git references in subset merger (#987)
Browse files Browse the repository at this point in the history
* Support Git references in subset merger

In YAML, you can now do:
repo:
  slug: notofonts/latin-greek-cyrillic
  ref: e7f1736c5ad0dc2abfc4dcd49ebca50abf612b29

Where before repo could only take a string (being the repo slug)
This is still supported with the old behaviour of assuming you want the latest of the  branch

* Simplify reference specifying approach

Keep the repo key as a string, interpret anything after @ as the git ref

* Type hinting fairy visits

* Support '@latest' to get latest GitHub release for a subset

* Check if sources are already downloaded after resolving 'latest'

* Return resolved ref to fix designspace discovery

* Resolve 'latest' to tag early and don't use official zipball URL

Add GitHubClient.get_latest_tag_name()

This massively simplifies code paths and doesn't require SubsetMerger.download_for_subsetting
to have to do more than it should
This also allows us to know the name of the top level folder within the zip file in all scenarios
download_for_subsetting code is now a fair bit simpler and far more similar to before the ref feature
was added
  • Loading branch information
RickyDaMa authored Sep 5, 2024
1 parent 1d670ec commit 184c192
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 19 deletions.
4 changes: 4 additions & 0 deletions Lib/gftools/gfgithub.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ def get_blob(self, file_sha):
return response

def get_latest_release(self):
"""https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release"""
return self._get(self.rest_url("releases/latest"))

def get_latest_release_tag(self) -> str:
return self.get_latest_release()["tag_name"]

def open_prs(self, pr_head: str, pr_base_branch: str) -> typing.List:
return self._get(
self.rest_url("pulls", state="open", head=pr_head, base=pr_base_branch)
Expand Down
7 changes: 6 additions & 1 deletion Lib/gftools/scripts/add_ds_subsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ def main(args=None):

parser.add_argument("--yaml", "-y", help="YAML file describing subsets")

parser.add_argument("--repo", "-r", help="GitHub repository to use for subsetting")
parser.add_argument(
"--repo",
"-r",
help="GitHub repository slug to use for subsetting. Use @ after slug to specify branch/tag/commit, e.g. org/[email protected]; 'latest' is supported for latest release",
)
parser.add_argument("--file", "-f", help="Source file within GitHub repository")
parser.add_argument("--name", "-n", help="Name of subset to use from glyphset")
parser.add_argument(
Expand Down Expand Up @@ -107,6 +111,7 @@ def main(args=None):
print("Must specify --name or --codepoints")
sys.exit(1)
# And then construct the YAML-like object ourselves
# See subsets_schema in ..subsetmerger
subsets = [
{
"from": {
Expand Down
76 changes: 58 additions & 18 deletions Lib/gftools/subsetmerger.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import logging
import os
import re
Expand All @@ -6,12 +8,14 @@
from collections import defaultdict
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
from zipfile import ZipFile

import ufoLib2
import yaml
from fontmake.font_project import FontProject
from fontTools.designspaceLib import DesignSpaceDocument
from gftools.gfgithub import GitHubClient
from glyphsets import unicodes_per_glyphset
from strictyaml import HexInt, Int, Map, Optional, Seq, Str, Enum
from ufomerge import merge_ufos
Expand All @@ -22,7 +26,9 @@
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

SUBSET_SOURCES = {
FALLBACK_BRANCH_NAME = "main"

SUBSET_SOURCES: dict[str, tuple[str, str]] = {
"Noto Sans": ("notofonts/latin-greek-cyrillic", "sources/NotoSans.glyphspackage"),
"Noto Serif": ("notofonts/latin-greek-cyrillic", "sources/NotoSerif.glyphspackage"),
"Noto Sans Devanagari": (
Expand Down Expand Up @@ -143,7 +149,7 @@ def add_subsets(self):

ds.write(self.output)

def add_subset(self, target_ufo, ds, ds_source, subset):
def add_subset(self, target_ufo, ds, ds_source, subset) -> bool:
# First, we find a donor UFO that matches the location of the
# UFO to merge.
location = dict(ds_source.location)
Expand Down Expand Up @@ -171,22 +177,37 @@ def add_subset(self, target_ufo, ds, ds_source, subset):
)
return True

def obtain_upstream(self, upstream, location):
def obtain_upstream(
self, upstream: str | dict[str, Any], location
) -> ufoLib2.Font | None:
# Either the upstream is a string, in which case we try looking
# it up in the SUBSET_SOURCES table, or it's a dict, in which
# case it's a repository / path pair.
if isinstance(upstream, str):
if upstream not in SUBSET_SOURCES:
raise ValueError("Unknown subsetting font %s" % upstream)
repo, path = SUBSET_SOURCES[upstream]
font_name = upstream
ref = FALLBACK_BRANCH_NAME
font_name = f"{upstream}/{ref}"
else:
repo = upstream["repo"]
repo: str = upstream["repo"]
parts = repo.split("@", 1)
if len(parts) == 1:
# Repo was already just the slug, use fallback ref
ref = FALLBACK_BRANCH_NAME
else:
# Guaranteed to be 2 parts
repo, ref = parts
if ref == "latest":
# Resolve latest release's tag name
ref = GitHubClient.from_url(
f"https://github.com/{repo}"
).get_latest_release_tag()
path = upstream["path"]
font_name = "%s/%s" % (repo, path)
path = os.path.join(self.cache_dir, repo, path)
font_name = f"{repo}/{ref}/{path}"
path = os.path.join(self.cache_dir, repo, ref, path)

self.download_for_subsetting(repo)
self.download_for_subsetting(repo, ref)

# We're doing a UFO-UFO merge, so Glyphs files will need to be converted
if path.endswith((".glyphs", ".glyphspackage")):
Expand All @@ -206,7 +227,7 @@ def obtain_upstream(self, upstream, location):
return open_ufo(source_ufo.path)
return None

def glyphs_to_ufo(self, source, directory=None):
def glyphs_to_ufo(self, source: str, directory: Path | None = None) -> str:
source = Path(source)
if directory is None:
directory = source.resolve().parent
Expand Down Expand Up @@ -310,15 +331,34 @@ def generate_subset_instances(self, source_ds, font_name, instance):
os.path.dirname(source_ds.path), instance.filename
)

def download_for_subsetting(self, fullrepo):
dest = os.path.join(self.cache_dir, fullrepo)
def download_for_subsetting(self, fullrepo: str, ref: str) -> None:
"""Downloads a GitHub repository at a given reference"""
dest = os.path.join(self.cache_dir, f"{fullrepo}/{ref}")
if os.path.exists(dest):
# Assume sources exist & are up-to-date (we have no good way of
# checking this); do nothing
logger.info("Subset files present on disk, skipping download")
return
user, repo = fullrepo.split("/")
os.makedirs(os.path.join(self.cache_dir, user), exist_ok=True)
repo_zipball = f"https://github.com/{fullrepo}/archive/refs/heads/main.zip"
logger.info(f"Downloading {fullrepo}")
# Make the parent folder to dest but not dest itself. This means that
# the shutil.move at the end of this function won't create
# dest/repo-ref, instead having dest contain the contents of repo-ref
os.makedirs(os.path.join(self.cache_dir, fullrepo), exist_ok=True)

# This URL scheme doesn't appear to be 100% official for tags &
# branches, but it seems to accept any valid git reference
# See https://stackoverflow.com/a/13636954 and
# https://docs.github.com/en/repositories/working-with-files/using-files/downloading-source-code-archives#source-code-archive-urls
repo_zipball = f"https://github.com/{fullrepo}/archive/{ref}.zip"
logger.info(f"Downloading {fullrepo} {ref}")

repo_zip = ZipFile(download_file(repo_zipball))
with TemporaryDirectory() as d:
repo_zip.extractall(d)
shutil.move(os.path.join(d, repo + "-main"), dest)
_user, repo = fullrepo.split("/", 1)
# If the tag name began with a "v" and looked like a version (i.e. has a
# digit immediately afterwards), the "v" is stripped by GitHub. We have
# to match this behaviour to get the correct name of the top-level
# directory within the zip file
if re.match(r"^v\d", ref):
ref = ref[1:]
with TemporaryDirectory() as temp_dir:
repo_zip.extractall(temp_dir)
shutil.move(os.path.join(temp_dir, f"{repo}-{ref}"), dest)

0 comments on commit 184c192

Please sign in to comment.