Skip to content

Commit

Permalink
Add subset merger
Browse files Browse the repository at this point in the history
  • Loading branch information
simoncozens committed Sep 20, 2023
1 parent ca7ae7b commit f166584
Show file tree
Hide file tree
Showing 3 changed files with 397 additions and 0 deletions.
126 changes: 126 additions & 0 deletions Lib/gftools/scripts/add_ds_subsets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import re
import sys
from textwrap import dedent, wrap

from strictyaml import load

from gftools.subsetmerger import SUBSET_SOURCES, SubsetMerger, subsets_schema


def rewrap(text):
    """Re-flow ``text`` one paragraph at a time to a width of 72 columns.

    Paragraphs are separated by a blank line (``"\\n\\n"``). Each paragraph
    has its common leading whitespace removed with ``textwrap.dedent`` and is
    then wrapped with ``textwrap.wrap``; the wrapped paragraphs are joined
    back together with a blank line between them.
    """
    reflowed = []
    for paragraph in text.split("\n\n"):
        wrapped_lines = wrap(dedent(paragraph), width=72)
        reflowed.append("\n".join(wrapped_lines))
    return "\n\n".join(reflowed)

# Usage examples appended verbatim to the argparse description built in
# main(). This text is NOT passed through rewrap(), so its line breaks are
# shown to the user exactly as written here.
EXAMPLES = """
gftools-add-ds-subsets \\
--repo notofonts/latin-greek-cyrillic \\
--file sources/NotoSans-Italic.glyphspackage \\
--name "GF_Latin_Core" \\
-o full/NotoSansElymaic.designspace NotoSansElymaic.designspace
gftools-add-ds-subsets \\
--yaml subsets.yaml \\
-o full/NotoSansCypriot.designspace NotoSansCypriot.designspace
Where subsets.yaml is:
- from: Noto Sans
name: GF_Latin_Core
- from: Noto Sans Linear B
ranges:
- start: 0x10100
end: 0x10133
"""
def parse_codepoint_ranges(spec):
    """Parse a ``--codepoints`` value into a list of range dictionaries.

    ``spec`` is a list of hexadecimal ranges of the form ``<start>-<end>``
    separated by commas and/or whitespace, e.g. ``"10100-10133,10140-1018E"``.
    A lone codepoint (``"10100"``) is treated as a one-element range.

    Returns:
        A list of ``{"start": int, "end": int}`` dictionaries, in input order.

    Raises:
        ValueError: if any entry is not a valid hexadecimal range.
    """
    ranges = []
    # Split on separators (commas / whitespace). Splitting on ``\w`` would
    # consume the hex digits themselves and leave only the dashes behind.
    for chunk in re.split(r"[\s,]+", spec.strip()):
        if not chunk:
            continue
        start, dash, end = chunk.partition("-")
        if not dash:
            end = start
        try:
            ranges.append({"start": int(start, 16), "end": int(end, 16)})
        except ValueError:
            raise ValueError(
                f"Invalid codepoint range {chunk!r}; expected <start>-<end> in hex"
            ) from None
    return ranges


def main(args=None):
    """Command-line entry point: add donor-font subsets to a designspace.

    Args:
        args: Optional list of command-line arguments; when ``None`` argparse
            reads ``sys.argv``.

    The subsets are read from ``--yaml`` when given; otherwise a single
    subset is assembled from ``--repo``/``--file`` plus ``--name`` or
    ``--codepoints``. The result is handed to ``SubsetMerger.add_subsets``.
    Usage errors are printed and terminate the process with exit status 1.
    """
    import argparse

    known_sources = ", ".join(f'"{k}"' for k in SUBSET_SOURCES)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=rewrap(
            f"""
            Add a subset from another font to a designspace file and save.
            If --yaml is given, it should be a YAML file describing the subsets to add.
            Otherwise, --repo and --file must be given to specify the source repository
            (as username/repo pair from GitHub) and file name within the repository,
            and then either --name (to use a named subset from the GF glyphsets) or
            --codepoints (to specify a range of codepoints to subset, in the form
            <start>-<end>,<start>-<end>,... where `start` and `end` are Unicode hex
            codepoints) must be given.
            The YAML file should be a list of subsets, each of which should have a `from`
            key to specify the donor font, and either a `name` key (to use a named
            subset from the GF glyphsets) or a `ranges` key (to specify a range of
            codepoints to subset). The `from` key can either be a string (one of
            {known_sources}) or a dictionary
            with a `repo` key specifying the GitHub repository (as username/repo pair)
            and a `path` key specifying the file within the repository.
            Example usage:
            """
        )
        + EXAMPLES,
    )
    parser.add_argument(
        "--googlefonts",
        help="Restrict donor instances to Google Fonts styles",
        action="store_true",
    )

    parser.add_argument("--yaml", "-y", help="YAML file describing subsets")

    parser.add_argument("--repo", "-r", help="GitHub repository to use for subsetting")
    parser.add_argument("--file", "-f", help="Source file within GitHub repository")
    parser.add_argument("--name", "-n", help="Name of subset to use from glyphset")
    parser.add_argument("--codepoints", "-c", help="Range of codepoints to subset")

    # The output path is dereferenced unconditionally below, so it has to be
    # mandatory; leaving it optional only produced a TypeError later on.
    parser.add_argument(
        "--output", "-o", help="Output designspace file", required=True
    )

    parser.add_argument("input", help="Input designspace file")
    args = parser.parse_args(args)

    # The merger writes copies of the masters next to the output designspace
    # using their original base names, so the two directories must differ.
    # Compare absolute paths so that "./a.designspace" and "a.designspace"
    # are recognised as living in the same directory.
    output_dir = os.path.dirname(os.path.abspath(args.output))
    input_dir = os.path.dirname(os.path.abspath(args.input))
    if output_dir == input_dir:
        print("Output file must be in a different directory from input file")
        sys.exit(1)

    if args.yaml:
        with open(args.yaml, encoding="utf-8") as yaml_file:
            subsets = load(yaml_file.read(), subsets_schema).data
    else:
        # It's a one-shot operation, check repo/file/name/codepoints are all given
        if not args.repo or not args.file:
            print("Must specify --repo and --file")
            sys.exit(1)
        if not args.name and not args.codepoints:
            print("Must specify --name or --codepoints")
            sys.exit(1)
        # And then construct the YAML-like object ourselves
        subset = {
            "from": {
                "repo": args.repo,
                "path": args.file,
            }
        }
        if args.name:
            subset["name"] = args.name
        else:
            try:
                subset["ranges"] = parse_codepoint_ranges(args.codepoints)
            except ValueError as error:
                print(error)
                sys.exit(1)
        subsets = [subset]

    SubsetMerger(
        args.input, args.output, subsets, googlefonts=args.googlefonts
    ).add_subsets()


# Allow the module to be executed directly as a script; main() then parses
# the arguments from sys.argv because it is called without an argument list.
if __name__ == "__main__":
    main()
270 changes: 270 additions & 0 deletions Lib/gftools/subsetmerger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
import json
import logging
import os
import re
import shutil
import sys
from collections import defaultdict
from pathlib import Path

import pygit2
import ufoLib2
import yaml
from fontmake.font_project import FontProject
from fontTools.designspaceLib import DesignSpaceDocument
from glyphsets import GFGlyphData
from strictyaml import HexInt, Map, Optional, Seq, Str, Enum
from ufomerge import merge_ufos

from gftools.util.styles import STYLE_NAMES

# Module-level logger used for all progress messages in this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger at INFO level as a side
# effect of importing the module, which also affects applications that merely
# import it -- confirm that this is intended.
logging.basicConfig(level=logging.INFO)

# Well-known donor fonts that can be referred to by name in a subset's
# ``from`` key. Each entry maps the name to a pair of
# (GitHub "owner/repository", path of the source file inside that repository).
SUBSET_SOURCES = {
    "Noto Sans": (
        "notofonts/latin-greek-cyrillic",
        "sources/NotoSans.glyphspackage",
    ),
    "Noto Serif": (
        "notofonts/latin-greek-cyrillic",
        "sources/NotoSerif.glyphspackage",
    ),
    "Noto Sans Devanagari": (
        "notofonts/devanagari",
        "sources/NotoSansDevanagari.glyphspackage",
    ),
    "Noto Serif Devanagari": (
        "notofonts/devanagari",
        "sources/NotoSerifDevanagari.glyphspackage",
    ),
    "Noto Sans Linear B": (
        "notofonts/linear-b",
        "sources/NotoSansLinearB.designspace",
    ),
}


# strictyaml schema for a subsets description: a sequence of mappings, one
# per subset to merge.
subsets_schema = Seq(
    Map(
        {
            # Donor font: either one of the names in SUBSET_SOURCES, or an
            # explicit repository / path pair.
            "from": Enum(SUBSET_SOURCES.keys()) | Map({"repo": Str(), "path": Str()}),
            # Named subset, resolved to codepoints via the glyphsets data.
            Optional("name"): Str(),
            # Explicit codepoint ranges; both ends are included when the
            # ranges are expanded in prepare_minimal_subsets().
            Optional("ranges"): Seq(Map({"start": HexInt(), "end": HexInt()})),
            # Passed through to ufomerge as ``layout_handling``.
            Optional("layoutHandling"): Str(),
            # NOTE(review): validated as a string and only tested for
            # truthiness in SubsetMerger.add_subset, so any non-empty value
            # (including "false") enables replacement -- confirm intended.
            Optional("force"): Str(),
        }
    )
)


def prepare_minimal_subsets(subsets):
    """Collapse a list of subset descriptions into a minimal list of merges.

    All codepoints requested from the same donor font with the same
    ``layoutHandling`` and ``force`` options are gathered into one entry, so
    that the user can list several subsets from one font and still get a
    single merge operation per donor.

    Args:
        subsets: Iterable of mappings with a ``from`` key (a donor name or a
            ``repo``/``path`` mapping), either a ``name`` key (a named
            glyphset) or a ``ranges`` key (a list of ``start``/``end``
            mappings, both ends included), and optionally ``layoutHandling``
            and ``force``. When ``name`` is present, ``ranges`` is ignored.

    Returns:
        A list of dictionaries with ``from`` and ``unicodes`` (a sorted list
        of codepoints), plus ``layoutHandling`` / ``force`` when those were
        set to a truthy value. Entries appear in order of first occurrence.

    Raises:
        ValueError: if a named subset resolves to no codepoints.
        KeyError: if a subset has neither ``name`` nor ``ranges``.
    """
    unicodes_by_donor = defaultdict(set)
    for subset in subsets:
        # Resolve named subsets to a set of Unicode using glyphsets data
        if "name" in subset:
            unicodes = [
                x["unicode"]
                for x in GFGlyphData.glyphs_in_glyphsets([subset["name"]])
                if x["unicode"]
            ]
            if not unicodes:
                raise ValueError("No glyphs found for subset " + subset["name"])
        else:
            unicodes = [
                cp
                for r in subset["ranges"]
                for cp in range(r["start"], r["end"] + 1)
            ]
        # The donor may be a mapping, which is not hashable, so serialise it
        # to build the grouping key. JSON is used because it round-trips any
        # mapping type (plain or ordered) back to a plain dict, whereas
        # yaml.dump() writes Python-specific tags for non-dict mappings that
        # yaml.safe_load() refuses to read back.
        key = (
            json.dumps(subset["from"], sort_keys=True),
            subset.get("layoutHandling"),
            subset.get("force"),
        )
        unicodes_by_donor[key].update(unicodes)

    # Now rebuild the subset dictionary, but this time with the codepoints
    # amalgamated into minimal sets.
    newsubsets = []
    for (donor, layouthandling, force), unicodes in unicodes_by_donor.items():
        merged = {
            "from": json.loads(donor),
            # Sorted so that the output is deterministic from run to run.
            "unicodes": sorted(unicodes),
        }
        if layouthandling:
            merged["layoutHandling"] = layouthandling
        if force:
            merged["force"] = force
        newsubsets.append(merged)
    return newsubsets


class SubsetMerger:
    """Merge glyph subsets from donor fonts into the masters of a designspace.

    The input designspace's masters are copied next to the output designspace
    and each requested subset is merged into every copy with ``ufomerge``.
    Donor fonts are cloned from GitHub into a local cache directory.
    """

    def __init__(self, input_ds, output_ds, subsets, googlefonts=False, cache="../subset-files"):
        """Store the configuration and normalise the requested subsets.

        Args:
            input_ds: Path of the designspace file to read.
            output_ds: Path of the designspace file to write; copies of the
                masters are saved into its parent directory.
            subsets: Subset descriptions, as accepted by
                ``prepare_minimal_subsets``.
            googlefonts: When true, donor designspaces generated from Glyphs
                sources keep only instances whose style name is in
                ``STYLE_NAMES``.
            cache: Directory under which donor repositories are cloned.
        """
        self.input = input_ds
        self.output = output_ds
        self.subsets = prepare_minimal_subsets(subsets)
        self.googlefonts = googlefonts
        self.cache_dir = cache
        # Instance UFOs generated so far, keyed by donor designspace object.
        self.subset_instances = {}

    def add_subsets(self):
        """Adds the specified subsets to the designspace file and saves it to the output path

        Raises:
            ValueError: if no subset could be merged into any master.
        """
        ds = DesignSpaceDocument.fromfile(self.input)
        outpath = Path(self.output).parent
        added_subsets = False
        for master in ds.sources:
            # Clone the UFO before doing anything clever with it, so that the
            # original master is never modified.
            newpath = os.path.join(outpath, os.path.basename(master.path))
            original_ufo = ufoLib2.Font.open(master.path)
            original_ufo.save(newpath, overwrite=True)

            master.path = newpath

            for subset in self.subsets:
                added_subsets |= self.add_subset(ds, master, subset)
        if not added_subsets:
            raise ValueError("Could not match *any* subsets for this font")

        # Instances are re-pointed at the output directory as well.
        for instance in ds.instances:
            instance.filename = instance.path = os.path.join(
                outpath, os.path.basename(instance.filename)
            )

        ds.write(self.output)

    def add_subset(self, ds, ds_source, subset):
        """Merge one subset into the UFO of one designspace source.

        Args:
            ds: The designspace document that ``ds_source`` belongs to.
            ds_source: The source descriptor whose UFO receives the glyphs.
            subset: One entry produced by ``prepare_minimal_subsets``.

        Returns:
            ``True`` when a donor UFO was found and merged, ``False`` when no
            donor UFO was available.
        """
        # First, we find a donor UFO that matches the location of the
        # UFO to merge. The location is converted with each axis'
        # map_backward before it is compared against the donor.
        location = dict(ds_source.location)
        for axis in ds.axes:
            location[axis.name] = axis.map_backward(location[axis.name])
        source_ufo = self.obtain_upstream(subset["from"], location)
        if not source_ufo:
            return False

        # Open it up and send it to ufomerge, using the options supplied.
        target_ufo = ufoLib2.Font.open(ds_source.path)
        existing_handling = "replace" if subset.get("force") else "skip"
        layout_handling = subset.get("layoutHandling", "subset")
        logger.info(f"Merge {subset['from']} from {source_ufo} into {ds_source.filename} with {existing_handling} and {layout_handling}")
        merge_ufos(
            target_ufo,
            source_ufo,
            codepoints=subset["unicodes"],
            existing_handling=existing_handling,
            layout_handling=layout_handling,
        )
        target_ufo.save(ds_source.path, overwrite=True)
        return True

    @staticmethod
    def _designspace_sibling(path):
        """Return ``path`` with a trailing .glyphs/.glyphspackage suffix replaced by .designspace."""
        # The dot is escaped and the pattern anchored at the end so that only
        # the file extension is rewritten, never a directory or file name that
        # merely contains "glyphs".
        return re.sub(r"\.glyphs(package)?$", ".designspace", path)

    def obtain_upstream(self, upstream, location):
        """Fetch the donor font and open the UFO matching ``location``.

        Args:
            upstream: Either a key of ``SUBSET_SOURCES`` or a mapping with
                ``repo`` and ``path`` keys.
            location: Mapping of axis name to the location to match.

        Returns:
            The opened donor ``ufoLib2.Font``, or ``None`` if no matching
            source was returned by ``find_source_for_location``.

        Raises:
            ValueError: if ``upstream`` is a string that is not a known
                donor name.
        """
        # Either the upstream is a string, in which case we try looking
        # it up in the SUBSET_SOURCES table, or it's a dict, in which
        # case it's a repository / path pair.
        if isinstance(upstream, str):
            if upstream not in SUBSET_SOURCES:
                raise ValueError("Unknown subsetting font %s" % upstream)
            repo, path = SUBSET_SOURCES[upstream]
            font_name = upstream
        else:
            repo = upstream["repo"]
            path = upstream["path"]
            font_name = "%s/%s" % (repo, path)
        path = os.path.join(self.cache_dir, repo, path)

        self.clone_for_subsetting(repo)

        # We're doing a UFO-UFO merge, so Glyphs files will need to be converted
        if path.endswith((".glyphs", ".glyphspackage")):
            ds_path = self._designspace_sibling(path)
            if os.path.exists(ds_path):
                # Already converted on an earlier run.
                path = ds_path
            else:
                logger.info("Building UFO file for subset font " + font_name)
                path = self.glyphs_to_ufo(path)

        # Now we have an appropriate designspace containing the subset;
        # find the actual UFO that corresponds to the location we are
        # trying to add to.
        source_ds = DesignSpaceDocument.fromfile(path)
        source_ufo = self.find_source_for_location(source_ds, location, font_name)
        if source_ufo:
            return ufoLib2.Font.open(source_ufo.path)
        return None

    def glyphs_to_ufo(self, source, directory=None):
        """Convert a Glyphs source to UFOs plus a designspace using fontmake.

        Args:
            source: Path of the ``.glyphs`` / ``.glyphspackage`` file.
            directory: Output directory; defaults to the directory that
                contains ``source``.

        Returns:
            The path of the generated designspace file, as a string.
        """
        source = Path(source)
        if directory is None:
            directory = source.resolve().parent
        output = str(Path(directory) / source.with_suffix(".designspace").name)
        FontProject().run_from_glyphs(
            str(source.resolve()),
            format=["ufo"],
            output=["ufo"],
            output_dir=directory,
            master_dir=directory,
            designspace_path=output,
        )
        if self.googlefonts:
            # Keep only the instances whose style name is listed in STYLE_NAMES.
            ds = DesignSpaceDocument.fromfile(output)
            ds.instances = [i for i in ds.instances if i.styleName in STYLE_NAMES]
            ds.write(output)

        return output

    def find_source_for_location(self, source_ds, location, font_name):
        """Find the donor master or instance that corresponds to ``location``.

        Masters are tried first; if none matches, instances are tried and the
        matching instance is generated on demand.

        Args:
            source_ds: The donor designspace document.
            location: Mapping of axis name to the location to match.
            font_name: Donor name, used in log and error messages only.

        Returns:
            The matching source or instance descriptor.

        Raises:
            ValueError: if neither a master nor an instance matches.
        """
        source_mappings = {ax.name: ax.map_forward for ax in source_ds.axes}
        target = None

        # Assume a source is good for this location unless proved otherwise:
        # axes that the donor does not have are ignored. This is useful for
        # merging single-master donors into a multiple master font.
        for source in source_ds.sources:
            mismatch = any(
                axis in source.location
                and axis in source_mappings
                and source.location[axis] != source_mappings[axis](loc)
                for axis, loc in location.items()
            )
            if not mismatch:
                target = source
                break

        if not target:
            logger.info(f"Couldn't find a master from {font_name} for location {location}, trying instances")
            # We didn't find an exact match in the masters; maybe we will
            # be able to interpolate an instance which matches. Unlike the
            # master search, every requested axis must be present and equal.
            for instance in source_ds.instances:
                if all(
                    axis in instance.location
                    and axis in source_mappings
                    and instance.location[axis] == source_mappings[axis](loc)
                    for axis, loc in location.items()
                ):
                    self.generate_subset_instances(source_ds, font_name, instance)
                    target = instance
                    break

        if not target:
            raise ValueError(
                f"Could not find master in {font_name} for location {location}"
            )

        logger.info(f"Adding subset from {font_name} for location {location}")
        return target

    def generate_subset_instances(self, source_ds, font_name, instance):
        """Interpolate instance UFOs for a donor designspace.

        Args:
            source_ds: The donor designspace document.
            font_name: Donor name, used in the log message only.
            instance: The instance descriptor whose name is passed to
                fontmake as the ``include`` filter.
        """
        # Instance generation takes ages, cache which ones we've already
        # done on this run.
        # NOTE(review): the cache is keyed by the designspace object and
        # obtain_upstream loads a fresh document on every call, so this looks
        # unlikely to hit across calls -- confirm.
        if source_ds in self.subset_instances:
            return

        logger.info(f"Generate UFO instances for {font_name}")
        ufos = FontProject().interpolate_instance_ufos(source_ds, include=instance.name)
        self.subset_instances[source_ds] = ufos

        # We won't return an individual instance; instead we update the
        # path in the donor's designspace object so that it can be taken from there
        # NOTE(review): instances are paired with the generated UFOs purely by
        # position, which presumably assumes both come in the same order.
        designspace_dir = os.path.dirname(source_ds.path)
        for descriptor, _ufo in zip(source_ds.instances, ufos):
            descriptor.path = os.path.join(designspace_dir, descriptor.filename)

    def clone_for_subsetting(self, repo):
        """Clone the GitHub repository ``repo`` into the cache, unless present.

        Args:
            repo: Repository as an ``owner/name`` pair.

        An existing cache directory for ``repo`` is reused as-is. If cloning
        fails, the directory created for it is removed again so that the next
        run retries instead of using an empty checkout.
        """
        dest = os.path.join(self.cache_dir, repo)
        if os.path.exists(dest):
            return
        os.makedirs(dest)
        try:
            logger.info(f"Cloning {repo}")
            pygit2.clone_repository(f"https://github.com/{repo}", dest)
        except BaseException:
            # Also covers an interrupted clone (Ctrl-C): never leave a
            # partial checkout behind that would be mistaken for a cache hit.
            shutil.rmtree(dest, ignore_errors=True)
            raise
Loading

0 comments on commit f166584

Please sign in to comment.