Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Builder subset operation: exclude glyphs by name/codepoint/file #1020

Merged
merged 7 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Lib/gftools/scripts/add_ds_subsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,22 @@ def main(args=None):
parser.add_argument(
"--json", "-j", action="store_true", help="Use JSON structured UFOs"
)
parser.add_argument(
"--exclude-codepoints", help="Space-delimited unicodes to exclude"
)
parser.add_argument(
"--exclude-codepoints-file",
help="Newline delimited file with unicodes to exclude. "
"Allows for comments with either # or //",
)
parser.add_argument(
"--exclude-glyphs", help="Space-delimited glyph names to exclude"
)
parser.add_argument(
"--exclude-glyphs-file",
help="Newline delimited file with glyph names to exclude. "
"Allows for comments with either # or //",
)

parser.add_argument("--output", "-o", help="Output designspace file")

Expand All @@ -107,6 +123,10 @@ def main(args=None):
"from": {
"repo": args.repo,
"path": args.file,
"exclude_codepoints": args.exclude_codepoints,
"exclude_codepoints_file": args.exclude_codepoints_file,
"exclude_glyphs": args.exclude_glyphs,
"exclude_glyphs_file": args.exclude_glyphs_file,
}
}
]
Expand Down
79 changes: 74 additions & 5 deletions Lib/gftools/subsetmerger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ufomerge import merge_ufos

from gftools.util.styles import STYLE_NAMES
from gftools.utils import download_file, open_ufo
from gftools.utils import download_file, open_ufo, parse_codepoint

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -47,6 +47,10 @@
),
Optional("layoutHandling"): Str(),
Optional("force"): Str(),
Optional("exclude_glyphs"): Str(),
Optional("exclude_codepoints"): Str(),
Optional("exclude_glyphs_file"): Str(),
Optional("exclude_codepoints_file"): Str(),
}
)
)
Expand All @@ -57,7 +61,15 @@ def prepare_minimal_subsets(subsets):
# codepoints with the same "donor" font and options. This allows the
# user to specify multiple subsets from the same font, and they will
# be merged into a single merge operation.
unicodes_by_donor = defaultdict(set)
incl_excl_by_donor: dict[
tuple[str, str, str],
tuple[
# Unicodes to include
set[int],
# Glyph names to exclude
set[str],
],
] = defaultdict(lambda: (set(), set()))
for subset in subsets:
# Resolved named subsets to a set of Unicode using glyphsets data
if "name" in subset:
Expand All @@ -69,18 +81,74 @@ def prepare_minimal_subsets(subsets):
for r in subset["ranges"]:
for cp in range(r["start"], r["end"] + 1):
unicodes.append(cp)

# Parse in manual exclusions
excluded_codepoints = set()
if exclude_inline := subset.get("exclude_codepoints"):
for raw_value in exclude_inline.split():
raw_value = raw_value.strip()
if raw_value == "":
continue
excluded_codepoints.add(parse_codepoint(raw_value))
if exclude_file := subset.get("exclude_codepoints_file"):
for line in Path(exclude_file).read_text().splitlines():
line = line.strip()
if line != "" and not line.startswith(("#", "//")):
continue
# Remove in-line comments
line = line.split("#", 1)[0]
line = line.split("//", 1)[0]
line = line.rstrip()
excluded_codepoints.add(parse_codepoint(line))

# Filter unicodes by excluded_codepoints
unicodes = [
unicode for unicode in unicodes if unicode not in excluded_codepoints
]

# Load excluded glyphs by name
exclude_glyphs = set()
if exclude_inline := subset.get("exclude_glyphs"):
for glyph_name in exclude_inline.split():
glyph_name = glyph_name.strip()
if glyph_name == "":
continue
exclude_glyphs.add(glyph_name)
if exclude_file := subset.get("exclude_glyphs_file"):
for line in Path(exclude_file).read_text().splitlines():
line = line.strip()
if line != "" and not line.startswith(("#", "//")):
continue
# Remove in-line comments
line = line.split("#", 1)[0]
line = line.split("//", 1)[0]
line = line.rstrip()
exclude_glyphs.add(line)

# Update incl_excl_by_donor
key = (
yaml.dump(subset["from"]),
subset.get("layoutHandling"),
subset.get("force"),
)
unicodes_by_donor[key] |= set(unicodes)
unicodes_incl, glyph_names_excl = incl_excl_by_donor[key]
unicodes_incl |= set(unicodes)
glyph_names_excl |= exclude_glyphs

# Now rebuild the subset dictionary, but this time with the codepoints
# amalgamated into minimal sets.
newsubsets = []
for (donor, layouthandling, force), unicodes in unicodes_by_donor.items():
newsubsets.append({"from": yaml.safe_load(donor), "unicodes": list(unicodes)})
for (donor, layouthandling, force), (
unicodes_incl,
glyph_names_excl,
) in incl_excl_by_donor.items():
newsubsets.append(
{
"from": yaml.safe_load(donor),
"unicodes": list(unicodes_incl),
"exclude_glyphs": list(glyph_names_excl),
}
)
if layouthandling:
newsubsets[-1]["layoutHandling"] = layouthandling
if force:
Expand Down Expand Up @@ -163,6 +231,7 @@ def add_subset(self, target_ufo, ds, ds_source, subset):
merge_ufos(
target_ufo,
source_ufo,
exclude_glyphs=subset["exclude_glyphs"],
codepoints=subset["unicodes"],
existing_handling=existing_handling,
layout_handling=layout_handling,
Expand Down
8 changes: 8 additions & 0 deletions Lib/gftools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,3 +685,11 @@ def has_gh_token():
if "GH_TOKEN" in os.environ:
return True
return False


def parse_codepoint(codepoint: str) -> int:
    """Convert a textual codepoint into its integer value.

    A string starting with ``U+``, ``u+``, ``0x`` or ``0X`` has that
    two-character prefix dropped and the remainder is read as hexadecimal.
    Any other string is read as a decimal integer.

    Raises:
        ValueError: propagated from ``int`` when the text is not a valid
            number in the selected base.
    """
    # See:
    # https://github.com/googlefonts/ufomerge/blob/2257a1d3807a4eec9b515aa98e059383f7814d9a/Lib/ufomerge/cli.py#L118-L126
    hex_prefixes = ("U+", "u+", "0x", "0X")
    if not codepoint.startswith(hex_prefixes):
        return int(codepoint)
    hex_digits = codepoint[2:]
    return int(hex_digits, 16)
15 changes: 15 additions & 0 deletions docs/gftools-builder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,21 @@ build process by leaving a `graph.png` file in the `sources` directory:
- *subspace*: Runs `fonttools varLib.instancer` to subspace a variable font according to the values in `axes`. `args` are added to the command line.
- *hbsubset*: Uses `hb-subset` to slim down a font binary.
- *addSubset*: Adds a subset from another font using `gftools-add-ds-subsets`
- `directory`: the intermediary folder used to store the source(s) the subset(s) is taken from
- `subsets`: a list of subset configurations to merge in
- `from` (required): can be a pre-configured Noto source ("Noto Sans", "Noto Serif", "Noto Sans Devanagari", "Noto Sans Linear B"), or:
- `repo`: the GitHub slug for the repository, e.g. `googlefonts/gftools`. You can specify a git revision by suffixing this with `@v1.0.0`, or use `@latest` for the latest *published* release
- `path`: the path within the repo that has the source file
- `name`: a named Google Fonts subset, e.g. `GF_Latin_Core`
- `ranges`: a list of unicode codepoint ranges to include
- `start`: the start of the range (as hex or decimal)
- `end`: the end of the range (as hex or decimal)
- `layoutHandling`: "subset", "closure" or "ignore" ([further reading](https://github.com/googlefonts/ufomerge/blob/bb9a82ff3039b8aa0cba58372158bd3c0e5cb770/Lib/ufomerge/__init__.py#L512-L521))
- `force`: replace existing glyphs in your sources, instead of skipping them
- `exclude_glyphs`: whitespace-delimited glyph names to exclude from merging
- `exclude_glyphs_file`: path to a file with glyph names to exclude from merging, one per line (comments using `#` or `//` allowed)
- `exclude_codepoints`: whitespace-delimited unicode codepoints to exclude from merging
- `exclude_codepoints_file`: path to a file with unicode codepoints to exclude from merging, one per line (comments using `#` or `//` allowed)
- *buildVTT*: Uses `gftools-build-vtt` with the configuration file provided in `vttfile` to add VTT hinting to a font binary.
- *remap*: Uses `gftools-remap-font` to alter a font binary's `cmap` table.
- *paintcompiler*: Runs paintcompiler on a font to add a COLRv1 table.
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ dependencies = [
'ninja',
'networkx',
'ruamel.yaml',
# Used for subset merging, and preferred over the home-grown UFO merge script,
# which is deprecated.
# Pin avoids bug googlefonts/ufomerge#28.
'ufomerge>=1.8.1'
]

[project.optional-dependencies]
Expand Down