From 1732bf710c29a5db3ef68ddf3175bf59489defa4 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Fri, 16 Aug 2024 12:14:14 +0100 Subject: [PATCH 1/7] Add utility to parse codepoints Based on logic from ufo_merge --- Lib/gftools/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/gftools/utils.py b/Lib/gftools/utils.py index b142396c..923dcccb 100644 --- a/Lib/gftools/utils.py +++ b/Lib/gftools/utils.py @@ -685,3 +685,11 @@ def has_gh_token(): if "GH_TOKEN" in os.environ: return True return False + + +def parse_codepoint(codepoint: str) -> int: + # https://github.com/googlefonts/ufomerge/blob/2257a1d3807a4eec9b515aa98e059383f7814d9a/Lib/ufomerge/cli.py#L118-L126 + if codepoint.startswith(("U+", "u+", "0x", "0X")): + return int(codepoint[2:], 16) + else: + return int(codepoint) From 659c1b37c00e269b65037aec0fcce2c399e9557f Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Fri, 16 Aug 2024 12:15:01 +0100 Subject: [PATCH 2/7] builder: subset: support excluding codepoints inline or from file --- Lib/gftools/subsetmerger.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Lib/gftools/subsetmerger.py b/Lib/gftools/subsetmerger.py index b32b6fbc..f266774f 100644 --- a/Lib/gftools/subsetmerger.py +++ b/Lib/gftools/subsetmerger.py @@ -17,7 +17,7 @@ from ufomerge import merge_ufos from gftools.util.styles import STYLE_NAMES -from gftools.utils import download_file, open_ufo +from gftools.utils import download_file, open_ufo, parse_codepoint logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -47,6 +47,8 @@ ), Optional("layoutHandling"): Str(), Optional("force"): Str(), + Optional("exclude_codepoints"): Str(), + Optional("exclude_codepoints_file"): Str(), } ) ) @@ -69,6 +71,31 @@ def prepare_minimal_subsets(subsets): for r in subset["ranges"]: for cp in range(r["start"], r["end"] + 1): unicodes.append(cp) + + # Parse in manual exclusions + excluded_codepoints = set() + if 
exclude_inline := subset.get("exclude_codepoints"): + for raw_value in exclude_inline.split(): + raw_value = raw_value.strip() + if raw_value == "": + continue + excluded_codepoints.add(parse_codepoint(raw_value)) + if exclude_file := subset.get("exclude_codepoints_file"): + for line in Path(exclude_file).read_text().splitlines(): + line = line.strip() + if line == "" or line.startswith(("#", "//")): + continue + # Remove in-line comments + line = line.split("#", 1)[0] + line = line.split("//", 1)[0] + line = line.rstrip() + excluded_codepoints.add(parse_codepoint(line)) + + # Filter unicodes by excluded_codepoints + unicodes = [ + unicode for unicode in unicodes if unicode not in excluded_codepoints + ] + key = ( yaml.dump(subset["from"]), subset.get("layoutHandling"), subset.get("force"), From 1730a2ad69b5af934d8528e7fbbf073116f54ba7 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Fri, 16 Aug 2024 12:35:50 +0100 Subject: [PATCH 3/7] add-ds-subsets: add --exclude-codepoints & --exclude-codepoints-file --- Lib/gftools/scripts/add_ds_subsets.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/gftools/scripts/add_ds_subsets.py b/Lib/gftools/scripts/add_ds_subsets.py index 2e106793..f1b329a4 100644 --- a/Lib/gftools/scripts/add_ds_subsets.py +++ b/Lib/gftools/scripts/add_ds_subsets.py @@ -81,6 +81,14 @@ def main(args=None): parser.add_argument( "--json", "-j", action="store_true", help="Use JSON structured UFOs" ) + parser.add_argument( + "--exclude-codepoints", help="Space-delimited unicodes to exclude" + ) + parser.add_argument( + "--exclude-codepoints-file", + help="Newline delimited file with unicodes to exclude. 
" + "Allows for comments with either # or //", + ) parser.add_argument("--output", "-o", help="Output designspace file") @@ -107,6 +115,8 @@ def main(args=None): "from": { "repo": args.repo, "path": args.file, + "exclude_codepoints": args.exclude_codepoints, + "exclude_codepoints_file": args.exclude_codepoints_file, } } ] From ea2080054ce2fd064b16883589ff9330983f2c05 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Fri, 16 Aug 2024 16:35:53 +0100 Subject: [PATCH 4/7] builder: subset: support excluding glyphs by name inline or from file Leverages/Expands on the (formally called) unicodes_by_donor to handle glyphs names also. Some type soup involved --- Lib/gftools/subsetmerger.py | 50 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/Lib/gftools/subsetmerger.py b/Lib/gftools/subsetmerger.py index f266774f..b9086029 100644 --- a/Lib/gftools/subsetmerger.py +++ b/Lib/gftools/subsetmerger.py @@ -47,7 +47,9 @@ ), Optional("layoutHandling"): Str(), Optional("force"): Str(), + Optional("exclude_glyphs"): Str(), Optional("exclude_codepoints"): Str(), + Optional("exclude_glyphs_file"): Str(), Optional("exclude_codepoints_file"): Str(), } ) @@ -59,7 +61,15 @@ def prepare_minimal_subsets(subsets): # codepoints with the same "donor" font and options. This allows the # user to specify multiple subsets from the same font, and they will # be merged into a single merge operation. 
- unicodes_by_donor = defaultdict(set) + incl_excl_by_donor: dict[ + tuple[str, str, str], + tuple[ + # Unicodes to include + set[int], + # Glyph names to exclude + set[str], + ], + ] = defaultdict(lambda: (set(), set())) for subset in subsets: # Resolved named subsets to a set of Unicode using glyphsets data if "name" in subset: @@ -96,18 +106,49 @@ def prepare_minimal_subsets(subsets): unicode for unicode in unicodes if unicode not in excluded_codepoints ] + # Load excluded glyphs by name + exclude_glyphs = set() + if exclude_inline := subset.get("exclude_glyphs"): + for glyph_name in exclude_inline.split(): + glyph_name = glyph_name.strip() + if glyph_name == "": + continue + exclude_glyphs.add(glyph_name) + if exclude_file := subset.get("exclude_glyphs_file"): + for line in Path(exclude_file).read_text().splitlines(): + line = line.strip() + if line == "" or line.startswith(("#", "//")): + continue + # Remove in-line comments + line = line.split("#", 1)[0] + line = line.split("//", 1)[0] + line = line.rstrip() + exclude_glyphs.add(line) + + # Update incl_excl_by_donor key = ( yaml.dump(subset["from"]), subset.get("layoutHandling"), subset.get("force"), ) - unicodes_by_donor[key] |= set(unicodes) + unicodes_incl, glyph_names_excl = incl_excl_by_donor[key] + unicodes_incl |= set(unicodes) + glyph_names_excl |= exclude_glyphs # Now rebuild the subset dictionary, but this time with the codepoints # amalgamated into minimal sets. 
newsubsets = [] - for (donor, layouthandling, force), unicodes in unicodes_by_donor.items(): - newsubsets.append({"from": yaml.safe_load(donor), "unicodes": list(unicodes)}) + for (donor, layouthandling, force), ( + unicodes_incl, + glyph_names_excl, + ) in incl_excl_by_donor.items(): + newsubsets.append( + { + "from": yaml.safe_load(donor), + "unicodes": list(unicodes_incl), + "exclude_glyphs": list(glyph_names_excl), + } + ) if layouthandling: newsubsets[-1]["layoutHandling"] = layouthandling if force: @@ -190,6 +231,7 @@ def add_subset(self, target_ufo, ds, ds_source, subset): merge_ufos( target_ufo, source_ufo, + exclude_glyphs=subset["exclude_glyphs"], codepoints=subset["unicodes"], existing_handling=existing_handling, layout_handling=layout_handling, From 455c61a07890f4d10eea56b7f7f84cbdc869a108 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Thu, 22 Aug 2024 11:21:16 +0100 Subject: [PATCH 5/7] add-ds-subsets: add --exclude-glyphs & --exclude-glyphs-file --- Lib/gftools/scripts/add_ds_subsets.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/gftools/scripts/add_ds_subsets.py b/Lib/gftools/scripts/add_ds_subsets.py index f1b329a4..9224356a 100644 --- a/Lib/gftools/scripts/add_ds_subsets.py +++ b/Lib/gftools/scripts/add_ds_subsets.py @@ -89,6 +89,14 @@ def main(args=None): help="Newline delimited file with unicodes to exclude. " "Allows for comments with either # or //", ) + parser.add_argument( + "--exclude-glyphs", help="Space-delimited glyph names to exclude" + ) + parser.add_argument( + "--exclude-glyphs-file", + help="Newline delimited file with glyph names to exclude. 
" + "Allows for comments with either # or //", + ) parser.add_argument("--output", "-o", help="Output designspace file") @@ -117,6 +125,8 @@ def main(args=None): "path": args.file, "exclude_codepoints": args.exclude_codepoints, "exclude_codepoints_file": args.exclude_codepoints_file, + "exclude_glyphs": args.exclude_glyphs, + "exclude_glyphs_file": args.exclude_glyphs_file, } } ] From d975503a54560b51ffcc9ab8b3a01612e9daa9a0 Mon Sep 17 00:00:00 2001 From: Ricky Atkins Date: Tue, 10 Sep 2024 11:54:28 +0100 Subject: [PATCH 6/7] Document subset operation options --- docs/gftools-builder/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/gftools-builder/README.md b/docs/gftools-builder/README.md index ec22228d..7b573963 100644 --- a/docs/gftools-builder/README.md +++ b/docs/gftools-builder/README.md @@ -328,6 +328,21 @@ build process by leaving a `graph.png` file in the `sources` directory: - *subspace*: Runs `fonttools varLib.instancer` to subspace a variable font according to the values in `axes`. `args` are added to the command line. - *hbsubset*: Uses `hb-subset` to slim down a font binary. - *addSubset*: Adds a subset from another font using `gftools-add-ds-subsets` + - `directory`: the intermediary folder used to store the source(s) the subset(s) is taken from + - `subsets`: a list of subset configurations to merge in + - `from` (required): can be a pre-configured Noto source ("Noto Sans", "Noto Serif", "Noto Sans Devanagari", "Noto Sans Linear B"), or: + - `repo`: the GitHub slug for the repository, e.g. `googlefonts/gftools`. You can specify a git revision by suffixing this with `@v1.0.0`, or use `@latest` for the latest *published* release + - `path`: the path within the repo that has the source file + - `name`: a named Google Fonts subset, e.g. 
`GF_Latin_Core` + - `ranges`: a list of unicode codepoint ranges to include + - `start`: the start of the range (as hex or decimal) + - `end`: the end of the range (as hex or decimal) + - `layoutHandling`: "subset", "closure" or "ignore" ([further reading](https://github.com/googlefonts/ufomerge/blob/bb9a82ff3039b8aa0cba58372158bd3c0e5cb770/Lib/ufomerge/__init__.py#L512-L521)) + - `force`: replace existing glyphs in your sources, instead of skipping them + - `exclude_glyphs`: whitespace-delimited glyph names to exclude from merging + - `exclude_glyphs_file`: path to a file with glyph names to exclude from merging, one per line (comments using `#` or `//` allowed) + - `exclude_codepoints`: whitespace-delimited unicode codepoints to exclude from merging + - `exclude_codepoints_file`: path to a file with unicode codepoints to exclude from merging, one per line (comments using `#` or `//` allowed) - *buildVTT*: Uses `gftools-build-vtt` with the configuration file provided in `vttfile` to add VTT hinting to a font binary. - *remap*: Uses `gftools-remap-font` to alter a font binary's `cmap` table. - *paintcompiler*: Runs paintcompiler on a font to add a COLRv1 table. From 1e0521a06ac8c7b9f653e353e6b914a19b4b842f Mon Sep 17 00:00:00 2001 From: Harry Dalton Date: Wed, 11 Sep 2024 17:17:37 +0100 Subject: [PATCH 7/7] Add explicit ufomerge dependency, pinning to avoid bug See feedback on googlefonts/gftools#1020 --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a77c1350..2f57c091 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,10 @@ dependencies = [ 'ninja', 'networkx', 'ruamel.yaml', + # Used for subset merging, and preferred over the home-grown UFO merge script, + # which is deprecated. + # Pin avoids bug googlefonts/ufomerge#28. + 'ufomerge>=1.8.1' ] [project.optional-dependencies]