From 1732bf710c29a5db3ef68ddf3175bf59489defa4 Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Fri, 16 Aug 2024 12:14:14 +0100
Subject: [PATCH 1/7] Add utility to parse codepoints

Based on logic from ufomerge
---
 Lib/gftools/utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Lib/gftools/utils.py b/Lib/gftools/utils.py
index b142396c..923dcccb 100644
--- a/Lib/gftools/utils.py
+++ b/Lib/gftools/utils.py
@@ -685,3 +685,11 @@ def has_gh_token():
     if "GH_TOKEN" in os.environ:
         return True
     return False
+
+
+def parse_codepoint(codepoint: str) -> int:
+    # https://github.com/googlefonts/ufomerge/blob/2257a1d3807a4eec9b515aa98e059383f7814d9a/Lib/ufomerge/cli.py#L118-L126
+    if codepoint.startswith(("U+", "u+", "0x", "0X")):
+        return int(codepoint[2:], 16)
+    else:
+        return int(codepoint)

From 659c1b37c00e269b65037aec0fcce2c399e9557f Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Fri, 16 Aug 2024 12:15:01 +0100
Subject: [PATCH 2/7] builder: subset: support excluding codepoints inline or
 from file

---
 Lib/gftools/subsetmerger.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/Lib/gftools/subsetmerger.py b/Lib/gftools/subsetmerger.py
index b32b6fbc..f266774f 100644
--- a/Lib/gftools/subsetmerger.py
+++ b/Lib/gftools/subsetmerger.py
@@ -17,7 +17,7 @@
 from ufomerge import merge_ufos
 
 from gftools.util.styles import STYLE_NAMES
-from gftools.utils import download_file, open_ufo
+from gftools.utils import download_file, open_ufo, parse_codepoint
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -47,6 +47,8 @@
             ),
             Optional("layoutHandling"): Str(),
             Optional("force"): Str(),
+            Optional("exclude_codepoints"): Str(),
+            Optional("exclude_codepoints_file"): Str(),
         }
     )
 )
@@ -69,6 +71,31 @@ def prepare_minimal_subsets(subsets):
             for r in subset["ranges"]:
                 for cp in range(r["start"], r["end"] + 1):
                     unicodes.append(cp)
+
+        # Parse in manual exclusions
+        excluded_codepoints = set()
+        if exclude_inline := subset.get("exclude_codepoints"):
+            for raw_value in exclude_inline.split():
+                raw_value = raw_value.strip()
+                if raw_value == "":
+                    continue
+                excluded_codepoints.add(parse_codepoint(raw_value))
+        if exclude_file := subset.get("exclude_codepoints_file"):
+            for line in Path(exclude_file).read_text().splitlines():
+                # Drop whole-line and in-line comments (# or //), then whitespace
+                line = line.split("#", 1)[0]
+                line = line.split("//", 1)[0]
+                line = line.strip()
+                # Nothing left: the line was blank or held only a comment
+                if not line:
+                    continue
+                excluded_codepoints.add(parse_codepoint(line))
+
+        # Filter unicodes by excluded_codepoints
+        unicodes = [
+            unicode for unicode in unicodes if unicode not in excluded_codepoints
+        ]
+
         key = (
             yaml.dump(subset["from"]),
             subset.get("layoutHandling"),

From 1730a2ad69b5af934d8528e7fbbf073116f54ba7 Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Fri, 16 Aug 2024 12:35:50 +0100
Subject: [PATCH 3/7] add-ds-subsets: add --exclude-codepoints &
 --exclude-codepoints-file

---
 Lib/gftools/scripts/add_ds_subsets.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Lib/gftools/scripts/add_ds_subsets.py b/Lib/gftools/scripts/add_ds_subsets.py
index 2e106793..f1b329a4 100644
--- a/Lib/gftools/scripts/add_ds_subsets.py
+++ b/Lib/gftools/scripts/add_ds_subsets.py
@@ -81,6 +81,14 @@ def main(args=None):
     parser.add_argument(
         "--json", "-j", action="store_true", help="Use JSON structured UFOs"
     )
+    parser.add_argument(
+        "--exclude-codepoints", help="Space-delimited unicodes to exclude"
+    )
+    parser.add_argument(
+        "--exclude-codepoints-file",
+        help="Newline delimited file with unicodes to exclude. "
+        "Allows for comments with either # or //",
+    )
 
     parser.add_argument("--output", "-o", help="Output designspace file")
 
@@ -107,6 +115,8 @@ def main(args=None):
                 "from": {
                     "repo": args.repo,
                     "path": args.file,
+                    "exclude_codepoints": args.exclude_codepoints,
+                    "exclude_codepoints_file": args.exclude_codepoints_file,
                 }
             }
         ]

From ea2080054ce2fd064b16883589ff9330983f2c05 Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Fri, 16 Aug 2024 16:35:53 +0100
Subject: [PATCH 4/7] builder: subset: support excluding glyphs by name inline
 or from file

Leverages/expands on the (formerly called) unicodes_by_donor to handle
glyph names as well. Some type soup involved.
---
 Lib/gftools/subsetmerger.py | 50 ++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/Lib/gftools/subsetmerger.py b/Lib/gftools/subsetmerger.py
index f266774f..b9086029 100644
--- a/Lib/gftools/subsetmerger.py
+++ b/Lib/gftools/subsetmerger.py
@@ -47,7 +47,9 @@
             ),
             Optional("layoutHandling"): Str(),
             Optional("force"): Str(),
+            Optional("exclude_glyphs"): Str(),
             Optional("exclude_codepoints"): Str(),
+            Optional("exclude_glyphs_file"): Str(),
             Optional("exclude_codepoints_file"): Str(),
         }
     )
@@ -59,7 +61,15 @@ def prepare_minimal_subsets(subsets):
     # codepoints with the same "donor" font and options. This allows the
     # user to specify multiple subsets from the same font, and they will
     # be merged into a single merge operation.
-    unicodes_by_donor = defaultdict(set)
+    incl_excl_by_donor: dict[
+        tuple[str, str, str],
+        tuple[
+            # Unicodes to include
+            set[int],
+            # Glyph names to exclude
+            set[str],
+        ],
+    ] = defaultdict(lambda: (set(), set()))
     for subset in subsets:
         # Resolved named subsets to a set of Unicode using glyphsets data
         if "name" in subset:
@@ -96,18 +106,49 @@ def prepare_minimal_subsets(subsets):
             unicode for unicode in unicodes if unicode not in excluded_codepoints
         ]
 
+        # Load excluded glyphs by name
+        exclude_glyphs = set()
+        if exclude_inline := subset.get("exclude_glyphs"):
+            for glyph_name in exclude_inline.split():
+                glyph_name = glyph_name.strip()
+                if glyph_name == "":
+                    continue
+                exclude_glyphs.add(glyph_name)
+        if exclude_file := subset.get("exclude_glyphs_file"):
+            for line in Path(exclude_file).read_text().splitlines():
+                # Drop whole-line and in-line comments (# or //), then whitespace
+                line = line.split("#", 1)[0]
+                line = line.split("//", 1)[0]
+                line = line.strip()
+                # Nothing left: the line was blank or held only a comment
+                if not line:
+                    continue
+                exclude_glyphs.add(line)
+
+        # Update incl_excl_by_donor
         key = (
             yaml.dump(subset["from"]),
             subset.get("layoutHandling"),
             subset.get("force"),
         )
-        unicodes_by_donor[key] |= set(unicodes)
+        unicodes_incl, glyph_names_excl = incl_excl_by_donor[key]
+        unicodes_incl |= set(unicodes)
+        glyph_names_excl |= exclude_glyphs
 
     # Now rebuild the subset dictionary, but this time with the codepoints
     # amalgamated into minimal sets.
     newsubsets = []
-    for (donor, layouthandling, force), unicodes in unicodes_by_donor.items():
-        newsubsets.append({"from": yaml.safe_load(donor), "unicodes": list(unicodes)})
+    for (donor, layouthandling, force), (
+        unicodes_incl,
+        glyph_names_excl,
+    ) in incl_excl_by_donor.items():
+        newsubsets.append(
+            {
+                "from": yaml.safe_load(donor),
+                "unicodes": list(unicodes_incl),
+                "exclude_glyphs": list(glyph_names_excl),
+            }
+        )
         if layouthandling:
             newsubsets[-1]["layoutHandling"] = layouthandling
         if force:
@@ -190,6 +231,7 @@ def add_subset(self, target_ufo, ds, ds_source, subset):
         merge_ufos(
             target_ufo,
             source_ufo,
+            exclude_glyphs=subset["exclude_glyphs"],
             codepoints=subset["unicodes"],
             existing_handling=existing_handling,
             layout_handling=layout_handling,

From 455c61a07890f4d10eea56b7f7f84cbdc869a108 Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Thu, 22 Aug 2024 11:21:16 +0100
Subject: [PATCH 5/7] add-ds-subsets: add --exclude-glyphs &
 --exclude-glyphs-file

---
 Lib/gftools/scripts/add_ds_subsets.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Lib/gftools/scripts/add_ds_subsets.py b/Lib/gftools/scripts/add_ds_subsets.py
index f1b329a4..9224356a 100644
--- a/Lib/gftools/scripts/add_ds_subsets.py
+++ b/Lib/gftools/scripts/add_ds_subsets.py
@@ -89,6 +89,14 @@ def main(args=None):
         help="Newline delimited file with unicodes to exclude. "
         "Allows for comments with either # or //",
     )
+    parser.add_argument(
+        "--exclude-glyphs", help="Space-delimited glyph names to exclude"
+    )
+    parser.add_argument(
+        "--exclude-glyphs-file",
+        help="Newline delimited file with glyph names to exclude. "
+        "Allows for comments with either # or //",
+    )
 
     parser.add_argument("--output", "-o", help="Output designspace file")
 
@@ -117,6 +125,8 @@ def main(args=None):
                     "path": args.file,
                     "exclude_codepoints": args.exclude_codepoints,
                     "exclude_codepoints_file": args.exclude_codepoints_file,
+                    "exclude_glyphs": args.exclude_glyphs,
+                    "exclude_glyphs_file": args.exclude_glyphs_file,
                 }
             }
         ]

From d975503a54560b51ffcc9ab8b3a01612e9daa9a0 Mon Sep 17 00:00:00 2001
From: Ricky Atkins <ricky.atkins@daltonmaag.com>
Date: Tue, 10 Sep 2024 11:54:28 +0100
Subject: [PATCH 6/7] Document subset operation options

---
 docs/gftools-builder/README.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/gftools-builder/README.md b/docs/gftools-builder/README.md
index ec22228d..7b573963 100644
--- a/docs/gftools-builder/README.md
+++ b/docs/gftools-builder/README.md
@@ -328,6 +328,21 @@ build process by leaving a `graph.png` file in the `sources` directory:
 - *subspace*: Runs `fonttools varLib.instancer` to subspace a variable font according to the values in `axes`. `args` are added to the command line.
 - *hbsubset*: Uses `hb-subset` to slim down a font binary.
 - *addSubset*: Adds a subset from another font using `gftools-add-ds-subsets`
+    - `directory`: the intermediary folder used to store the source(s) the subset(s) is taken from
+    - `subsets`: a list of subset configurations to merge in
+        - `from` (required): can be a pre-configured Noto source ("Noto Sans", "Noto Serif", "Noto Sans Devanagari", "Noto Sans Linear B"), or:
+            - `repo`: the GitHub slug for the repository, e.g. `googlefonts/gftools`. You can specify a git revision by suffixing this with `@v1.0.0`, or use `@latest` for the latest *published* release
+            - `path`: the path within the repo that has the source file
+        - `name`: a named Google Fonts subset, e.g. `GF_Latin_Core`
+        - `ranges`: a list of Unicode codepoint ranges to include
+            - `start`: the start of the range (as hex or decimal)
+            - `end`: the end of the range (as hex or decimal)
+        - `layoutHandling`: "subset", "closure" or "ignore" ([further reading](https://github.com/googlefonts/ufomerge/blob/bb9a82ff3039b8aa0cba58372158bd3c0e5cb770/Lib/ufomerge/__init__.py#L512-L521))
+        - `force`: replace existing glyphs in your sources, instead of skipping them
+        - `exclude_glyphs`: whitespace-delimited glyph names to exclude from merging
+        - `exclude_glyphs_file`: path to a file with glyph names to exclude from merging, one per line (comments using `#` or `//` allowed)
+        - `exclude_codepoints`: whitespace-delimited unicode codepoints to exclude from merging
+        - `exclude_codepoints_file`: path to a file with unicode codepoints to exclude from merging, one per line (comments using `#` or `//` allowed)
 - *buildVTT*: Uses `gftools-build-vtt` with the configuration file provided in `vttfile` to add VTT hinting to a font binary.
 - *remap*: Uses `gftools-remap-font` to alter a font binary's `cmap` table.
 - *paintcompiler*: Runs paintcompiler on a font to add a COLRv1 table.

From 1e0521a06ac8c7b9f653e353e6b914a19b4b842f Mon Sep 17 00:00:00 2001
From: Harry Dalton <harry.dalton@daltonmaag.com>
Date: Wed, 11 Sep 2024 17:17:37 +0100
Subject: [PATCH 7/7] Add explicit ufomerge dependency, pinning to avoid bug

See feedback on googlefonts/gftools#1020
---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index a77c1350..2f57c091 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,10 @@ dependencies = [
   'ninja',
   'networkx',
   'ruamel.yaml',
+  # Used for subset merging, and preferred over the home-grown UFO merge script,
+  # which is deprecated.
+  # Pin avoids bug googlefonts/ufomerge#28.
+  'ufomerge>=1.8.1'
 ]
 
 [project.optional-dependencies]