👌 Improve performance of needs builders (#1054)
This commit refactors the `needs` and `needs_id` builders so that they can skip the "write" phase of the Sphinx build entirely.

The main aim of these builders is to output the final needs data in a particular format.
The needs data is collected during the "read" phase of the Sphinx build, i.e. while all documents are read; once all needs data has been collected, a number of "post-processing" functions are applied to it.
This post-processing is independent of (a) the individual documents and (b) the output format, and so does not require the Sphinx "write" phase to run.

Here we combine all of the post-processing into a single `post_process_needs_data` function that can be called directly by these builders.
Ideally this would be called in a [Sphinx event](https://www.sphinx-doc.org/en/master/extdev/appapi.html#emitting-events) occurring between the "read" and "write" events; however, no such event currently exists (the closest is `env-check-consistency`, but it is not triggered on "write-only" rebuilds).
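
For illustration, a minimal sketch of that hypothetical event-based wiring, assuming the `(app, env)` handler signature of `env-check-consistency`; this is not what the commit does, precisely because the event is skipped on write-only rebuilds:

```python
# Hypothetical wiring only: env-check-consistency is the closest existing
# event between "read" and "write", but it is not emitted on write-only
# rebuilds, so the builders call post_process_needs_data() directly instead.
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment

from sphinx_needs.directives.need import post_process_needs_data


def _post_process(app: Sphinx, env: BuildEnvironment) -> None:
    post_process_needs_data(app)


def setup(app: Sphinx) -> None:
    app.connect("env-check-consistency", _post_process)
```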

The only corner case for skipping the "write" phase is when the `needs` builder must also export filters, due to the presence of `export_id` options on certain directives.
These filters are currently computed during the "write" phase, so in that case we still run it, but emit a warning to let the user know about the resulting performance degradation.
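
The warning is emitted with `type="needs"` and `subtype="build"` (see the diff below), so a project that accepts the slower build should be able to silence it via Sphinx's standard `suppress_warnings` mechanism; a minimal `conf.py` sketch, assuming the usual `type.subtype` matching:

```python
# conf.py: assumes Sphinx's "type.subtype" warning filtering;
# the warning added in this commit uses type="needs", subtype="build".
suppress_warnings = ["needs.build"]
```
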
chrisjsewell authored Oct 31, 2023
1 parent bc0b8c9 commit 6d0952b
Showing 9 changed files with 447 additions and 137 deletions.
222 changes: 133 additions & 89 deletions sphinx_needs/builder.py
@@ -1,5 +1,5 @@
 import os
-from typing import Iterable, List, Optional, Set
+from typing import Iterable, List, Optional, Sequence, Set
 
 from docutils import nodes
 from sphinx import version_info
@@ -8,28 +8,58 @@

 from sphinx_needs.config import NeedsSphinxConfig
 from sphinx_needs.data import NeedsInfoType, SphinxNeedsData
+from sphinx_needs.directives.need import post_process_needs_data
 from sphinx_needs.logging import get_logger
 from sphinx_needs.needsfile import NeedsList
 
-log = get_logger(__name__)
+LOGGER = get_logger(__name__)
 
 
 class NeedsBuilder(Builder):
+    """Output the needs data as a JSON file,
+    filtering by the ``needs_builder_filter`` config option if set,
+    and writing to ``needs.json`` (or the ``needs_file`` config option if set)
+    in the output folder.
+
+    Note this builder normally completely skips the write phase,
+    where all documents are post-transformed, to improve performance.
+    It is assumed all need data is already read in the read phase,
+    and the post-processing of the data is done in the finish phase.
+
+    However, if the ``export_id`` option is set for any directives,
+    the write phase is still run, since this filter data is currently only added there.
+    A warning is issued in this case.
+    """
+
     name = "needs"
     format = "needs"
     file_suffix = ".txt"
     links_suffix = None
 
-    def write_doc(self, docname: str, doctree: nodes.document) -> None:
-        pass
+    def get_outdated_docs(self) -> Iterable[str]:
+        return []
+
+    def write(
+        self,
+        build_docnames: Iterable[str],
+        updated_docnames: Sequence[str],
+        method: str = "update",
+    ) -> None:
+        if not SphinxNeedsData(self.env).has_export_filters:
+            return
+        LOGGER.warning(
+            "At least one use of `export_id` directive option, requires a slower build", type="needs", subtype="build"
+        )
+        return super().write(build_docnames, updated_docnames, method)
 
     def finish(self) -> None:
-        env = self.env
-        data = SphinxNeedsData(env)
+        post_process_needs_data(self.app)
+
+        data = SphinxNeedsData(self.env)
+        needs_config = NeedsSphinxConfig(self.env.config)
         filters = data.get_or_create_filters()
-        version = getattr(env.config, "version", "unset")
-        needs_list = NeedsList(env.config, self.outdir, self.srcdir)
-        needs_config = NeedsSphinxConfig(env.config)
+        version = getattr(self.env.config, "version", "unset")
+        needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)
 
         if needs_config.file:
             needs_file = needs_config.file
@@ -38,7 +68,7 @@ def finish(self) -> None:
             # check if needs.json file exists in conf.py directory
             needs_json = os.path.join(self.srcdir, "needs.json")
             if os.path.exists(needs_json):
-                log.info("needs.json found, but will not be used because needs_file not configured.")
+                LOGGER.info("needs.json found, but will not be used because needs_file not configured.")
 
         # Clean needs_list from already stored needs of the current version.
         # This is needed as needs could have been removed from documentation and if this is the case,
@@ -62,25 +92,30 @@
         try:
             needs_list.write_json()
         except Exception as e:
-            log.error(f"Error during writing json file: {e}")
+            LOGGER.error(f"Error during writing json file: {e}")
         else:
-            log.info("Needs successfully exported")
+            LOGGER.info("Needs successfully exported")
 
-    def get_outdated_docs(self) -> Iterable[str]:
-        return []
+    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
+        # only needed if the write phase is run
+        return ""
 
     def prepare_writing(self, _docnames: Set[str]) -> None:
+        # only needed if the write phase is run
         pass
 
+    def write_doc(self, docname: str, doctree: nodes.document) -> None:
+        # only needed if the write phase is run
+        pass
+
     def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
+        # only needed if the write phase is run
         pass
 
     def cleanup(self) -> None:
+        # only needed if the write phase is run
         pass
-
-    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
-        return ""
 
 
 def build_needs_json(app: Sphinx, _exception: Exception) -> None:
     env = app.env
@@ -101,7 +136,87 @@ def build_needs_json(app: Sphinx, _exception: Exception) -> None:
     needs_builder.finish()
 
 
+class NeedsIdBuilder(Builder):
+    """Output the needs data as multiple JSON files, one per need,
+    filtering by the ``needs_builder_filter`` config option if set,
+    and writing to the ``needs_id`` folder (or the ``build_json_per_id_path`` config option if set)
+    in the output folder.
+
+    Note this builder completely skips the write phase,
+    where all documents are post-transformed, to improve performance.
+    It is assumed all need data is already read in the read phase,
+    and the post-processing of the data is done in the finish phase.
+    """
+
+    name = "needs_id"
+    format = "needs"
+    file_suffix = ".txt"
+    links_suffix = None
+
+    def get_outdated_docs(self) -> Iterable[str]:
+        return []
+
+    def write(
+        self,
+        build_docnames: Iterable[str],
+        updated_docnames: Sequence[str],
+        method: str = "update",
+    ) -> None:
+        pass
+
+    def finish(self) -> None:
+        post_process_needs_data(self.app)
+
+        data = SphinxNeedsData(self.env)
+        needs = data.get_or_create_needs().values()  # We need a list of needs for later filter checks
+        version = getattr(self.env.config, "version", "unset")
+        needs_config = NeedsSphinxConfig(self.env.config)
+        filter_string = needs_config.builder_filter
+        from sphinx_needs.filter_common import filter_needs
+
+        filtered_needs = filter_needs(needs, needs_config, filter_string)
+        needs_build_json_per_id_path = needs_config.build_json_per_id_path
+        needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
+        if not os.path.exists(needs_dir):
+            os.makedirs(needs_dir, exist_ok=True)
+        for need in filtered_needs:
+            needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)
+            needs_list.wipe_version(version)
+            needs_list.add_need(version, need)
+            id = need["id"]
+            try:
+                file_name = f"{id}.json"
+                needs_list.write_json(file_name, needs_dir)
+            except Exception as e:
+                LOGGER.error(f"Needs-ID Builder {id} error: {e}")
+        LOGGER.info("Needs_id successfully exported")
+
+
+def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
+    env = app.env
+
+    if not NeedsSphinxConfig(env.config).build_json_per_id:
+        return
+
+    # Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
+    if isinstance(app.builder, NeedsIdBuilder):
+        return
+    try:
+        needs_id_builder = NeedsIdBuilder(app, env)
+    except TypeError:
+        needs_id_builder = NeedsIdBuilder(app)
+        needs_id_builder.set_environment(env)
+
+    needs_id_builder.finish()
+
+
 class NeedumlsBuilder(Builder):
+    """Write generated PlantUML input files to the output dir,
+    that were generated by need directives,
+    if they have a ``save`` field set,
+    denoting the path relative to the output folder.
+    """
+
     name = "needumls"
 
     def write_doc(self, docname: str, doctree: nodes.document) -> None:
@@ -120,7 +235,7 @@ def finish(self) -> None:
             if not os.path.exists(save_dir):
                 os.makedirs(save_dir, exist_ok=True)
 
-            log.info(f"Storing needuml data to file {save_path}.")
+            LOGGER.info(f"Storing needuml data to file {save_path}.")
             with open(save_path, "w") as f:
                 f.write(puml_content)
@@ -161,74 +276,3 @@ def build_needumls_pumls(app: Sphinx, _exception: Exception) -> None:
         needs_builder.set_environment(env)
 
     needs_builder.finish()
-
-
-class NeedsIdBuilder(Builder):
-    """Json builder for needs, which creates separate json-files per need"""
-
-    name = "needs_id"
-    format = "needs"
-    file_suffix = ".txt"
-    links_suffix = None
-
-    def write_doc(self, docname: str, doctree: nodes.document) -> None:
-        pass
-
-    def finish(self) -> None:
-        env = self.env
-        data = SphinxNeedsData(env)
-        needs = data.get_or_create_needs().values()  # We need a list of needs for later filter checks
-        version = getattr(env.config, "version", "unset")
-        needs_config = NeedsSphinxConfig(env.config)
-        filter_string = needs_config.builder_filter
-        from sphinx_needs.filter_common import filter_needs
-
-        filtered_needs = filter_needs(needs, needs_config, filter_string)
-        needs_build_json_per_id_path = needs_config.build_json_per_id_path
-        needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
-        if not os.path.exists(needs_dir):
-            os.makedirs(needs_dir, exist_ok=True)
-        for need in filtered_needs:
-            needs_list = NeedsList(env.config, self.outdir, self.srcdir)
-            needs_list.wipe_version(version)
-            needs_list.add_need(version, need)
-            id = need["id"]
-            try:
-                file_name = f"{id}.json"
-                needs_list.write_json(file_name, needs_dir)
-            except Exception as e:
-                log.error(f"Needs-ID Builder {id} error: {e}")
-        log.info("Needs_id successfully exported")
-
-    def get_outdated_docs(self) -> Iterable[str]:
-        return []
-
-    def prepare_writing(self, _docnames: Set[str]) -> None:
-        pass
-
-    def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
-        pass
-
-    def cleanup(self) -> None:
-        pass
-
-    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
-        return ""
-
-
-def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
-    env = app.env
-
-    if not NeedsSphinxConfig(env.config).build_json_per_id:
-        return
-
-    # Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
-    if isinstance(app.builder, NeedsIdBuilder):
-        return
-    try:
-        needs_id_builder = NeedsIdBuilder(app, env)
-    except TypeError:
-        needs_id_builder = NeedsIdBuilder(app)
-        needs_id_builder.set_environment(env)
-
-    needs_id_builder.finish()
18 changes: 17 additions & 1 deletion sphinx_needs/data.py
@@ -25,9 +25,10 @@ class NeedsFilterType(TypedDict):
     status: list[str]
     tags: list[str]
     types: list[str]
-    export_id: str
     result: list[str]
     amount: int
+    export_id: str
+    """If set, the filter is exported with this ID in the needs.json file."""
 
 
 class NeedsBaseDataType(TypedDict):
@@ -271,6 +272,7 @@ class NeedsFilteredBaseType(NeedsBaseDataType):
     filter_code: list[str]
     filter_func: None | str
     export_id: str
+    """If set, the filter is exported with this ID in the needs.json file."""
 
 
 class NeedsFilteredDiagramBaseType(NeedsFilteredBaseType):
@@ -409,6 +411,18 @@ def get_or_create_needs(self) -> dict[str, NeedsInfoType]:
             self.env.needs_all_needs = {}
         return self.env.needs_all_needs
 
+    @property
+    def has_export_filters(self) -> bool:
+        """Whether any filters have export IDs."""
+        try:
+            return self.env.needs_filters_export_id
+        except AttributeError:
+            return False
+
+    @has_export_filters.setter
+    def has_export_filters(self, value: bool) -> None:
+        self.env.needs_filters_export_id = value
+
     def get_or_create_filters(self) -> dict[str, NeedsFilterType]:
         """Get all filters, mapped by ID.
@@ -593,6 +607,8 @@ def merge_data(_app: Sphinx, env: BuildEnvironment, _docnames: list[str], other:
     needs = SphinxNeedsData(env).get_or_create_needs()
     other_needs = SphinxNeedsData(other).get_or_create_needs()
     needs.update(other_needs)
+    if SphinxNeedsData(other).has_export_filters:
+        SphinxNeedsData(env).has_export_filters = True
 
     def _merge(name: str, is_complex_dict: bool = False) -> None:
         # Update global needs dict
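
As a usage note, the refactored builders are invoked like any other Sphinx builder; a minimal sketch, assuming a project under `docs/` with the `sphinx_needs` extension enabled:

```python
# Equivalent to `sphinx-build -b needs docs docs/_build/needs`.
# With this commit, needs.json is produced in the builder's finish phase,
# without running the write phase (unless `export_id` filters are present).
from sphinx.application import Sphinx

app = Sphinx(
    srcdir="docs",
    confdir="docs",
    outdir="docs/_build/needs",
    doctreedir="docs/_build/doctrees",
    buildername="needs",
)
app.build()
```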