👌 Improve performance of needs builders (#1054)
This commit refactors the `needs` and `needs_id` builders so that they can skip the "write" phase of the Sphinx build entirely.

The main aim of these builders is to output the final needs data in a particular format.
The needs data is collected during the "read" phase of the Sphinx build, i.e. while all documents are read; once all needs data has been collected, a number of "post-processing" functions are applied to it.
This post-processing is independent of (a) the individual documents and (b) the output format, and so does not require the Sphinx "write" phase to run.

Here we combine all of the post-processing into a single `post_process_needs_data` function that can be called directly by these builders.
Ideally this would be called in a [Sphinx event](https://www.sphinx-doc.org/en/master/extdev/appapi.html#emitting-events) occurring between the "read" and "write" events; however, no such event currently exists (the closest is `env-check-consistency`, but it is not triggered on "write-only" rebuilds).
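
For illustration, a minimal sketch of that hypothetical event-based wiring, assuming the `(app, env)` handler signature of `env-check-consistency`; this is not what the commit does, precisely because the event is skipped on write-only rebuilds:

```python
# Hypothetical wiring only: env-check-consistency is the closest existing
# event between "read" and "write", but it is not emitted on write-only
# rebuilds, so the builders call post_process_needs_data() directly instead.
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment

from sphinx_needs.directives.need import post_process_needs_data


def _post_process(app: Sphinx, env: BuildEnvironment) -> None:
    post_process_needs_data(app)


def setup(app: Sphinx) -> None:
    app.connect("env-check-consistency", _post_process)
```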

The only corner case for skipping the "write" phase is when the `needs` builder must also export filters, due to the presence of `export_id` options on certain directives.
These filters are currently computed during the "write" phase, so in that case we still run it, but emit a warning to let the user know about the resulting performance degradation.
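
The warning is emitted with `type="needs"` and `subtype="build"` (see the diff below), so a project that accepts the slower build should be able to silence it via Sphinx's standard `suppress_warnings` mechanism; a minimal `conf.py` sketch, assuming the usual `type.subtype` matching:

```python
# conf.py: assumes Sphinx's "type.subtype" warning filtering;
# the warning added in this commit uses type="needs", subtype="build".
suppress_warnings = ["needs.build"]
```
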
chrisjsewell authored Oct 31, 2023
1 parent bc0b8c9 commit 6d0952b
Showing 9 changed files with 447 additions and 137 deletions.
222 changes: 133 additions & 89 deletions sphinx_needs/builder.py
@@ -1,5 +1,5 @@
 import os
-from typing import Iterable, List, Optional, Set
+from typing import Iterable, List, Optional, Sequence, Set
 
 from docutils import nodes
 from sphinx import version_info
@@ -8,28 +8,58 @@

 from sphinx_needs.config import NeedsSphinxConfig
 from sphinx_needs.data import NeedsInfoType, SphinxNeedsData
+from sphinx_needs.directives.need import post_process_needs_data
 from sphinx_needs.logging import get_logger
 from sphinx_needs.needsfile import NeedsList
 
-log = get_logger(__name__)
+LOGGER = get_logger(__name__)
 
 
 class NeedsBuilder(Builder):
+    """Output the needs data as a JSON file,
+    filtering by the ``needs_builder_filter`` config option if set,
+    and writing to ``needs.json`` (or the ``needs_file`` config option if set)
+    in the output folder.
+
+    Note this builder normally completely skips the write phase,
+    where all documents are post-transformed, to improve performance.
+    It is assumed all need data is already read in the read phase,
+    and the post-processing of the data is done in the finish phase.
+
+    However, if the ``export_id`` option is set for any directives,
+    the write phase is still run, since this filter data is currently only added there.
+    A warning is issued in this case.
+    """
+
     name = "needs"
     format = "needs"
     file_suffix = ".txt"
     links_suffix = None
 
-    def write_doc(self, docname: str, doctree: nodes.document) -> None:
-        pass
+    def get_outdated_docs(self) -> Iterable[str]:
+        return []
+
+    def write(
+        self,
+        build_docnames: Iterable[str],
+        updated_docnames: Sequence[str],
+        method: str = "update",
+    ) -> None:
+        if not SphinxNeedsData(self.env).has_export_filters:
+            return
+        LOGGER.warning(
+            "At least one use of `export_id` directive option, requires a slower build", type="needs", subtype="build"
+        )
+        return super().write(build_docnames, updated_docnames, method)
 
     def finish(self) -> None:
-        env = self.env
-        data = SphinxNeedsData(env)
+        post_process_needs_data(self.app)
+
+        data = SphinxNeedsData(self.env)
+        needs_config = NeedsSphinxConfig(self.env.config)
         filters = data.get_or_create_filters()
-        version = getattr(env.config, "version", "unset")
-        needs_list = NeedsList(env.config, self.outdir, self.srcdir)
-        needs_config = NeedsSphinxConfig(env.config)
+        version = getattr(self.env.config, "version", "unset")
+        needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)
 
         if needs_config.file:
             needs_file = needs_config.file
@@ -38,7 +68,7 @@ def finish(self) -> None:
             # check if needs.json file exists in conf.py directory
             needs_json = os.path.join(self.srcdir, "needs.json")
             if os.path.exists(needs_json):
-                log.info("needs.json found, but will not be used because needs_file not configured.")
+                LOGGER.info("needs.json found, but will not be used because needs_file not configured.")
 
         # Clean needs_list from already stored needs of the current version.
         # This is needed as needs could have been removed from documentation and if this is the case,
@@ -62,25 +92,30 @@
         try:
             needs_list.write_json()
         except Exception as e:
-            log.error(f"Error during writing json file: {e}")
+            LOGGER.error(f"Error during writing json file: {e}")
         else:
-            log.info("Needs successfully exported")
+            LOGGER.info("Needs successfully exported")
 
-    def get_outdated_docs(self) -> Iterable[str]:
-        return []
+    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
+        # only needed if the write phase is run
+        return ""
 
     def prepare_writing(self, _docnames: Set[str]) -> None:
+        # only needed if the write phase is run
         pass
 
+    def write_doc(self, docname: str, doctree: nodes.document) -> None:
+        # only needed if the write phase is run
+        pass
+
     def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
+        # only needed if the write phase is run
         pass
 
     def cleanup(self) -> None:
+        # only needed if the write phase is run
         pass
-
-    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
-        return ""
 
 
 def build_needs_json(app: Sphinx, _exception: Exception) -> None:
     env = app.env
@@ -101,7 +136,87 @@ def build_needs_json(app: Sphinx, _exception: Exception) -> None:
     needs_builder.finish()
 
 
+class NeedsIdBuilder(Builder):
+    """Output the needs data as multiple JSON files, one per need,
+    filtering by the ``needs_builder_filter`` config option if set,
+    and writing to the ``needs_id`` folder (or the ``build_json_per_id_path`` config option if set)
+    in the output folder.
+
+    Note this builder completely skips the write phase,
+    where all documents are post-transformed, to improve performance.
+    It is assumed all need data is already read in the read phase,
+    and the post-processing of the data is done in the finish phase.
+    """
+
+    name = "needs_id"
+    format = "needs"
+    file_suffix = ".txt"
+    links_suffix = None
+
+    def get_outdated_docs(self) -> Iterable[str]:
+        return []
+
+    def write(
+        self,
+        build_docnames: Iterable[str],
+        updated_docnames: Sequence[str],
+        method: str = "update",
+    ) -> None:
+        pass
+
+    def finish(self) -> None:
+        post_process_needs_data(self.app)
+
+        data = SphinxNeedsData(self.env)
+        needs = data.get_or_create_needs().values()  # We need a list of needs for later filter checks
+        version = getattr(self.env.config, "version", "unset")
+        needs_config = NeedsSphinxConfig(self.env.config)
+        filter_string = needs_config.builder_filter
+        from sphinx_needs.filter_common import filter_needs
+
+        filtered_needs = filter_needs(needs, needs_config, filter_string)
+        needs_build_json_per_id_path = needs_config.build_json_per_id_path
+        needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
+        if not os.path.exists(needs_dir):
+            os.makedirs(needs_dir, exist_ok=True)
+        for need in filtered_needs:
+            needs_list = NeedsList(self.env.config, self.outdir, self.srcdir)
+            needs_list.wipe_version(version)
+            needs_list.add_need(version, need)
+            id = need["id"]
+            try:
+                file_name = f"{id}.json"
+                needs_list.write_json(file_name, needs_dir)
+            except Exception as e:
+                LOGGER.error(f"Needs-ID Builder {id} error: {e}")
+        LOGGER.info("Needs_id successfully exported")
+
+
+def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
+    env = app.env
+
+    if not NeedsSphinxConfig(env.config).build_json_per_id:
+        return
+
+    # Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
+    if isinstance(app.builder, NeedsIdBuilder):
+        return
+    try:
+        needs_id_builder = NeedsIdBuilder(app, env)
+    except TypeError:
+        needs_id_builder = NeedsIdBuilder(app)
+        needs_id_builder.set_environment(env)
+
+    needs_id_builder.finish()
+
+
 class NeedumlsBuilder(Builder):
+    """Write generated PlantUML input files to the output dir,
+    that were generated by need directives,
+    if they have a ``save`` field set,
+    denoting the path relative to the output folder.
+    """
+
     name = "needumls"
 
     def write_doc(self, docname: str, doctree: nodes.document) -> None:
@@ -120,7 +235,7 @@ def finish(self) -> None:
             if not os.path.exists(save_dir):
                 os.makedirs(save_dir, exist_ok=True)
 
-            log.info(f"Storing needuml data to file {save_path}.")
+            LOGGER.info(f"Storing needuml data to file {save_path}.")
             with open(save_path, "w") as f:
                 f.write(puml_content)
@@ -161,74 +276,3 @@ def build_needumls_pumls(app: Sphinx, _exception: Exception) -> None:
         needs_builder.set_environment(env)
 
     needs_builder.finish()
-
-
-class NeedsIdBuilder(Builder):
-    """Json builder for needs, which creates separate json-files per need"""
-
-    name = "needs_id"
-    format = "needs"
-    file_suffix = ".txt"
-    links_suffix = None
-
-    def write_doc(self, docname: str, doctree: nodes.document) -> None:
-        pass
-
-    def finish(self) -> None:
-        env = self.env
-        data = SphinxNeedsData(env)
-        needs = data.get_or_create_needs().values()  # We need a list of needs for later filter checks
-        version = getattr(env.config, "version", "unset")
-        needs_config = NeedsSphinxConfig(env.config)
-        filter_string = needs_config.builder_filter
-        from sphinx_needs.filter_common import filter_needs
-
-        filtered_needs = filter_needs(needs, needs_config, filter_string)
-        needs_build_json_per_id_path = needs_config.build_json_per_id_path
-        needs_dir = os.path.join(self.outdir, needs_build_json_per_id_path)
-        if not os.path.exists(needs_dir):
-            os.makedirs(needs_dir, exist_ok=True)
-        for need in filtered_needs:
-            needs_list = NeedsList(env.config, self.outdir, self.srcdir)
-            needs_list.wipe_version(version)
-            needs_list.add_need(version, need)
-            id = need["id"]
-            try:
-                file_name = f"{id}.json"
-                needs_list.write_json(file_name, needs_dir)
-            except Exception as e:
-                log.error(f"Needs-ID Builder {id} error: {e}")
-        log.info("Needs_id successfully exported")
-
-    def get_outdated_docs(self) -> Iterable[str]:
-        return []
-
-    def prepare_writing(self, _docnames: Set[str]) -> None:
-        pass
-
-    def write_doc_serialized(self, _docname: str, _doctree: nodes.document) -> None:
-        pass
-
-    def cleanup(self) -> None:
-        pass
-
-    def get_target_uri(self, _docname: str, _typ: Optional[str] = None) -> str:
-        return ""
-
-
-def build_needs_id_json(app: Sphinx, _exception: Exception) -> None:
-    env = app.env
-
-    if not NeedsSphinxConfig(env.config).build_json_per_id:
-        return
-
-    # Do not create an additional needs_json for every needs_id, if builder is already "needs_id".
-    if isinstance(app.builder, NeedsIdBuilder):
-        return
-    try:
-        needs_id_builder = NeedsIdBuilder(app, env)
-    except TypeError:
-        needs_id_builder = NeedsIdBuilder(app)
-        needs_id_builder.set_environment(env)
-
-    needs_id_builder.finish()
18 changes: 17 additions & 1 deletion sphinx_needs/data.py
@@ -25,9 +25,10 @@ class NeedsFilterType(TypedDict):
     status: list[str]
     tags: list[str]
     types: list[str]
-    export_id: str
     result: list[str]
     amount: int
+    export_id: str
+    """If set, the filter is exported with this ID in the needs.json file."""
 
 
 class NeedsBaseDataType(TypedDict):
@@ -271,6 +272,7 @@ class NeedsFilteredBaseType(NeedsBaseDataType):
     filter_code: list[str]
     filter_func: None | str
     export_id: str
+    """If set, the filter is exported with this ID in the needs.json file."""
 
 
 class NeedsFilteredDiagramBaseType(NeedsFilteredBaseType):
@@ -409,6 +411,18 @@ def get_or_create_needs(self) -> dict[str, NeedsInfoType]:
             self.env.needs_all_needs = {}
         return self.env.needs_all_needs
 
+    @property
+    def has_export_filters(self) -> bool:
+        """Whether any filters have export IDs."""
+        try:
+            return self.env.needs_filters_export_id
+        except AttributeError:
+            return False
+
+    @has_export_filters.setter
+    def has_export_filters(self, value: bool) -> None:
+        self.env.needs_filters_export_id = value
+
     def get_or_create_filters(self) -> dict[str, NeedsFilterType]:
         """Get all filters, mapped by ID.
@@ -593,6 +607,8 @@ def merge_data(_app: Sphinx, env: BuildEnvironment, _docnames: list[str], other:
     needs = SphinxNeedsData(env).get_or_create_needs()
     other_needs = SphinxNeedsData(other).get_or_create_needs()
     needs.update(other_needs)
+    if SphinxNeedsData(other).has_export_filters:
+        SphinxNeedsData(env).has_export_filters = True
 
     def _merge(name: str, is_complex_dict: bool = False) -> None:
         # Update global needs dict
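
As a usage note, the refactored builders are invoked like any other Sphinx builder; a minimal sketch, assuming a project under `docs/` with the `sphinx_needs` extension enabled:

```python
# Equivalent to `sphinx-build -b needs docs docs/_build/needs`.
# With this commit, needs.json is produced in the builder's finish phase,
# without running the write phase (unless `export_id` filters are present).
from sphinx.application import Sphinx

app = Sphinx(
    srcdir="docs",
    confdir="docs",
    outdir="docs/_build/needs",
    doctreedir="docs/_build/doctrees",
    buildername="needs",
)
app.build()
```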