Commit
Refactoring
AA-Turner authored and ubmarco committed Jan 19, 2024
1 parent f29cfaf commit 957b4da
Showing 5 changed files with 74 additions and 58 deletions.
111 changes: 62 additions & 49 deletions sphinx/builders/__init__.py
@@ -79,11 +79,10 @@ class Builder:
#: post transformation, to be merged to the main builder in
#: :py:class:`~sphinx.builders.Builder.merge_builder_post_transform`.
#: Attributes in the list must be pickleable.
#: The approach improves performance when
#: pickling and sending data over pipes because only a
#: subset of the builder attributes are commonly needed for merging
#: to the main process builder instance.
post_transform_merge_attr: list[str] = []
#: The approach improves performance when pickling and sending data
#: over pipes because only a subset of the builder attributes
#: are commonly needed for merging to the main process builder instance.
post_transform_merge_attr: tuple[str, ...] = ()

def __init__(self, app: Sphinx, env: BuildEnvironment) -> None:
self.srcdir = app.srcdir
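
A note on post_transform_merge_attr above: the tuple names the picklable builder attributes that each worker ships back to the main process, so only a small payload crosses the pipe. A minimal sketch of that contract, using a hypothetical DemoBuilder (none of these names are Sphinx's):

import pickle

class DemoBuilder:
    # Hypothetical builder: only these two small, picklable attributes are
    # worth sending back to the main process after post-transform.
    post_transform_merge_attr: tuple[str, ...] = ('images', 'page_titles')

    def __init__(self) -> None:
        self.images = {'img/a.png': 'a.png'}
        self.page_titles = {'index': 'Welcome'}
        self.events = object()  # large / unpicklable state that is not shipped

def collect_merge_attrs(builder: DemoBuilder) -> bytes:
    # Pickle only the named attributes, mirroring the worker-side code below.
    payload = {attr: getattr(builder, attr, None)
               for attr in builder.post_transform_merge_attr}
    return pickle.dumps(payload, pickle.HIGHEST_PROTOCOL)

restored = pickle.loads(collect_merge_attrs(DemoBuilder()))
assert restored == {'images': {'img/a.png': 'a.png'},
                    'page_titles': {'index': 'Welcome'}}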
@@ -135,19 +134,21 @@ def init(self) -> None:
pass

def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None:
"""Give builders the option to merge any post-transform information
coming from a parallel sub-process back to the main process builder.
This can be useful for extensions that consume that information
in the build-finish phase.
"""Merge post-transform data into the master builder.
This method allows builders to merge any post-transform information
coming from parallel subprocesses back into the builder in
the main process. This can be useful for extensions that consume
that information in the build-finish phase.
The function is called once for each finished subprocess.
Builders that implement this function must also define
the class attribute
:py:attr:`~sphinx.builders.Builder.post_transform_merge_attr` as it defines
which builder attributes shall be returned to the main process for merging.
Builders that implement this function must also define the class
attribute :py:attr:`~sphinx.builders.Builder.post_transform_merge_attr`
as it defines which builder attributes shall be returned to
the main process for merging.
The default implementation does nothing.
:param new_attrs: the attributes from the parallel subprocess to be
:param new_attrs: The attributes from the parallel subprocess to be
updated in the main builder
"""
pass
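
To make the merge hook concrete, here is a minimal sketch of a hypothetical extension builder that declares post_transform_merge_attr and folds the per-subprocess data back into the main builder. The class and attribute names are invented for illustration:

from typing import Any

class NoteCollectorBuilder:  # hypothetical extension builder, for illustration
    post_transform_merge_attr: tuple[str, ...] = ('collected_notes',)

    def __init__(self) -> None:
        self.collected_notes: dict[str, list[str]] = {}

    def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None:
        # Called once per finished subprocess with the unpickled attributes;
        # fold the worker's per-document notes into the main-process builder.
        for docname, notes in new_attrs['collected_notes'].items():
            self.collected_notes.setdefault(docname, []).extend(notes)

main = NoteCollectorBuilder()
main.merge_builder_post_transform({'collected_notes': {'index': ['todo: fix']}})
assert main.collected_notes == {'index': ['todo: fix']}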
@@ -591,7 +592,8 @@ def write(

if self.parallel_ok:
# number of subprocesses is parallel-1 because the main process
# is busy loading and post-transforming doctrees and doing write_doc_serialized();
# is busy loading and post-transforming doctrees and doing
# write_doc_serialized();
# in case the global configuration enable_parallel_post_transform
# is active the main process does nothing
self._write_parallel(sorted(docnames),
@@ -610,6 +612,34 @@ def _write_serial(self, docnames: Sequence[str]) -> None:
self.write_doc(docname, doctree)

def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None:
def write_process_serial_post_transform(
docs: list[tuple[str, nodes.document]],
) -> None:
self.app.phase = BuildPhase.WRITING
# The doctree has been post-transformed (incl. write_doc_serialized)
# in the main process, only write_doc() is needed here
for docname, doctree in docs:
self.write_doc(docname, doctree)

def write_process_parallel_post_transform(docs: list[str]) -> bytes:
assert self.env.config.enable_parallel_post_transform
self.app.phase = BuildPhase.WRITING
# run post-transform, doctree-resolved and write_doc_serialized in parallel
for docname in docs:
doctree = self.env.get_and_resolve_doctree(docname, self)
# write_doc_serialized is assumed to be safe for all Sphinx
# internal builders. Some builders merge information from post-transform
# and write_doc_serialized back to the main process using
# Builder.post_transform_merge_attr and
# Builder.merge_builder_post_transform
self.write_doc_serialized(docname, doctree)
self.write_doc(docname, doctree)
merge_attr = {
attr: getattr(self, attr, None)
for attr in self.post_transform_merge_attr
}
return pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL)

# warm up caches/compile templates using the first document
firstname, docnames = docnames[0], docnames[1:]
self.app.phase = BuildPhase.RESOLVING
@@ -618,30 +648,6 @@ def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None:
self.write_doc_serialized(firstname, doctree)
self.write_doc(firstname, doctree)

def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None:
self.app.phase = BuildPhase.WRITING
if self.env.config.enable_parallel_post_transform:
# run post-transform, doctree-resolved and write_doc_serialized in parallel
for docname, _ in docs:
doctree = self.env.get_and_resolve_doctree(docname, self)
# write_doc_serialized is assumed to be safe for all Sphinx
# internal builders. Some builders merge information from post-transform
# and write_doc_serialized back to the main process using
# Builder.post_transform_merge_attr and
# Builder.merge_builder_post_transform
self.write_doc_serialized(docname, doctree)
self.write_doc(docname, doctree)
merge_attr = {
attr: getattr(self, attr, None)
for attr in self.post_transform_merge_attr
}
return pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL)
for docname, doctree in docs:
# doctree has been post-transformed (incl. write_doc_serialized)
# in the main process, only write_doc is needed here
self.write_doc(docname, doctree)
return None

tasks = ParallelTasks(nproc)
chunks = make_chunks(docnames, nproc)
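
The overall flow around this point is: split the docnames into chunks, hand each chunk to a worker, and merge each worker's pickled result in the main process. The sketch below mirrors that flow with a plain ProcessPoolExecutor; it is a simplified stand-in, not Sphinx's ParallelTasks / make_chunks API:

# Simplified stand-in, assuming each chunk is a list of document names and
# each worker returns a pickled {'images': {...}} payload, as above.
import pickle
from concurrent.futures import ProcessPoolExecutor

def render_chunk(docnames: list[str]) -> bytes:
    images = {name: f'{name}.png' for name in docnames}  # placeholder "work"
    return pickle.dumps({'images': images}, pickle.HIGHEST_PROTOCOL)

def write_parallel(docnames: list[str], nproc: int) -> dict[str, str]:
    chunks = [docnames[i::nproc] for i in range(nproc)]  # naive chunking
    merged: dict[str, str] = {}
    with ProcessPoolExecutor(max_workers=nproc) as pool:
        for payload in pool.map(render_chunk, chunks):   # one result per chunk
            merged |= pickle.loads(payload)['images']    # merge in main process
    return merged

if __name__ == '__main__':
    print(write_parallel(['index', 'usage', 'api', 'changelog'], nproc=2))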

@@ -650,23 +656,30 @@ def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None:
progress = status_iterator(chunks, __('writing output... '), "darkgreen",
len(chunks), self.app.verbosity)

def merge_builder(args: list[tuple[str, NoneType]], new_attrs_pickle: bytes) -> None:
if self.env.config.enable_parallel_post_transform:
new_attrs: dict[str, Any] = pickle.loads(new_attrs_pickle)
self.merge_builder_post_transform(new_attrs)
def on_chunk_done(args: list[tuple[str, NoneType]], result: NoneType) -> None:
next(progress)

def merge_builder(
args: list[tuple[str, NoneType]], pickled_new_attrs: bytes, /,
) -> None:
assert self.env.config.enable_parallel_post_transform
new_attrs: dict[str, Any] = pickle.loads(pickled_new_attrs)
self.merge_builder_post_transform(new_attrs)
next(progress)

self.app.phase = BuildPhase.RESOLVING
for chunk in chunks:
arg: list[tuple[str, nodes.document | None]] = []
for docname in chunk:
if not self.env.config.enable_parallel_post_transform:
if self.env.config.enable_parallel_post_transform:
tasks.add_task(write_process_parallel_post_transform,
chunk, merge_builder)
else:
arg = []
for docname in chunk:
doctree = self.env.get_and_resolve_doctree(docname, self)
self.write_doc_serialized(docname, doctree)
arg.append((docname, doctree))
else:
arg.append((docname, None))
tasks.add_task(write_process, arg, merge_builder)
tasks.add_task(write_process_serial_post_transform,
arg, on_chunk_done)

# make sure all threads have finished
tasks.join()
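
For context, the parallel post-transform path is opt-in via the enable_parallel_post_transform configuration value introduced by this change (not available in released Sphinx). A project would enable it in conf.py and build with parallel writing, roughly like this:

# conf.py -- illustrative; ``enable_parallel_post_transform`` comes from this
# change and is experimental, not part of released Sphinx.
project = 'demo'
extensions: list[str] = []

enable_parallel_post_transform = True

# then build with parallel writing, for example:
#   sphinx-build -j auto -b html docs docs/_build/html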
2 changes: 1 addition & 1 deletion sphinx/builders/_epub_base.py
@@ -155,7 +155,7 @@ class EpubBuilder(StandaloneHTMLBuilder):
refuri_re = REFURI_RE
template_dir = ""
doctype = ""
post_transform_merge_attr = ['images']
post_transform_merge_attr = ('images',)

def init(self) -> None:
super().init()
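
The epub builder only needs the image mapping back from the workers, so it narrows the ('images', 'indexer') merge set inherited from StandaloneHTMLBuilder by redeclaring the class attribute. An illustrative subclass, not actual Sphinx code:

from sphinx.builders.html import StandaloneHTMLBuilder

class ImagesOnlyEpubLikeBuilder(StandaloneHTMLBuilder):  # illustrative subclass
    # Narrow the inherited ('images', 'indexer') merge set: only the image
    # mapping needs to travel back from the worker processes.
    post_transform_merge_attr = ('images',)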
16 changes: 9 additions & 7 deletions sphinx/builders/html/__init__.py
@@ -188,7 +188,7 @@ class StandaloneHTMLBuilder(Builder):

imgpath: str = ''
domain_indices: list[DOMAIN_INDEX_TYPE] = []
post_transform_merge_attr = ['images', 'indexer']
post_transform_merge_attr: tuple[str, ...] = ('images', 'indexer')

def __init__(self, app: Sphinx, env: BuildEnvironment) -> None:
super().__init__(app, env)
@@ -255,12 +255,14 @@ def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None:
self.indexer = IndexBuilder(self.env, lang,
self.config.html_search_options,
self.config.html_search_scorer)
self.indexer._all_titles.update(new_attrs['indexer']._all_titles)
self.indexer._filenames.update(new_attrs['indexer']._filenames)
self.indexer._index_entries.update(new_attrs['indexer']._index_entries)
self.indexer._mapping.update(new_attrs['indexer']._mapping)
self.indexer._title_mapping.update(new_attrs['indexer']._title_mapping)
self.indexer._titles.update(new_attrs['indexer']._titles)
indexer_data = new_attrs['indexer']
self.indexer._all_titles |= indexer_data._all_titles
self.indexer._filenames |= indexer_data._filenames
self.indexer._index_entries |= indexer_data._index_entries
self.indexer._mapping |= indexer_data._mapping
self.indexer._title_mapping |= indexer_data._title_mapping
self.indexer._titles |= indexer_data._titles

# handle images
for filepath, filename in new_attrs['images'].items():
if filepath not in self.images:
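
Two merge styles appear in this hunk: the indexer sub-dicts use the in-place dict union operator |= (Python 3.9+), where entries from the subprocess overwrite same-key entries in the main builder, while images are only adopted when the path is not already registered. A small self-contained illustration:

main_titles = {'index': 'Welcome'}
worker_titles = {'usage': 'Usage', 'index': 'Welcome'}
main_titles |= worker_titles        # in-place dict union (PEP 584, Python 3.9+)
assert main_titles == {'index': 'Welcome', 'usage': 'Usage'}

main_images = {'img/a.png': 'a.png'}
worker_images = {'img/a.png': 'a1.png', 'img/b.png': 'b.png'}
for filepath, filename in worker_images.items():
    if filepath not in main_images:  # the main process's existing entry wins
        main_images[filepath] = filename
assert main_images == {'img/a.png': 'a.png', 'img/b.png': 'b.png'}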
2 changes: 1 addition & 1 deletion sphinx/builders/linkcheck.py
@@ -64,7 +64,7 @@ class CheckExternalLinksBuilder(DummyBuilder):
epilog = __('Look for any errors in the above output or in '
'%(outdir)s/output.txt')

post_transform_merge_attr = ['hyperlinks']
post_transform_merge_attr = ('hyperlinks',)

def init(self) -> None:
self.broken_hyperlinks = 0
1 change: 1 addition & 0 deletions sphinx/config.py
@@ -202,6 +202,7 @@ class Config:
'gettext_allow_fuzzy_translations': _Opt(False, 'gettext', ()),
'translation_progress_classes': _Opt(
False, 'env', ENUM(True, False, 'translated', 'untranslated')),

'master_doc': _Opt('index', 'env', ()),
'root_doc': _Opt(lambda config: config.master_doc, 'env', ()),
# ``source_suffix`` type is actually ``dict[str, str | None]``:
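
This hunk only adds a blank line for grouping; the registration of enable_parallel_post_transform itself is not visible in this view. As a rough illustration of how such a flag could be registered through the public extension API (the default and rebuild values here are assumptions; core options actually live in the _Opt table above):

from sphinx.application import Sphinx

def setup(app: Sphinx) -> dict[str, object]:
    # Illustrative only: a boolean flag that triggers an environment rebuild
    # when changed, mirroring the _Opt(default, rebuild, types) pattern above.
    app.add_config_value('enable_parallel_post_transform', False, 'env')
    return {'version': '0.1', 'parallel_read_safe': True}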
