Skip to content

Commit

Permalink
Working split impl
Browse files (browse the repository at this point in the history)
  • Loading branch information
ubmarco committed Nov 1, 2023
1 parent 2a829b6 commit b05988e
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 22 deletions.
52 changes: 31 additions & 21 deletions sphinx/builders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from sphinx.util.build_phase import BuildPhase
from sphinx.util.console import bold # type: ignore[attr-defined]
from sphinx.util.display import progress_message, status_iterator
from sphinx.util.docutils import sphinx_domains
from sphinx.util.docutils import LoggingReporter, sphinx_domains
from sphinx.util.i18n import CatalogInfo, CatalogRepository, docname_to_domain
from sphinx.util.osutil import SEP, ensuredir, relative_uri, relpath
from sphinx.util.parallel import ParallelTasks, SerialTasks, make_chunks, parallel_available
Expand Down Expand Up @@ -77,7 +77,7 @@ class Builder:
supported_data_uri_images = False
#: Builder attributes that should be returned from parallel
#: post transformation, to be merged to the main builder in
#: merge_env_post_transform(). Attributes in the list must
#: merge_builder_post_tranform(). Attributes in the list must
#: be pickleable. The approach improves performance when
#: pickling and sending data over pipes because only a
#: small subset of the builder is commonly needed for merging.
Expand Down Expand Up @@ -133,7 +133,7 @@ def init(self) -> None:
"""
pass

def merge_env_post_transform(self, new_attrs: dict[str, Any]) -> None:
def merge_builder_post_tranform(self, new_attrs: dict[str, Any]) -> None:
"""Give builders the option to merge any parallel post-transform
information to the main builder. This can be useful for the
build-finish phase. The function is called once for each finished
Expand Down Expand Up @@ -619,44 +619,55 @@ def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None:
# create a status_iterator to step progressbar after post-transforming
progress_post_transform = status_iterator(chunks, __('post-transforming... '), "darkgreen",

Check failure on line 620 in sphinx/builders/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E501)

sphinx/builders/__init__.py:620:96: E501 Line too long (99 > 95 characters)
len(chunks), self.app.verbosity)
transformed_doctrees = {}
if self.parallel_post_transform_ok:
def transform_chunk_done(args: list[tuple[str, NoneType]], attrs_pickle: bytes) -> None:
new_attrs: dict[str, Any] = pickle.loads(attrs_pickle)
self.merge_env_post_transform(new_attrs)
def transform_chunk_done(args: list[tuple[str, NoneType]], pickled_return: bytes) -> None:

Check failure on line 624 in sphinx/builders/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E501)

sphinx/builders/__init__.py:624:96: E501 Line too long (102 > 95 characters)
doctrees: dict[str, dict]
new_attrs: dict[str, Any]
doctrees, new_attrs = pickle.loads(pickled_return)
for docname, doctree in doctrees.items():
# re-attach docutils reporter
doctree.settings.env = self.env
doctree.reporter = LoggingReporter(self.env.doc2path(docname))
transformed_doctrees[docname] = doctree
self.merge_builder_post_tranform(new_attrs)
next(progress_post_transform)

def post_transform(docs: list[tuple[str, nodes.document]]) -> bytes | None:
self.app.phase = BuildPhase.WRITING
transformed_doctrees = []
doctrees = {}
for docname, doctree in docs:
doctree = self.env.get_and_resolve_doctree(docname, self, doctree)
transformed_doctrees.append(doctree)
doctrees[docname] = doctree
merge_attr = {
attr: getattr(self, attr, None)
for attr in self.post_transform_merge_attr
}
return pickle.dumps(), pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL)
return pickle.dumps((doctrees, merge_attr), pickle.HIGHEST_PROTOCOL)
for chunk in chunks:
arg = []
args = []
for docname in chunk:
doctree = self.env.get_doctree_write(docname)
arg.append((docname, doctree))
tasks.add_task(post_transform, arg, transform_chunk_done)
args.append((docname, doctree))
tasks.add_task(post_transform, args, transform_chunk_done)

# make sure all subprocesses have finished for post-transformers
tasks.join()
else:
# run post-transformers and doctree-resolved serially in the main process
for chunk in chunks:
for docname in chunk:
self.env.get_and_resolve_doctree(docname, self)
doctree = self.env.get_and_resolve_doctree(docname, self)
transformed_doctrees[docname] = doctree

next(progress_post_transform)

# run serialized tasks between post-transform and writing (images, search index)
for chunk in chunks:
for docname in chunk:
doctree = self.env.get_doctree_write(docname)
self.write_doc_serialized(docname, doctree)
progress_write_serial = status_iterator(transformed_doctrees, __('write doc serialized... '), "darkgreen",

Check failure on line 666 in sphinx/builders/__init__.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (E501)

sphinx/builders/__init__.py:666:96: E501 Line too long (114 > 95 characters)
len(transformed_doctrees), self.app.verbosity)
for docname, doctree in transformed_doctrees.items():
self.write_doc_serialized(docname, doctree)
next(progress_write_serial)

self.app.phase = BuildPhase.WRITING

Expand All @@ -670,11 +681,10 @@ def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None:
for docname, doctree in docs:
self.write_doc(docname, doctree)
for chunk in chunks:
arg = []
args = []
for docname in chunk:
doctree = self.env.get_doctree_write(docname)
arg.append((docname, doctree))
tasks.add_task(write_process, arg, write_chunk_done)
args.append((docname, transformed_doctrees[docname]))
tasks.add_task(write_process, args, write_chunk_done)

# make sure all subprocesses have finished for writing
tasks.join()
Expand Down
2 changes: 1 addition & 1 deletion sphinx/builders/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def init(self) -> None:
# set a timeout for non-responding servers
socket.setdefaulttimeout(5.0)

def merge_env_post_transform(self, new_attrs: dict[str, Any]) -> None:
def merge_builder_post_tranform(self, new_attrs: dict[str, Any]) -> None:
"""Merge hyperlinks back to the main builder after parallel
post-transformation.
Expand Down

0 comments on commit b05988e

Please sign in to comment.