From b05988e0595201cb97b0a7dbba6e845d2478f7d4 Mon Sep 17 00:00:00 2001 From: Marco Heinemann Date: Wed, 1 Nov 2023 21:11:41 +0100 Subject: [PATCH] Working split impl --- sphinx/builders/__init__.py | 52 +++++++++++++++++++++--------------- sphinx/builders/linkcheck.py | 2 +- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index 834fb7eaed3..5a7b79ebf25 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -19,7 +19,7 @@ from sphinx.util.build_phase import BuildPhase from sphinx.util.console import bold # type: ignore[attr-defined] from sphinx.util.display import progress_message, status_iterator -from sphinx.util.docutils import sphinx_domains +from sphinx.util.docutils import LoggingReporter, sphinx_domains from sphinx.util.i18n import CatalogInfo, CatalogRepository, docname_to_domain from sphinx.util.osutil import SEP, ensuredir, relative_uri, relpath from sphinx.util.parallel import ParallelTasks, SerialTasks, make_chunks, parallel_available @@ -77,7 +77,7 @@ class Builder: supported_data_uri_images = False #: Builder attributes that should be returned from parallel #: post transformation, to be merged to the main builder in - #: merge_env_post_transform(). Attributes in the list must + #: merge_builder_post_transform(). Attributes in the list must #: be pickleable. The approach improves performance when #: pickling and sending data over pipes because only a #: small subset of the builder is commonly needed for merging. @@ -133,7 +133,7 @@ def init(self) -> None: """ pass - def merge_env_post_transform(self, new_attrs: dict[str, Any]) -> None: + def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None: """Give builders the option to merge any parallel post-transform information to the main builder. This can be useful for the build-finish phase. 
The function is called once for each finished @@ -619,29 +619,37 @@ def _write_parallel(self, docnames: Sequence[str], nproc: int) -> None: # create a status_iterator to step progressbar after post-transforming progress_post_transform = status_iterator(chunks, __('post-transforming... '), "darkgreen", len(chunks), self.app.verbosity) + transformed_doctrees = {} if self.parallel_post_transform_ok: - def transform_chunk_done(args: list[tuple[str, NoneType]], attrs_pickle: bytes) -> None: - new_attrs: dict[str, Any] = pickle.loads(attrs_pickle) - self.merge_env_post_transform(new_attrs) + def transform_chunk_done(args: list[tuple[str, NoneType]], pickled_return: bytes) -> None: + doctrees: dict[str, dict] + new_attrs: dict[str, Any] + doctrees, new_attrs = pickle.loads(pickled_return) + for docname, doctree in doctrees.items(): + # re-attach docutils reporter + doctree.settings.env = self.env + doctree.reporter = LoggingReporter(self.env.doc2path(docname)) + transformed_doctrees[docname] = doctree + self.merge_builder_post_transform(new_attrs) next(progress_post_transform) def post_transform(docs: list[tuple[str, nodes.document]]) -> bytes | None: self.app.phase = BuildPhase.WRITING - transformed_doctrees = [] + doctrees = {} for docname, doctree in docs: doctree = self.env.get_and_resolve_doctree(docname, self, doctree) - transformed_doctrees.append(doctree) + doctrees[docname] = doctree merge_attr = { attr: getattr(self, attr, None) for attr in self.post_transform_merge_attr } - return pickle.dumps(), pickle.dumps(merge_attr, pickle.HIGHEST_PROTOCOL) + return pickle.dumps((doctrees, merge_attr), pickle.HIGHEST_PROTOCOL) for chunk in chunks: - arg = [] + args = [] for docname in chunk: doctree = self.env.get_doctree_write(docname) - arg.append((docname, doctree)) - tasks.add_task(post_transform, arg, transform_chunk_done) + args.append((docname, doctree)) + tasks.add_task(post_transform, args, transform_chunk_done) # make sure all subprocesses have finished for 
post-transformers tasks.join() @@ -649,14 +657,17 @@ def post_transform(docs: list[tuple[str, nodes.document]]) -> bytes | None: # run post-transformers and doctree-resolved serialized in the main process for chunk in chunks: for docname in chunk: - self.env.get_and_resolve_doctree(docname, self) + doctree = self.env.get_and_resolve_doctree(docname, self) + transformed_doctrees[docname] = doctree + next(progress_post_transform) # run serialized tasks between post-transform and writing (images, search index) - for chunk in chunks: - for docname in chunk: - doctree = self.env.get_doctree_write(docname) - self.write_doc_serialized(docname, doctree) + progress_write_serial = status_iterator(transformed_doctrees, __('write doc serialized... '), "darkgreen", + len(transformed_doctrees), self.app.verbosity) + for docname, doctree in transformed_doctrees.items(): + self.write_doc_serialized(docname, doctree) + next(progress_write_serial) self.app.phase = BuildPhase.WRITING @@ -670,11 +681,10 @@ def write_process(docs: list[tuple[str, nodes.document]]) -> bytes | None: for docname, doctree in docs: self.write_doc(docname, doctree) for chunk in chunks: - arg = [] + args = [] for docname in chunk: - doctree = self.env.get_doctree_write(docname) - arg.append((docname, doctree)) - tasks.add_task(write_process, arg, write_chunk_done) + args.append((docname, transformed_doctrees[docname])) + tasks.add_task(write_process, args, write_chunk_done) # make sure all subprocesses have finished for writing tasks.join() diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index a9887b721cc..2f1403a7e5d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -68,7 +68,7 @@ def init(self) -> None: # set a timeout for non-responding servers socket.setdefaulttimeout(5.0) - def merge_env_post_transform(self, new_attrs: dict[str, Any]) -> None: + def merge_builder_post_transform(self, new_attrs: dict[str, Any]) -> None: """Merge hyperlinks back to the 
main builder after parallel post-transformation.