Skip to content

Commit

Permalink
Add progressbar for metadata_fixup
Browse files Browse the repository at this point in the history
Saving might take a long time for big files. Pdf.open() is potentially expensive as well, but QPDF doesn't give us progress feedback for that.

Closes #1313 (Show progress during postprocessing)
  • Loading branch information
jbarlow83 committed May 19, 2024
1 parent 950c700 commit 9a3c5a3
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 9 deletions.
35 changes: 31 additions & 4 deletions src/ocrmypdf/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,18 +153,45 @@ def _set_language(pdf: Pdf, languages: list[str]):
pdf.Root.Lang = iso639_2


class MetadataProgress:
    """Context manager and callback that reports save progress as a percentage.

    An instance wraps a progress bar created from ``progressbar_class`` with
    ``total=100``, ``desc="Linearizing"`` and ``unit='%'``. Calling the
    instance with a percentage forwards it to the bar's ``update`` method as
    the absolute ``completed`` value.

    If ``progressbar_class`` is falsy (for example ``None``), no bar is
    created and every operation is a no-op, so the object can still be used
    as a context manager and as a callback.
    """

    def __init__(self, progressbar_class):
        """Create the wrapped progress bar, if a progress bar class is given.

        Args:
            progressbar_class: Callable accepting ``total``, ``desc`` and
                ``unit`` keyword arguments and returning a progress bar
                object, or a falsy value to disable progress reporting.
        """
        self.progressbar_class = progressbar_class
        # Only build a bar when there is a class to build it from; the
        # other methods check for None so a missing class disables reporting
        # instead of failing here with a TypeError.
        self.progressbar = (
            progressbar_class(total=100, desc="Linearizing", unit='%')
            if progressbar_class
            else None
        )

    def __enter__(self):
        """Enter the wrapped progress bar's context and return this object."""
        if self.progressbar is not None:
            self.progressbar.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the wrapped progress bar's context.

        Returns:
            Whatever the wrapped bar's ``__exit__`` returns, or ``False``
            (do not suppress exceptions) when there is no bar.
        """
        if self.progressbar is None:
            return False
        return self.progressbar.__exit__(exc_type, exc_value, traceback)

    def __call__(self, percent: int):
        """Report that ``percent`` percent of the work has been completed."""
        if self.progressbar is None:
            return
        self.progressbar.update(completed=percent)


def metadata_fixup(
working_file: Path, context: PdfContext, pdf_save_settings: dict[str, Any]
) -> Path:
"""Fix certain metadata fields after Ghostscript PDF/A conversion.
"""Fix certain metadata fields whether PDF or PDF/A.
Override some of Ghostscript's metadata choices.
Also report on metadata in the input file that was not retained during
PDF/A conversion.
conversion.
"""
output_file = context.get_path('metafix.pdf')
options = context.options

with Pdf.open(context.origin) as original, Pdf.open(working_file) as pdf:
pbar_class = context.plugin_manager.hook.get_progressbar_class()
with (
Pdf.open(context.origin) as original,
Pdf.open(working_file) as pdf,
MetadataProgress(pbar_class) as pbar,
):
docinfo = get_docinfo(original, context)
with (
original.open_metadata(
Expand All @@ -182,6 +209,6 @@ def metadata_fixup(
report_on_metadata(options, meta_missing)

_set_language(pdf, options.languages)
pdf.save(output_file, **pdf_save_settings)
pdf.save(output_file, progress=pbar, **pdf_save_settings)

return output_file
16 changes: 11 additions & 5 deletions src/ocrmypdf/_progressbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __enter__(self):
def __exit__(self, *args):
"""Exit a progress bar context."""

def update(self, n=1):
def update(self, n=1, *, completed=None):
"""Update the progress bar by an increment.
For use within a progress bar context.
Expand All @@ -85,7 +85,7 @@ def __enter__(self):
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the context; returns False so exceptions are never suppressed."""
    return False

def update(self, _arg=None):
def update(self, _arg=None, *, completed=None):
    """Accept and ignore a progress update; both arguments are unused."""
    return


Expand All @@ -103,6 +103,7 @@ def __init__(
disable: bool = False,
**kwargs,
):
self._entered = False
self.progress = Progress(
TextColumn(
"[progress.description]{task.description}",
Expand Down Expand Up @@ -130,13 +131,18 @@ def __init__(

def __enter__(self):
    """Start the progress display and record that the bar has been entered."""
    self.progress.start()
    # update() asserts on this flag, so set it before handing the bar back.
    self._entered = True
    return self

def __exit__(self, exc_type, exc_value, traceback):
    """Refresh the display one last time, stop it, and let any exception propagate."""
    self.progress.refresh()
    self.progress.stop()
    # Returning False means an exception raised inside the context is not suppressed.
    return False

def update(self, value=None):
advance = self.unit_scale if value is None else value
self.progress.update(self.progress_bar, advance=advance)
def update(self, n=1, *, completed=None):
    """Move the progress bar forward, or set its absolute position.

    Args:
        n: Amount to advance the bar by when ``completed`` is not given.
            Passing ``None`` explicitly advances by ``self.unit_scale``.
        completed: If not ``None``, set the bar to this absolute value and
            ignore ``n``.

    The bar must have been entered as a context manager first; this is
    checked with an ``assert``.
    """
    # NOTE(review): because ``n`` defaults to 1, the ``unit_scale`` fallback
    # is only reached when a caller passes ``n=None`` explicitly - confirm
    # that this is the intended default step.
    assert self._entered, "Progress bar must be entered before updating"
    if completed is not None:
        self.progress.update(self.progress_bar, completed=completed)
        return
    step = n if n is not None else self.unit_scale
    self.progress.update(self.progress_bar, advance=step)

0 comments on commit 9a3c5a3

Please sign in to comment.