From ca5ffa3defd805189614379f5933663d429736b0 Mon Sep 17 00:00:00 2001 From: Martin Kurtz <70766440+rakurtz@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:15:49 +0200 Subject: [PATCH] Update __init__.py redirecting stdout to stderr to be compliant with ocrmypdf. otherwise we break the usage of ocrmypdf with piped stdin / stdout... --- ocrmypdf_easyocr/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ocrmypdf_easyocr/__init__.py b/ocrmypdf_easyocr/__init__.py index ec34109..2c6db6c 100644 --- a/ocrmypdf_easyocr/__init__.py +++ b/ocrmypdf_easyocr/__init__.py @@ -8,6 +8,8 @@ import logging import multiprocessing.managers import os +import sys +import contextlib import threading import traceback from pathlib import Path @@ -111,7 +113,12 @@ def _ocr_process(q: multiprocessing.Queue[Task], options): if reader is None: use_gpu = options.gpu languages = [ISO_639_3_2[lang] for lang in options.languages] - reader = easyocr.Reader(languages, use_gpu) + + # Redirect stdout to stderr during Reader initialization to be compliant with ocrmypdf + # otherwise piping a pdf output to stdout gets interfered with the progress bar of loading the model to ram + with contextlib.redirect_stdout(sys.stderr): + reader = easyocr.Reader(languages, use_gpu) + output_dict["output"] = reader.readtext( gray, batch_size=options.easyocr_batch_size )