Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: 1257 - Load ocrd tool json locally #1260

Merged
merged 12 commits into from
Aug 1, 2024
2 changes: 1 addition & 1 deletion src/ocrd_network/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Comma-separated list of RabbitMQ feature names (consumer not visible here)
DOCKER_RABBIT_MQ_FEATURES = "quorum_queue,implicit_default_bindings,classic_mirrored_queue_version"

# URL scheme prefixes; presumably used to recognise network addresses - verify against callers
NETWORK_PROTOCOLS = ["http://", "https://"]
# Remote location of the ocrd all tool json; passed as `ocrd_all_url` to download_ocrd_all_tool_json()
OCRD_ALL_JSON_TOOLS_URL = "https://ocr-d.de/js/ocrd-all-tool.json"
# Resource name of the ocrd all tool json; looked up in the 'ocrd' package by load_ocrd_all_tool_json()
OCRD_ALL_TOOL_JSON = "ocrd-all-tool.json"
# Used as a placeholder to lock all pages when no page_id is specified
SERVER_ALL_PAGES_PLACEHOLDER = "all_pages"

Expand Down
8 changes: 4 additions & 4 deletions src/ocrd_network/processing_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ocrd.task_sequence import ProcessorTask
from ocrd_utils import initLogging, getLogger
from .constants import AgentType, JobState, OCRD_ALL_JSON_TOOLS_URL, ServerApiTags
from .constants import AgentType, JobState, ServerApiTags
from .database import (
initiate_database,
db_get_processing_job,
Expand Down Expand Up @@ -58,7 +58,7 @@
)
from .tcp_to_uds_mets_proxy import MetsServerProxy
from .utils import (
download_ocrd_all_tool_json,
load_ocrd_all_tool_json,
expand_page_ids,
generate_id,
generate_workflow_content,
Expand Down Expand Up @@ -90,8 +90,8 @@ def __init__(self, config_path: str, host: str, port: int) -> None:
log_file = get_processing_server_logging_file_path(pid=getpid())
configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a")

self.log.info(f"Downloading ocrd all tool json")
self.ocrd_all_tool_json = download_ocrd_all_tool_json(ocrd_all_url=OCRD_ALL_JSON_TOOLS_URL)
self.log.info(f"Loading ocrd all tool json")
self.ocrd_all_tool_json = load_ocrd_all_tool_json()
self.hostname = host
self.port = port

Expand Down
14 changes: 5 additions & 9 deletions src/ocrd_network/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from fastapi import UploadFile
from functools import wraps
from hashlib import md5
from json import loads
from pathlib import Path
from re import compile as re_compile, split as re_split
from requests import get as requests_get, Session as Session_TCP
Expand All @@ -14,7 +15,8 @@
from ocrd.resolver import Resolver
from ocrd.workspace import Workspace
from ocrd.mets_server import MpxReq
from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger
from ocrd_utils import config, generate_range, REGEX_PREFIX, safe_filename, getLogger, resource_string
from .constants import OCRD_ALL_TOOL_JSON
from .rabbitmq_utils import OcrdResultMessage


Expand Down Expand Up @@ -92,14 +94,8 @@ def is_url_responsive(url: str, tries: int = 1, wait_time: int = 3) -> bool:
return False


def download_ocrd_all_tool_json(ocrd_all_url: str, timeout: float = 30.0):
    """
    Download the ocrd all tool json from `ocrd_all_url` and return it decoded.

    Args:
        ocrd_all_url: the URL to send the GET request to; must not be empty.
        timeout: seconds to wait for the server before the request is aborted.
            Without a timeout the request could block indefinitely.

    Returns:
        The JSON-decoded body of the response (`response.json()`).

    Raises:
        ValueError: if `ocrd_all_url` is empty or the response status is not 200.
        Exceptions raised by the underlying HTTP session (e.g. on connection
        failure or timeout) are propagated unchanged.
    """
    if not ocrd_all_url:
        raise ValueError("The URL of ocrd all tool json is empty")
    headers = {"Accept": "application/json"}
    # Use the session as a context manager so it is always closed,
    # even when the request raises or the status check fails.
    with Session_TCP() as session:
        response = session.get(ocrd_all_url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            raise ValueError(f"Failed to download ocrd all tool json from: '{ocrd_all_url}'")
        return response.json()
def load_ocrd_all_tool_json():
    """
    Load the ocrd all tool json locally instead of downloading it.

    The resource named by `OCRD_ALL_TOOL_JSON` is fetched from the 'ocrd'
    package via `resource_string` and decoded with `json.loads`.

    Returns:
        The JSON-decoded content of the resource.
    """
    raw_tool_json = resource_string('ocrd', OCRD_ALL_TOOL_JSON)
    return loads(raw_tool_json)


def post_to_callback_url(logger, callback_url: str, result_message: OcrdResultMessage):
Expand Down
Loading