Skip to content

Commit

Permalink
Fix race condition
Browse files (browse the repository at this point in the history)
  • Loading branch information
gabriel-piles committed Jul 11, 2024
1 parent 94b849c commit 44b36a2
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ start:
docker compose up --build

stop:
docker compose stop ; docker compose stop -f docker-compose-test.yml
docker compose stop ; docker compose -f docker-compose-test.yml stop

start-test:
docker compose -f docker-compose-test.yml up --attach api-pdf-layout --attach queue-processor-pdf-layout --attach worker-pdf-layout --build
Expand Down
1 change: 0 additions & 1 deletion src/QueueProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from sentry_sdk.integrations.redis import RedisIntegration
import sentry_sdk

from PdfFile import PdfFile
from configuration import (
MONGO_HOST,
MONGO_PORT,
Expand Down
36 changes: 18 additions & 18 deletions src/extract_segments.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from configuration import DOCUMENT_LAYOUT_ANALYSIS_URL
from time import sleep

from configuration import DOCUMENT_LAYOUT_ANALYSIS_URL, service_logger
from data_model.SegmentBox import SegmentBox
from PdfFile import PdfFile
from data_model.ExtractionData import ExtractionData
Expand All @@ -10,11 +12,24 @@ def get_xml_name(task: Task) -> str:
return f"{task.tenant}__{task.params.filename.lower().replace('.pdf', '.xml')}"


def exists_file(tenant: str, file_name: str, *, attempts: int = 5, delay_seconds: float = 1) -> bool:
    """Poll for a tenant's file until it appears or the attempts run out.

    Each attempt builds a ``PdfFile`` for ``tenant``, resolves ``file_name``
    through ``get_path`` and checks ``.exists()`` on the result. A miss is
    logged and followed by a pause before the next attempt.

    NOTE(review): the retry presumably covers the window in which another
    process is still writing the file -- confirm against the uploader.

    Args:
        tenant: Tenant identifier passed to ``PdfFile``.
        file_name: Name of the file to look up for that tenant.
        attempts: Maximum number of existence checks (default 5).
        delay_seconds: Pause between two consecutive checks (default 1).

    Returns:
        True as soon as one check finds the file; False when every check
        missed (or when ``attempts`` is not positive).
    """
    for attempt in range(1, attempts + 1):
        # Resolve the path once per attempt and reuse it for the log line.
        path = PdfFile(tenant).get_path(file_name)
        if path.exists():
            return True

        service_logger.info(f"File {path} does not exist yet (attempt {attempt}/{attempts})")

        # No point in waiting after the last check: nothing is retried.
        if attempt < attempts:
            sleep(delay_seconds)

    return False


def extract_segments(task: Task, xml_file_name: str = "") -> ExtractionData:
pdf_file = PdfFile(task.tenant)
if not pdf_file.get_path(task.params.filename).exists():
if not exists_file(task.tenant, task.params.filename):
raise FileNotFoundError

pdf_file = PdfFile(task.tenant)

with open(pdf_file.get_path(task.params.filename), "rb") as stream:
files = {"file": stream}

Expand All @@ -34,18 +49,3 @@ def extract_segments(task: Task, xml_file_name: str = "") -> ExtractionData:
page_height=0 if not segments else segments[0].page_height,
page_width=0 if not segments else segments[0].page_width,
)


if __name__ == "__main__":
    # Manual smoke check: build a SegmentBox from a minimal payload and print it.
    sample_box_fields = {
        "left": 1,
        "top": 1,
        "width": 1,
        "height": 1,
        "page_number": 1,
        "page_width": 1,
        "page_height": 1,
        "text": "",
        "type": "Section_Header",
    }
    sample_box = SegmentBox(**sample_box_fields)
    print(sample_box)

0 comments on commit 44b36a2

Please sign in to comment.