Skip to content

Commit

Permalink
Handle PDF conversion errors
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Feb 12, 2024
1 parent 636f586 commit 649d97a
Showing 1 changed file with 16 additions and 13 deletions.
29 changes: 16 additions & 13 deletions src/QueueProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,7 @@ def process(self, id, message, rc, ts):
extraction_data = extract_paragraphs_asynchronous(task)

if not extraction_data:
extraction_message = ExtractionMessage(
tenant=task.tenant,
task=task.task,
params=task.params,
success=False,
error_message="Error getting the xml from the pdf",
)

self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute()
self.logger.error(extraction_message.model_dump_json())
return True
raise FileNotFoundError

service_url = f"{config.SERVICE_HOST}:{config.SERVICE_PORT}"
results_url = f"{service_url}/get_paragraphs/{task.tenant}/{task.params.filename}"
Expand All @@ -78,10 +68,23 @@ def process(self, id, message, rc, ts):
self.pdf_paragraph_db.paragraphs.insert_one(json.loads(extraction_data_json))
self.logger.info(f"Results Redis message: {extraction_message}")
self.results_queue.sendMessage(delay=5).message(extraction_message.model_dump_json()).execute()
return True

except FileNotFoundError:
extraction_message = ExtractionMessage(
tenant=task.tenant,
task=task.task,
params=task.params,
success=False,
error_message="Error getting the xml from the pdf",
)

self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute()
self.logger.error(extraction_message.model_dump_json())

except Exception:
self.logger.error("error extracting the paragraphs", exc_info=1)
return True

return True

def subscribe_to_extractions_tasks_queue(self):
while True:
Expand Down

0 comments on commit 649d97a

Please sign in to comment.