Handle PDF conversion errors

huridocs · Feb 12, 2024 · 649d97a · 649d97a
1 parent 636f586
commit 649d97a
Showing 1 changed file with 16 additions and 13 deletions.
diff --git a/src/QueueProcessor.py b/src/QueueProcessor.py
@@ -50,17 +50,7 @@ def process(self, id, message, rc, ts):
             extraction_data = extract_paragraphs_asynchronous(task)
 
             if not extraction_data:
-                extraction_message = ExtractionMessage(
-                    tenant=task.tenant,
-                    task=task.task,
-                    params=task.params,
-                    success=False,
-                    error_message="Error getting the xml from the pdf",
-                )
-
-                self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute()
-                self.logger.error(extraction_message.model_dump_json())
-                return True
+                raise FileNotFoundError
 
             service_url = f"{config.SERVICE_HOST}:{config.SERVICE_PORT}"
             results_url = f"{service_url}/get_paragraphs/{task.tenant}/{task.params.filename}"
@@ -78,10 +68,23 @@ def process(self, id, message, rc, ts):
             self.pdf_paragraph_db.paragraphs.insert_one(json.loads(extraction_data_json))
             self.logger.info(f"Results Redis message: {extraction_message}")
             self.results_queue.sendMessage(delay=5).message(extraction_message.model_dump_json()).execute()
-            return True
+
+        except FileNotFoundError:
+            extraction_message = ExtractionMessage(
+                tenant=task.tenant,
+                task=task.task,
+                params=task.params,
+                success=False,
+                error_message="Error getting the xml from the pdf",
+            )
+
+            self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute()
+            self.logger.error(extraction_message.model_dump_json())
+
         except Exception:
             self.logger.error("error extracting the paragraphs", exc_info=1)
-            return True
+
+        return True
 
     def subscribe_to_extractions_tasks_queue(self):
         while True: