superagent-ai · homanp · Mar 4, 2024 · Mar 4, 2024
diff --git a/service/code_interpreter.py b/service/code_interpreter.py
@@ -166,4 +166,8 @@ async def run_python(self, code: str):
             f"python {codefile_path}",
         )
 
+        if process.messages:
+            line = process.messages[0].line
+            return line
+
         return process
diff --git a/service/embedding.py b/service/embedding.py
@@ -52,9 +52,7 @@ def __init__(
         )
 
     def _get_strategy(self, type: str) -> Optional[str]:
-        strategies = {
-            "PDF": "auto",
-        }
+        strategies = {"PDF": "auto", "CSV": "auto"}
         try:
             return strategies[type]
         except KeyError:
@@ -80,9 +78,10 @@ async def _partition_file(
         #     strategy = "auto"
 
         logger.info(
-            f"Downloading and extracting elements from {file.url},"
+            f"Downloading and extracting elements from {file.url}, "
             f"using `{strategy}` strategy"
         )
+        print(file.suffix)
         with NamedTemporaryFile(suffix=file.suffix, delete=True) as temp_file:
             with requests.get(url=file.url) as response:
                 temp_file.write(response.content)
@@ -173,7 +172,6 @@ async def generate_chunks(
                             ),
                         }
                         chunks.append(chunk_data)
-
                 if config.splitter.name == "semantic":
                     elements = await self._partition_file(
                         file,

diff --git a/service/ingest.py b/service/ingest.py
@@ -14,6 +14,7 @@ async def handle_urls(
 ):
     embedding_service.files = files
     chunks = await embedding_service.generate_chunks(config=config)
+    print(chunks)
     summary_documents = await embedding_service.generate_summary_documents(
         documents=chunks
     )

diff --git a/service/router.py b/service/router.py
@@ -12,7 +12,11 @@
 from utils.summarise import SUMMARY_SUFFIX
 from vectordbs import BaseVectorDatabase, get_vector_service
 
-STRUTURED_DATA = [".xlsx", ".csv", ".json"]
+STRUCTURED_DATA = [  # MIME types matched against chunk metadata "filetype"
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
+    "text/csv",  # .csv
+    "application/json",  # .json
+]
 
 
 def create_route_layer() -> RouteLayer:
@@ -40,15 +44,14 @@ async def get_documents(
     if not len(chunks):
         logger.error(f"No documents found for query: {payload.input}")
         return []
-    is_structured = chunks[0].metadata.get("document_type") in STRUTURED_DATA
+    is_structured = chunks[0].metadata.get("filetype") in STRUCTURED_DATA
     reranked_chunks = []
     if is_structured and payload.interpreter_mode:
         async with CodeInterpreterService(
             session_id=payload.session_id, file_urls=[chunks[0].metadata.get("doc_url")]
         ) as service:
             code = await service.generate_code(query=payload.input)
-            response = await service.run_python(code=code)
-            output = response.stdout
+            output = await service.run_python(code=code)
             reranked_chunks.append(
                 BaseDocumentChunk(
                     id=str(uuid4()),