Merge pull request #56 from VikParuchuri/dev

Fix ray version
VikParuchuri · Jan 3, 2024 · ad6c97c · ad6c97c
2 parents 5ff2c23 + fd261fb
commit ad6c97c
Show file tree

Hide file tree

Showing 5 changed files with 33 additions and 37 deletions.
diff --git a/convert.py b/convert.py
@@ -87,7 +87,6 @@ def main():
         num_gpus=1 if settings.CUDA else 0,
         storage=settings.RAY_CACHE_PATH,
         _temp_dir=settings.RAY_CACHE_PATH,
-        dashboard_host=settings.RAY_DASHBOARD_HOST,
         log_to_driver=settings.DEBUG
     )
 

diff --git a/marker/debug/data.py b/marker/debug/data.py
@@ -21,11 +21,10 @@ def dump_equation_debug_data(doc, images, converted_spans):
     assert len(converted_spans) == len(images)
 
     data_lines = []
-    for idx, (image, converted_span) in enumerate(zip(images, converted_spans)):
+    for idx, (pil_image, converted_span) in enumerate(zip(images, converted_spans)):
         if converted_span is None:
             continue
         # Each entry is used directly as a PIL image (no Image.open step); re-encode it as WEBP bytes
-        pil_image = Image.open(image)
         img_bytes = io.BytesIO()
         pil_image.save(img_bytes, format="WEBP", lossless=True)
         b64_image = base64.b64encode(img_bytes.getvalue()).decode("utf-8")

diff --git a/marker/settings.py b/marker/settings.py
@@ -78,7 +78,7 @@ def TORCH_DEVICE_MODEL(self) -> str:
     TEXIFY_MODEL_MAX: int = 384 # Max inference length for texify
     TEXIFY_TOKEN_BUFFER: int = 256 # Number of tokens to buffer above max for texify
     TEXIFY_DPI: int = 96 # DPI to render images at
-    TEXIFY_BATCH_SIZE: int = 1 if TORCH_DEVICE_MODEL == "cpu" else 6 # Batch size for texify, don't batch on cpu
+    TEXIFY_BATCH_SIZE: int = 2 if TORCH_DEVICE_MODEL == "cpu" else 6 # Batch size for texify, lower on cpu due to float32
     TEXIFY_MODEL_NAME: str = "vikp/texify"
 
     # Layout model
@@ -102,7 +102,6 @@ def TORCH_DEVICE_MODEL(self) -> str:
 
     # Ray
     RAY_CACHE_PATH: Optional[str] = None # Where to save ray cache
-    RAY_DASHBOARD_HOST: str = "127.0.0.1"
     RAY_CORES_PER_WORKER: int = 1 # How many cpu cores to allocate per worker
 
     # Debug

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -27,11 +27,11 @@ PyMuPDF = "^1.23.5"
 pymupdf-fonts = "^1.0.5"
 pydantic = "^2.4.2"
 pydantic-settings = "^2.0.3"
-transformers = "^4.34.1"
+transformers = "^4.36.2"
 numpy = "^1.26.1"
 python-dotenv = "^1.0.0"
-torch = "^2.1.1"
-ray = "^2.7.1"
+torch = "^2.1.2"
+ray = "^2.9.0"
 tqdm = "^4.66.1"
 tabulate = "^0.9.0"
 thefuzz = "^0.20.0"