Skip to content

Commit

Permalink
Extract layout, audio files (#557)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hexapode authored Dec 18, 2024
1 parent 6d62fb8 commit 6338641
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
7 changes: 7 additions & 0 deletions llama_parse/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -157,6 +157,10 @@ class LlamaParse(BasePydanticReader):
default=False,
description="If set to true, the parser will extract/tag charts from the document.",
)
extract_layout: Optional[bool] = Field(
default=False,
description="If set to true, the parser will extract the layout information of the document. Cost 1 credit per page.",
)
fast_mode: Optional[bool] = Field(
default=False,
description="Note: Non compatible with gpt-4o. If set to true, the parser will use a faster mode to extract text from documents. This mode will skip OCR of images, and table/heading reconstruction.",
Expand Down Expand Up @@ -457,6 +461,9 @@ async def _create_job(
if self.extract_charts:
data["extract_charts"] = self.extract_charts

if self.extract_layout:
data["extract_layout"] = self.extract_layout

if self.fast_mode:
data["fast_mode"] = self.fast_mode

Expand Down
7 changes: 7 additions & 0 deletions llama_parse/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -191,4 +191,11 @@ class Language(str, Enum):
".xlr",
".eth",
".tsv",
".mp3",
".mp4",
".mpeg",
".mpga",
".m4a",
".wav",
".webm",
]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "llama-parse"
version = "0.5.17"
version = "0.5.18"
description = "Parse files into RAG-Optimized formats."
authors = ["Logan Markewich <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 6338641

Please sign in to comment.