Skip to content

Commit

Permalink
Improvement[Community] Improve Document Loaders and Splitters (#27568)
Browse files Browse the repository at this point in the history
- Fix word spelling error
- Add static method decorator
- Fix language splitter

Co-authored-by: Erick Friis <[email protected]>
  • Loading branch information
ZhangShenao and efriis authored Oct 24, 2024
1 parent 7345470 commit 455ab7d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
5 changes: 3 additions & 2 deletions libs/community/langchain_community/document_loaders/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class PyPDFLoader(BasePDFLoader):
loader = PyPDFLoader(
file_path = "./example_data/layout-parser-paper.pdf",
- password = "my-pasword",
+ password = "my-password",
extract_images = True,
# headers = None
# extraction_mode = "plain",
Expand Down Expand Up @@ -572,7 +572,8 @@ def get_processed_pdf(self, pdf_id: str) -> str:
response = requests.get(url, headers=self._mathpix_headers)
return response.content.decode("utf-8")

- def clean_pdf(self, contents: str) -> str:
+ @staticmethod
+ def clean_pdf(contents: str) -> str:
"""Clean the PDF file.
Args:
Expand Down
2 changes: 1 addition & 1 deletion libs/text-splitters/langchain_text_splitters/character.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def get_separators_for_language(language: Language) -> List[str]:
"\n\\\\begin{verse}",
"\n\\\\begin{verbatim}",
# Now split by math environments
- "\n\\\begin{align}",
+ "\n\\\\begin{align}",
"$$",
"$",
# Now split by the normal type of lines
Expand Down

0 comments on commit 455ab7d

Please sign in to comment.