Commit: Merge branch 'bagatur/community' of github.com:langchain-ai/langchain into bagatur/community
Showing 48 changed files with 1,938 additions and 184 deletions.
.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py
123 changes: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
"""Agent toolkits contain integrations with various resources and services. | ||
LangChain has a large ecosystem of integrations with various external resources | ||
like local and remote file systems, APIs and databases. | ||
These integrations allow developers to create versatile applications that combine the | ||
power of LLMs with the ability to access, interact with and manipulate external | ||
resources. | ||
When developing an application, developers should inspect the capabilities and | ||
permissions of the tools that underlie the given agent toolkit, and determine | ||
whether permissions of the given toolkit are appropriate for the application. | ||
See [Security](https://python.langchain.com/docs/security) for more information. | ||
""" | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
from langchain_core._api.path import as_import_path | ||
|
||
from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit | ||
from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit | ||
from langchain_community.agent_toolkits.azure_cognitive_services import ( | ||
AzureCognitiveServicesToolkit, | ||
) | ||
from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501 | ||
create_conversational_retrieval_agent, | ||
) | ||
from langchain_community.agent_toolkits.file_management.toolkit import ( | ||
FileManagementToolkit, | ||
) | ||
from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit | ||
from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit | ||
from langchain_community.agent_toolkits.json.base import create_json_agent | ||
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit | ||
from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit | ||
from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit | ||
from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit | ||
from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit | ||
from langchain_community.agent_toolkits.openapi.base import create_openapi_agent | ||
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit | ||
from langchain_community.agent_toolkits.playwright.toolkit import ( | ||
PlayWrightBrowserToolkit, | ||
) | ||
from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent | ||
from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent | ||
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit | ||
from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit | ||
from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent | ||
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit | ||
from langchain_community.agent_toolkits.sql.base import create_sql_agent | ||
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit | ||
from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit | ||
from langchain_community.agent_toolkits.vectorstore.base import ( | ||
create_vectorstore_agent, | ||
create_vectorstore_router_agent, | ||
) | ||
from langchain_community.agent_toolkits.vectorstore.toolkit import ( | ||
VectorStoreInfo, | ||
VectorStoreRouterToolkit, | ||
VectorStoreToolkit, | ||
) | ||
from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit | ||
from langchain_community.tools.retriever import create_retriever_tool | ||
|
||
DEPRECATED_AGENTS = [ | ||
"create_csv_agent", | ||
"create_pandas_dataframe_agent", | ||
"create_xorbits_agent", | ||
"create_python_agent", | ||
"create_spark_dataframe_agent", | ||
] | ||
|
||
|
||
def __getattr__(name: str) -> Any: | ||
"""Get attr name.""" | ||
if name in DEPRECATED_AGENTS: | ||
relative_path = as_import_path(Path(__file__).parent, suffix=name) | ||
old_path = "langchain." + relative_path | ||
new_path = "langchain_experimental." + relative_path | ||
raise ImportError( | ||
f"{name} has been moved to langchain experimental. " | ||
"See https://github.com/langchain-ai/langchain/discussions/11680" | ||
"for more information.\n" | ||
f"Please update your import statement from: `{old_path}` to `{new_path}`." | ||
) | ||
raise AttributeError(f"{name} does not exist") | ||
|
||
|
||
__all__ = [ | ||
"AINetworkToolkit", | ||
"AmadeusToolkit", | ||
"AzureCognitiveServicesToolkit", | ||
"FileManagementToolkit", | ||
"GmailToolkit", | ||
"JiraToolkit", | ||
"JsonToolkit", | ||
"MultionToolkit", | ||
"NasaToolkit", | ||
"NLAToolkit", | ||
"O365Toolkit", | ||
"OpenAPIToolkit", | ||
"PlayWrightBrowserToolkit", | ||
"PowerBIToolkit", | ||
"SlackToolkit", | ||
"SteamToolkit", | ||
"SQLDatabaseToolkit", | ||
"SparkSQLToolkit", | ||
"VectorStoreInfo", | ||
"VectorStoreRouterToolkit", | ||
"VectorStoreToolkit", | ||
"ZapierToolkit", | ||
"create_json_agent", | ||
"create_openapi_agent", | ||
"create_pbi_agent", | ||
"create_pbi_chat_agent", | ||
"create_spark_sql_agent", | ||
"create_sql_agent", | ||
"create_vectorstore_agent", | ||
"create_vectorstore_router_agent", | ||
"create_conversational_retrieval_agent", | ||
"create_retriever_tool", | ||
] |
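The `__getattr__` hook above relies on PEP 562 (module-level `__getattr__`), which lets a package intercept lookups of names it no longer defines. A minimal self-contained sketch of the same deprecation-shim pattern follows; the module name, `old_helper`, and the target path are hypothetical placeholders, not names from this diff:

# demo_shims.py -- a minimal PEP 562 deprecation shim, mirroring the
# pattern in agent_toolkits/__init__.py above. "old_helper" and the
# "new_package.helpers" target are hypothetical placeholders.
from typing import Any

_MOVED = {"old_helper": "new_package.helpers"}


def __getattr__(name: str) -> Any:
    if name in _MOVED:
        # Deprecated name: fail loudly with a redirect message.
        raise ImportError(
            f"{name} has been moved. "
            f"Please update your import to: `from {_MOVED[name]} import {name}`."
        )
    # Any other unknown name behaves like a normal missing attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

With this in place, `from demo_shims import old_helper` fails with the redirect message (the `from ... import` machinery falls back to module attribute access, so the hook fires), while any other unknown name raises the usual AttributeError.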
...ity_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py
147 changes: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
"""Use to load blobs from the local file system.""" | ||
from pathlib import Path | ||
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union | ||
|
||
from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
def _make_iterator( | ||
length_func: Callable[[], int], show_progress: bool = False | ||
) -> Callable[[Iterable[T]], Iterator[T]]: | ||
"""Create a function that optionally wraps an iterable in tqdm.""" | ||
if show_progress: | ||
try: | ||
from tqdm.auto import tqdm | ||
except ImportError: | ||
raise ImportError( | ||
"You must install tqdm to use show_progress=True." | ||
"You can install tqdm with `pip install tqdm`." | ||
            )

        # Make sure to provide `total` here so that tqdm can show
        # a progress bar that takes into account the total number of files.
        def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
            """Wrap an iterable in a tqdm progress bar."""
            return tqdm(iterable, total=length_func())

        iterator = _with_tqdm
    else:
        iterator = iter  # type: ignore

    return iterator


# PUBLIC API


class FileSystemBlobLoader(BlobLoader):
    """Load blobs in the local file system.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

        loader = FileSystemBlobLoader("/path/to/directory")
        for blob in loader.yield_blobs():
            print(blob)
    """  # noqa: E501

    def __init__(
        self,
        path: Union[str, Path],
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
    ) -> None:
        """Initialize with a path to directory and how to glob over it.

        Args:
            path: Path to directory to load from or path to file to load.
                  If a path to a file is provided, glob/exclude/suffixes are ignored.
            glob: Glob pattern relative to the specified path;
                  by default set to pick up all non-hidden files.
            exclude: Patterns to exclude from results, using glob syntax.
            suffixes: Provide to keep only files with these suffixes.
                      Useful when wanting to keep files with different suffixes.
                      Suffixes must include the dot, e.g. ".txt".
            show_progress: If true, will show a progress bar as the files are loaded.
                           This forces an iteration through all matching files
                           to count them prior to loading them.

        Examples:

        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

            # Load a single file.
            loader = FileSystemBlobLoader("/path/to/file.txt")

            # Recursively load all text files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")

            # Recursively load all non-hidden files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")

            # Load all files in a directory without recursion.
            loader = FileSystemBlobLoader("/path/to/directory", glob="*")

            # Recursively load all files in a directory, except for py or pyc files.
            loader = FileSystemBlobLoader(
                "/path/to/directory",
                glob="**/*.txt",
                exclude=["**/*.py", "**/*.pyc"]
            )
        """  # noqa: E501
        if isinstance(path, Path):
            _path = path
        elif isinstance(path, str):
            _path = Path(path)
        else:
            raise TypeError(f"Expected str or Path, got {type(path)}")

        self.path = _path.expanduser()  # Expand user to handle ~
        self.glob = glob
        self.suffixes = set(suffixes or [])
        self.show_progress = show_progress
        self.exclude = exclude

    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """Yield blobs that match the requested pattern."""
        iterator = _make_iterator(
            length_func=self.count_matching_files, show_progress=self.show_progress
        )

        for path in iterator(self._yield_paths()):
            yield Blob.from_path(path)

    def _yield_paths(self) -> Iterable[Path]:
        """Yield paths that match the requested pattern."""
        if self.path.is_file():
            yield self.path
            return

        paths = self.path.glob(self.glob)
        for path in paths:
            if self.exclude:
                if any(path.match(glob) for glob in self.exclude):
                    continue
            if path.is_file():
                if self.suffixes and path.suffix not in self.suffixes:
                    continue
                yield path

    def count_matching_files(self) -> int:
        """Count files that match the pattern without loading them."""
        # Carry out a full iteration to count the files without
        # materializing anything expensive in memory.
        num = 0
        for _ in self._yield_paths():
            num += 1
        return num
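Tying the loader's options together, a short usage sketch; the directory, glob, and suffixes below are placeholders, and it assumes `Blob.as_string()` from the blob schema decodes a file's contents and that tqdm is installed, since `show_progress=True` requires it:

# Illustrative usage of FileSystemBlobLoader; paths are placeholders.
from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

loader = FileSystemBlobLoader(
    "~/notes",                 # hypothetical directory; "~" is expanded by the loader
    glob="**/[!.]*",           # all non-hidden files, recursively
    suffixes=[".md", ".txt"],  # keep only these extensions (dot included)
    show_progress=True,        # counts matching files first, then wraps iteration in tqdm
)

for blob in loader.yield_blobs():
    # blob.path is the source file; as_string() reads and decodes its contents.
    print(blob.path, len(blob.as_string()))

Because `count_matching_files` walks the glob once before loading, the progress bar knows its total up front, at the cost of a second directory traversal.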
...ts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py
70 changes: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
"""Code for generic / auxiliary parsers. | ||
This module contains some logic to help assemble more sophisticated parsers. | ||
""" | ||
from typing import Iterator, Mapping, Optional | ||
|
||
from langchain_core.documents import Document | ||
|
||
from langchain_community.document_loaders.base import BaseBlobParser | ||
from langchain_community.document_loaders.blob_loaders.schema import Blob | ||
|
||
|
||
class MimeTypeBasedParser(BaseBlobParser): | ||
"""Parser that uses `mime`-types to parse a blob. | ||
This parser is useful for simple pipelines where the mime-type is sufficient | ||
to determine how to parse a blob. | ||
To use, configure handlers based on mime-types and pass them to the initializer. | ||
Example: | ||
.. code-block:: python | ||
from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser | ||
parser = MimeTypeBasedParser( | ||
handlers={ | ||
"application/pdf": ..., | ||
}, | ||
fallback_parser=..., | ||
) | ||
""" # noqa: E501 | ||
|
||
def __init__( | ||
self, | ||
handlers: Mapping[str, BaseBlobParser], | ||
*, | ||
fallback_parser: Optional[BaseBlobParser] = None, | ||
) -> None: | ||
"""Define a parser that uses mime-types to determine how to parse a blob. | ||
Args: | ||
handlers: A mapping from mime-types to functions that take a blob, parse it | ||
and return a document. | ||
            fallback_parser: A fallback parser to use if the mime-type is not
                             found in the handlers. If provided, this parser will
                             be used to parse blobs with all mime-types not found
                             in the handlers.
                             If not provided, a ValueError will be raised if the
                             mime-type is not found in the handlers.
""" | ||
self.handlers = handlers | ||
self.fallback_parser = fallback_parser | ||
|
||
def lazy_parse(self, blob: Blob) -> Iterator[Document]: | ||
"""Load documents from a blob.""" | ||
mimetype = blob.mimetype | ||
|
||
if mimetype is None: | ||
raise ValueError(f"{blob} does not have a mimetype.") | ||
|
||
if mimetype in self.handlers: | ||
handler = self.handlers[mimetype] | ||
yield from handler.lazy_parse(blob) | ||
else: | ||
if self.fallback_parser is not None: | ||
yield from self.fallback_parser.lazy_parse(blob) | ||
else: | ||
raise ValueError(f"Unsupported mime type: {mimetype}") |