Skip to content

Commit

Permalink
Merge branch 'bagatur/community' of github.com:langchain-ai/langchain…
Browse files Browse the repository at this point in the history
… into bagatur/community
  • Loading branch information
efriis committed Dec 7, 2023
2 parents bbc795b + 6a0a7a7 commit f0304a8
Show file tree
Hide file tree
Showing 48 changed files with 1,938 additions and 184 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Agent toolkits contain integrations with various resources and services.
LangChain has a large ecosystem of integrations with various external resources
like local and remote file systems, APIs and databases.
These integrations allow developers to create versatile applications that combine the
power of LLMs with the ability to access, interact with and manipulate external
resources.
When developing an application, developers should inspect the capabilities and
permissions of the tools that underlie the given agent toolkit, and determine
whether permissions of the given toolkit are appropriate for the application.
See [Security](https://python.langchain.com/docs/security) for more information.
"""
from pathlib import Path
from typing import Any

from langchain_core._api.path import as_import_path

from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit
from langchain_community.agent_toolkits.azure_cognitive_services import (
AzureCognitiveServicesToolkit,
)
from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501
create_conversational_retrieval_agent,
)
from langchain_community.agent_toolkits.file_management.toolkit import (
FileManagementToolkit,
)
from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit
from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit
from langchain_community.agent_toolkits.json.base import create_json_agent
from langchain_community.agent_toolkits.json.toolkit import JsonToolkit
from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit
from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit
from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit
from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit
from langchain_community.agent_toolkits.openapi.base import create_openapi_agent
from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain_community.agent_toolkits.playwright.toolkit import (
PlayWrightBrowserToolkit,
)
from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent
from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit
from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent
from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit
from langchain_community.agent_toolkits.sql.base import create_sql_agent
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit
from langchain_community.agent_toolkits.vectorstore.base import (
create_vectorstore_agent,
create_vectorstore_router_agent,
)
from langchain_community.agent_toolkits.vectorstore.toolkit import (
VectorStoreInfo,
VectorStoreRouterToolkit,
VectorStoreToolkit,
)
from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit
from langchain_community.tools.retriever import create_retriever_tool

# Agent constructors that are no longer importable from this package.
# The module-level ``__getattr__`` checks requested names against this list and
# raises an ImportError pointing at ``langchain_experimental`` for any match.
DEPRECATED_AGENTS = [
"create_csv_agent",
"create_pandas_dataframe_agent",
"create_xorbits_agent",
"create_python_agent",
"create_spark_dataframe_agent",
]


def __getattr__(name: str) -> Any:
    """Handle lookups of module attributes that are not defined in this module.

    Args:
        name: The attribute name that was requested on the module.

    Raises:
        ImportError: If ``name`` is listed in ``DEPRECATED_AGENTS``. The message
            tells the caller which import statement to switch to.
        AttributeError: For every other name.
    """
    if name not in DEPRECATED_AGENTS:
        raise AttributeError(f"{name} does not exist")

    # NOTE(review): as_import_path presumably renders this package's directory
    # plus ``name`` as a dotted import path -- confirm against
    # langchain_core._api.path.
    relative_path = as_import_path(Path(__file__).parent, suffix=name)
    old_path = "langchain." + relative_path
    new_path = "langchain_experimental." + relative_path
    # The trailing space after the URL matters: adjacent literals are joined
    # verbatim, and without it the link runs into the following word.
    raise ImportError(
        f"{name} has been moved to langchain experimental. "
        "See https://github.com/langchain-ai/langchain/discussions/11680 "
        "for more information.\n"
        f"Please update your import statement from: `{old_path}` to `{new_path}`."
    )


# Public API of this package: the names exported by ``from ... import *``.
# Every entry corresponds to one of the imports at the top of this module.
__all__ = [
"AINetworkToolkit",
"AmadeusToolkit",
"AzureCognitiveServicesToolkit",
"FileManagementToolkit",
"GmailToolkit",
"JiraToolkit",
"JsonToolkit",
"MultionToolkit",
"NasaToolkit",
"NLAToolkit",
"O365Toolkit",
"OpenAPIToolkit",
"PlayWrightBrowserToolkit",
"PowerBIToolkit",
"SlackToolkit",
"SteamToolkit",
"SQLDatabaseToolkit",
"SparkSQLToolkit",
"VectorStoreInfo",
"VectorStoreRouterToolkit",
"VectorStoreToolkit",
"ZapierToolkit",
"create_json_agent",
"create_openapi_agent",
"create_pbi_agent",
"create_pbi_chat_agent",
"create_spark_sql_agent",
"create_sql_agent",
"create_vectorstore_agent",
"create_vectorstore_router_agent",
"create_conversational_retrieval_agent",
"create_retriever_tool",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
"""Use to load blobs from the local file system."""
from pathlib import Path
from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union

from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader

# Element type of the iterable handed to the wrapper built by `_make_iterator`;
# the returned iterator yields items of the same type.
T = TypeVar("T")


def _make_iterator(
    length_func: Callable[[], int], show_progress: bool = False
) -> Callable[[Iterable[T]], Iterator[T]]:
    """Create a function that optionally wraps an iterable in tqdm.

    Args:
        length_func: Zero-argument callable returning the total number of items.
            It is only called when a progress bar is requested, at the moment
            the returned wrapper is applied to an iterable.
        show_progress: If true, the returned wrapper shows a tqdm progress bar;
            otherwise the built-in ``iter`` is returned.

    Returns:
        A callable that turns an iterable into an iterator.

    Raises:
        ImportError: If ``show_progress`` is true and tqdm is not installed.
    """
    if not show_progress:
        return iter  # type: ignore

    try:
        from tqdm.auto import tqdm
    except ImportError as e:
        # Chain the original error so the underlying import failure stays
        # visible in the traceback.
        raise ImportError(
            "You must install tqdm to use show_progress=True. "
            "You can install tqdm with `pip install tqdm`."
        ) from e

    # Make sure to provide `total` here so that tqdm can show
    # a progress bar that takes into account the total number of files.
    def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]:
        """Wrap an iterable in a tqdm progress bar."""
        return tqdm(iterable, total=length_func())

    return _with_tqdm


# PUBLIC API


class FileSystemBlobLoader(BlobLoader):
    """Load blobs in the local file system.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader
        loader = FileSystemBlobLoader("/path/to/directory")
        for blob in loader.yield_blobs():
            print(blob)
    """  # noqa: E501

    def __init__(
        self,
        path: Union[str, Path],
        *,
        glob: str = "**/[!.]*",
        exclude: Sequence[str] = (),
        suffixes: Optional[Sequence[str]] = None,
        show_progress: bool = False,
    ) -> None:
        """Initialize with a path to directory and how to glob over it.

        Args:
            path: Path to directory to load from or path to file to load.
                If a path to a file is provided, glob/exclude/suffixes are ignored.
            glob: Glob pattern relative to the specified path
                by default set to pick up all non-hidden files
            exclude: patterns to exclude from results, use glob syntax
            suffixes: Provide to keep only files with these suffixes
                Useful when wanting to keep files with different suffixes
                Suffixes must include the dot, e.g. ".txt"
                A single string is treated as one suffix.
            show_progress: If true, will show a progress bar as the files are loaded.
                This forces an iteration through all matching files
                to count them prior to loading them.

        Raises:
            TypeError: If ``path`` is neither a ``str`` nor a ``Path``.

        Examples:

        .. code-block:: python

            from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader

            # Load a single file.
            loader = FileSystemBlobLoader("/path/to/file.txt")

            # Recursively load all text files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt")

            # Recursively load all non-hidden files in a directory.
            loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*")

            # Load all files in a directory without recursion.
            loader = FileSystemBlobLoader("/path/to/directory", glob="*")

            # Recursively load all files in a directory, except for py or pyc files.
            loader = FileSystemBlobLoader(
                "/path/to/directory",
                glob="**/*",
                exclude=["**/*.py", "**/*.pyc"]
            )
        """  # noqa: E501
        if isinstance(path, Path):
            _path = path
        elif isinstance(path, str):
            _path = Path(path)
        else:
            raise TypeError(f"Expected str or Path, got {type(path)}")

        if isinstance(suffixes, str):
            # A bare string is itself a sequence of characters; without this
            # guard ".txt" would become {".", "t", "x"} and match no file.
            # An empty string keeps meaning "no suffix filter".
            suffixes = [suffixes] if suffixes else []

        self.path = _path.expanduser()  # Expand user to handle ~
        self.glob = glob
        self.suffixes = set(suffixes or [])
        self.show_progress = show_progress
        self.exclude = exclude

    def yield_blobs(
        self,
    ) -> Iterable[Blob]:
        """Yield blobs that match the requested pattern."""
        iterator = _make_iterator(
            length_func=self.count_matching_files, show_progress=self.show_progress
        )

        for path in iterator(self._yield_paths()):
            yield Blob.from_path(path)

    def _yield_paths(self) -> Iterable[Path]:
        """Yield paths that match the requested pattern."""
        # A path to a single file is yielded as-is; glob, exclude and suffixes
        # only apply when walking a directory.
        if self.path.is_file():
            yield self.path
            return

        for path in self.path.glob(self.glob):
            if self.exclude and any(
                path.match(pattern) for pattern in self.exclude
            ):
                continue
            if not path.is_file():
                continue
            if self.suffixes and path.suffix not in self.suffixes:
                continue
            yield path

    def count_matching_files(self) -> int:
        """Count files that match the pattern without loading them."""
        # Carry out a full iteration to count the files without
        # materializing anything expensive in memory.
        return sum(1 for _ in self._yield_paths())
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Iterator, List, Literal, Optional, Sequence, Union, \
TYPE_CHECKING
from typing import (
TYPE_CHECKING,
Any,
Iterator,
List,
Literal,
Optional,
Sequence,
Union,
)

from langchain_core.documents import Document

Expand Down Expand Up @@ -84,7 +92,7 @@ class GenericLoader(BaseLoader):
parser=PyPDFParser()
)
"""
""" # noqa: E501

def __init__(
self,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Code for generic / auxiliary parsers.
This module contains some logic to help assemble more sophisticated parsers.
"""
from typing import Iterator, Mapping, Optional

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders.schema import Blob


class MimeTypeBasedParser(BaseBlobParser):
    """Blob parser that dispatches to another parser based on the blob's mime-type.

    Suitable for simple pipelines in which the mime-type alone decides how a
    blob should be parsed. Register one parser per mime-type and, optionally, a
    fallback parser for everything else.

    Example:

    .. code-block:: python

        from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser

        parser = MimeTypeBasedParser(
            handlers={
                "application/pdf": ...,
            },
            fallback_parser=...,
        )
    """  # noqa: E501

    def __init__(
        self,
        handlers: Mapping[str, BaseBlobParser],
        *,
        fallback_parser: Optional[BaseBlobParser] = None,
    ) -> None:
        """Store the mime-type to parser mapping and the optional fallback.

        Args:
            handlers: Mapping from mime-type to the parser whose ``lazy_parse``
                is used for blobs of that mime-type.
            fallback_parser: Parser used for any blob whose mime-type has no
                entry in ``handlers``. When omitted, parsing such a blob raises
                a ValueError.
        """
        self.fallback_parser = fallback_parser
        self.handlers = handlers

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily yield the documents produced by the parser chosen for ``blob``.

        Raises:
            ValueError: If the blob has no mime-type, or if its mime-type has
                no registered handler and no fallback parser was configured.
        """
        mime = blob.mimetype

        if mime is None:
            raise ValueError(f"{blob} does not have a mimetype.")

        # Select the parser first, then delegate to it in one place.
        if mime in self.handlers:
            chosen = self.handlers[mime]
        elif self.fallback_parser is not None:
            chosen = self.fallback_parser
        else:
            raise ValueError(f"Unsupported mime type: {mime}")

        yield from chosen.lazy_parse(blob)
Loading

0 comments on commit f0304a8

Please sign in to comment.