diff --git a/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py new file mode 100644 index 0000000000000..655d053746618 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/agent_toolkits/__init__.py @@ -0,0 +1,123 @@ +"""Agent toolkits contain integrations with various resources and services. + +LangChain has a large ecosystem of integrations with various external resources +like local and remote file systems, APIs and databases. + +These integrations allow developers to create versatile applications that combine the +power of LLMs with the ability to access, interact with and manipulate external +resources. + +When developing an application, developers should inspect the capabilities and +permissions of the tools that underlie the given agent toolkit, and determine +whether permissions of the given toolkit are appropriate for the application. + +See [Security](https://python.langchain.com/docs/security) for more information. +""" +from pathlib import Path +from typing import Any + +from langchain_core._api.path import as_import_path + +from langchain_community.agent_toolkits.ainetwork.toolkit import AINetworkToolkit +from langchain_community.agent_toolkits.amadeus.toolkit import AmadeusToolkit +from langchain_community.agent_toolkits.azure_cognitive_services import ( + AzureCognitiveServicesToolkit, +) +from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501 + create_conversational_retrieval_agent, +) +from langchain_community.agent_toolkits.file_management.toolkit import ( + FileManagementToolkit, +) +from langchain_community.agent_toolkits.gmail.toolkit import GmailToolkit +from langchain_community.agent_toolkits.jira.toolkit import JiraToolkit +from langchain_community.agent_toolkits.json.base import create_json_agent +from langchain_community.agent_toolkits.json.toolkit import JsonToolkit +from langchain_community.agent_toolkits.multion.toolkit import MultionToolkit +from langchain_community.agent_toolkits.nasa.toolkit import NasaToolkit +from langchain_community.agent_toolkits.nla.toolkit import NLAToolkit +from langchain_community.agent_toolkits.office365.toolkit import O365Toolkit +from langchain_community.agent_toolkits.openapi.base import create_openapi_agent +from langchain_community.agent_toolkits.openapi.toolkit import OpenAPIToolkit +from langchain_community.agent_toolkits.playwright.toolkit import ( + PlayWrightBrowserToolkit, +) +from langchain_community.agent_toolkits.powerbi.base import create_pbi_agent +from langchain_community.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent +from langchain_community.agent_toolkits.powerbi.toolkit import PowerBIToolkit +from langchain_community.agent_toolkits.slack.toolkit import SlackToolkit +from langchain_community.agent_toolkits.spark_sql.base import create_spark_sql_agent +from langchain_community.agent_toolkits.spark_sql.toolkit import SparkSQLToolkit +from langchain_community.agent_toolkits.sql.base import create_sql_agent +from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit +from langchain_community.agent_toolkits.steam.toolkit import SteamToolkit +from langchain_community.agent_toolkits.vectorstore.base import ( + create_vectorstore_agent, + create_vectorstore_router_agent, +) +from langchain_community.agent_toolkits.vectorstore.toolkit import ( + VectorStoreInfo, + VectorStoreRouterToolkit, 
+ VectorStoreToolkit, +) +from langchain_community.agent_toolkits.zapier.toolkit import ZapierToolkit +from langchain_community.tools.retriever import create_retriever_tool + +DEPRECATED_AGENTS = [ + "create_csv_agent", + "create_pandas_dataframe_agent", + "create_xorbits_agent", + "create_python_agent", + "create_spark_dataframe_agent", +] + + +def __getattr__(name: str) -> Any: + """Get attr name.""" + if name in DEPRECATED_AGENTS: + relative_path = as_import_path(Path(__file__).parent, suffix=name) + old_path = "langchain." + relative_path + new_path = "langchain_experimental." + relative_path + raise ImportError( + f"{name} has been moved to langchain experimental. " + "See https://github.com/langchain-ai/langchain/discussions/11680 " + "for more information.\n" + f"Please update your import statement from: `{old_path}` to `{new_path}`." + ) + raise AttributeError(f"{name} does not exist") + + +__all__ = [ + "AINetworkToolkit", + "AmadeusToolkit", + "AzureCognitiveServicesToolkit", + "FileManagementToolkit", + "GmailToolkit", + "JiraToolkit", + "JsonToolkit", + "MultionToolkit", + "NasaToolkit", + "NLAToolkit", + "O365Toolkit", + "OpenAPIToolkit", + "PlayWrightBrowserToolkit", + "PowerBIToolkit", + "SlackToolkit", + "SteamToolkit", + "SQLDatabaseToolkit", + "SparkSQLToolkit", + "VectorStoreInfo", + "VectorStoreRouterToolkit", + "VectorStoreToolkit", + "ZapierToolkit", + "create_json_agent", + "create_openapi_agent", + "create_pbi_agent", + "create_pbi_chat_agent", + "create_spark_sql_agent", + "create_sql_agent", + "create_vectorstore_agent", + "create_vectorstore_router_agent", + "create_conversational_retrieval_agent", + "create_retriever_tool", +] diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py new file mode 100644 index 0000000000000..0fcdd4438ee46 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py @@ -0,0 +1,147 @@ +"""Use to load blobs from the local file system.""" +from pathlib import Path +from typing import Callable, Iterable, Iterator, Optional, Sequence, TypeVar, Union + +from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader + +T = TypeVar("T") + + +def _make_iterator( + length_func: Callable[[], int], show_progress: bool = False ) -> Callable[[Iterable[T]], Iterator[T]]: + """Create a function that optionally wraps an iterable in tqdm.""" + if show_progress: + try: + from tqdm.auto import tqdm + except ImportError: + raise ImportError( + "You must install tqdm to use show_progress=True. " + "You can install tqdm with `pip install tqdm`." + ) + + # Make sure to provide `total` here so that tqdm can show + # a progress bar that takes into account the total number of files. + def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]: + """Wrap an iterable in a tqdm progress bar.""" + return tqdm(iterable, total=length_func()) + + iterator = _with_tqdm + else: + iterator = iter # type: ignore + + return iterator + + +# PUBLIC API + + +class FileSystemBlobLoader(BlobLoader): + """Load blobs in the local file system. + + Example: + + .. 
code-block:: python + + from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader + loader = FileSystemBlobLoader("/path/to/directory") + for blob in loader.yield_blobs(): + print(blob) + """ # noqa: E501 + + def __init__( + self, + path: Union[str, Path], + *, + glob: str = "**/[!.]*", + exclude: Sequence[str] = (), + suffixes: Optional[Sequence[str]] = None, + show_progress: bool = False, + ) -> None: + """Initialize with a path to directory and how to glob over it. + + Args: + path: Path to directory to load from or path to file to load. + If a path to a file is provided, glob/exclude/suffixes are ignored. + glob: Glob pattern relative to the specified path + by default set to pick up all non-hidden files + exclude: patterns to exclude from results, use glob syntax + suffixes: Provide to keep only files with these suffixes + Useful when wanting to keep files with different suffixes + Suffixes must include the dot, e.g. ".txt" + show_progress: If true, will show a progress bar as the files are loaded. + This forces an iteration through all matching files + to count them prior to loading them. + + Examples: + + .. code-block:: python + from langchain_community.document_loaders.blob_loaders import FileSystemBlobLoader + + # Load a single file. + loader = FileSystemBlobLoader("/path/to/file.txt") + + # Recursively load all text files in a directory. + loader = FileSystemBlobLoader("/path/to/directory", glob="**/*.txt") + + # Recursively load all non-hidden files in a directory. + loader = FileSystemBlobLoader("/path/to/directory", glob="**/[!.]*") + + # Load all files in a directory without recursion. + loader = FileSystemBlobLoader("/path/to/directory", glob="*") + + # Recursively load all files in a directory, except for py or pyc files. + loader = FileSystemBlobLoader( + "/path/to/directory", + glob="**/*.txt", + exclude=["**/*.py", "**/*.pyc"] + ) + """ # noqa: E501 + if isinstance(path, Path): + _path = path + elif isinstance(path, str): + _path = Path(path) + else: + raise TypeError(f"Expected str or Path, got {type(path)}") + + self.path = _path.expanduser() # Expand user to handle ~ + self.glob = glob + self.suffixes = set(suffixes or []) + self.show_progress = show_progress + self.exclude = exclude + + def yield_blobs( + self, + ) -> Iterable[Blob]: + """Yield blobs that match the requested pattern.""" + iterator = _make_iterator( + length_func=self.count_matching_files, show_progress=self.show_progress + ) + + for path in iterator(self._yield_paths()): + yield Blob.from_path(path) + + def _yield_paths(self) -> Iterable[Path]: + """Yield paths that match the requested pattern.""" + if self.path.is_file(): + yield self.path + return + + paths = self.path.glob(self.glob) + for path in paths: + if self.exclude: + if any(path.match(glob) for glob in self.exclude): + continue + if path.is_file(): + if self.suffixes and path.suffix not in self.suffixes: + continue + yield path + + def count_matching_files(self) -> int: + """Count files that match the pattern without loading them.""" + # Carry out a full iteration to count the files without + # materializing anything expensive in memory. 
+ num = 0 + for _ in self._yield_paths(): + num += 1 + return num diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py index 8028dafab5775..0ec6ca60bdf09 100644 --- a/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/generic.py @@ -1,8 +1,16 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Iterator, List, Literal, Optional, Sequence, Union, \ - TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, + Iterator, + List, + Literal, + Optional, + Sequence, + Union, +) from langchain_core.documents import Document @@ -84,7 +92,7 @@ class GenericLoader(BaseLoader): parser=PyPDFParser() ) - """ + """ # noqa: E501 def __init__( self, diff --git a/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py new file mode 100644 index 0000000000000..6b6b91b93ee08 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_loaders/parsers/generic.py @@ -0,0 +1,70 @@ +"""Code for generic / auxiliary parsers. + +This module contains some logic to help assemble more sophisticated parsers. +""" +from typing import Iterator, Mapping, Optional + +from langchain_core.documents import Document + +from langchain_community.document_loaders.base import BaseBlobParser +from langchain_community.document_loaders.blob_loaders.schema import Blob + + +class MimeTypeBasedParser(BaseBlobParser): + """Parser that uses `mime`-types to parse a blob. + + This parser is useful for simple pipelines where the mime-type is sufficient + to determine how to parse a blob. + + To use, configure handlers based on mime-types and pass them to the initializer. + + Example: + + .. code-block:: python + + from langchain_community.document_loaders.parsers.generic import MimeTypeBasedParser + + parser = MimeTypeBasedParser( + handlers={ + "application/pdf": ..., + }, + fallback_parser=..., + ) + """ # noqa: E501 + + def __init__( + self, + handlers: Mapping[str, BaseBlobParser], + *, + fallback_parser: Optional[BaseBlobParser] = None, + ) -> None: + """Define a parser that uses mime-types to determine how to parse a blob. + + Args: + handlers: A mapping from mime-types to functions that take a blob, parse it + and return a document. + fallback_parser: A fallback_parser parser to use if the mime-type is not + found in the handlers. If provided, this parser will be + used to parse blobs with all mime-types not found in + the handlers. + If not provided, a ValueError will be raised if the + mime-type is not found in the handlers. 
+ """ + self.handlers = handlers + self.fallback_parser = fallback_parser + + def lazy_parse(self, blob: Blob) -> Iterator[Document]: + """Load documents from a blob.""" + mimetype = blob.mimetype + + if mimetype is None: + raise ValueError(f"{blob} does not have a mimetype.") + + if mimetype in self.handlers: + handler = self.handlers[mimetype] + yield from handler.lazy_parse(blob) + else: + if self.fallback_parser is not None: + yield from self.fallback_parser.lazy_parse(blob) + else: + raise ValueError(f"Unsupported mime type: {mimetype}") diff --git a/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py b/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py new file mode 100644 index 0000000000000..0e2b5d394c2c3 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py @@ -0,0 +1,149 @@ +from typing import Any, Iterator, List, Sequence, cast + +from langchain_core.documents import BaseDocumentTransformer, Document + + +class BeautifulSoupTransformer(BaseDocumentTransformer): + """Transform HTML content by extracting specific tags and removing unwanted ones. + + Example: + .. code-block:: python + + from langchain_community.document_transformers import BeautifulSoupTransformer + + bs4_transformer = BeautifulSoupTransformer() + docs_transformed = bs4_transformer.transform_documents(docs) + """ # noqa: E501 + + def __init__(self) -> None: + """ + Initialize the transformer. + + This checks if the BeautifulSoup4 package is installed. + If not, it raises an ImportError. + """ + try: + import bs4 # noqa:F401 + except ImportError: + raise ImportError( + "BeautifulSoup4 is required for BeautifulSoupTransformer. " + "Please install it with `pip install beautifulsoup4`." + ) + + def transform_documents( + self, + documents: Sequence[Document], + unwanted_tags: List[str] = ["script", "style"], + tags_to_extract: List[str] = ["p", "li", "div", "a"], + remove_lines: bool = True, + **kwargs: Any, + ) -> Sequence[Document]: + """ + Transform a list of Document objects by cleaning their HTML content. + + Args: + documents: A sequence of Document objects containing HTML content. + unwanted_tags: A list of tags to be removed from the HTML. + tags_to_extract: A list of tags whose content will be extracted. + remove_lines: If set to True, unnecessary lines will be + removed from the HTML content. + + Returns: + A sequence of Document objects with transformed content. + """ + for doc in documents: + cleaned_content = doc.page_content + + cleaned_content = self.remove_unwanted_tags(cleaned_content, unwanted_tags) + + cleaned_content = self.extract_tags(cleaned_content, tags_to_extract) + + if remove_lines: + cleaned_content = self.remove_unnecessary_lines(cleaned_content) + + doc.page_content = cleaned_content + + return documents + + @staticmethod + def remove_unwanted_tags(html_content: str, unwanted_tags: List[str]) -> str: + """ + Remove unwanted tags from a given HTML content. + + Args: + html_content: The original HTML content string. + unwanted_tags: A list of tags to be removed from the HTML. + + Returns: + A cleaned HTML string with unwanted tags removed. 
+ """ + from bs4 import BeautifulSoup + + soup = BeautifulSoup(html_content, "html.parser") + for tag in unwanted_tags: + for element in soup.find_all(tag): + element.decompose() + return str(soup) + + @staticmethod + def extract_tags(html_content: str, tags: List[str]) -> str: + """ + Extract specific tags from a given HTML content. + + Args: + html_content: The original HTML content string. + tags: A list of tags to be extracted from the HTML. + + Returns: + A string combining the content of the extracted tags. + """ + from bs4 import BeautifulSoup + + soup = BeautifulSoup(html_content, "html.parser") + text_parts: List[str] = [] + for element in soup.find_all(): + if element.name in tags: + # Extract all navigable strings recursively from this element. + text_parts += get_navigable_strings(element) + + # To avoid duplicate text, remove all descendants from the soup. + element.decompose() + + return " ".join(text_parts) + + @staticmethod + def remove_unnecessary_lines(content: str) -> str: + """ + Clean up the content by removing unnecessary lines. + + Args: + content: A string, which may contain unnecessary lines or spaces. + + Returns: + A cleaned string with unnecessary lines removed. + """ + lines = content.split("\n") + stripped_lines = [line.strip() for line in lines] + non_empty_lines = [line for line in stripped_lines if line] + cleaned_content = " ".join(non_empty_lines) + return cleaned_content + + async def atransform_documents( + self, + documents: Sequence[Document], + **kwargs: Any, + ) -> Sequence[Document]: + raise NotImplementedError + + +def get_navigable_strings(element: Any) -> Iterator[str]: + from bs4 import NavigableString, Tag + + for child in cast(Tag, element).children: + if isinstance(child, Tag): + yield from get_navigable_strings(child) + elif isinstance(child, NavigableString): + if (element.name == "a") and (href := element.get("href")): + yield f"{child.strip()} ({href})" + else: + yield child.strip() diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py b/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py index 4ca5b4cfa144c..9f918a57907ed 100644 --- a/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py +++ b/.scripts/community_split/libs/community/langchain_community/embeddings/__init__.py @@ -7,7 +7,7 @@ .. code-block:: - Embeddings --> Embeddings # Examples: BedrockEmbeddings, HuggingFaceEmbeddings + Embeddings --> Embeddings # Examples: CohereEmbeddings, HuggingFaceEmbeddings """ diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/cache.py b/.scripts/community_split/libs/community/langchain_community/embeddings/cache.py new file mode 100644 index 0000000000000..b4f807eccd5d5 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/embeddings/cache.py @@ -0,0 +1,176 @@ +"""Module contains code for a cache backed embedder. + +The cache backed embedder is a wrapper around an embedder that caches +embeddings in a key-value store. The cache is used to avoid recomputing +embeddings for the same text. + +The text is hashed and the hash is used as the key in the cache. 
+""" +from __future__ import annotations + +import hashlib +import json +import uuid +from functools import partial +from typing import Callable, List, Sequence, Union, cast + +from langchain_core.embeddings import Embeddings +from langchain_core.stores import BaseStore, ByteStore + +from langchain_community.storage.encoder_backed import EncoderBackedStore + +NAMESPACE_UUID = uuid.UUID(int=1985) + + +def _hash_string_to_uuid(input_string: str) -> uuid.UUID: + """Hash a string and returns the corresponding UUID.""" + hash_value = hashlib.sha1(input_string.encode("utf-8")).hexdigest() + return uuid.uuid5(NAMESPACE_UUID, hash_value) + + +def _key_encoder(key: str, namespace: str) -> str: + """Encode a key.""" + return namespace + str(_hash_string_to_uuid(key)) + + +def _create_key_encoder(namespace: str) -> Callable[[str], str]: + """Create an encoder for a key.""" + return partial(_key_encoder, namespace=namespace) + + +def _value_serializer(value: Sequence[float]) -> bytes: + """Serialize a value.""" + return json.dumps(value).encode() + + +def _value_deserializer(serialized_value: bytes) -> List[float]: + """Deserialize a value.""" + return cast(List[float], json.loads(serialized_value.decode())) + + +class CacheBackedEmbeddings(Embeddings): + """Interface for caching results from embedding models. + + The interface allows works with any store that implements + the abstract store interface accepting keys of type str and values of list of + floats. + + If need be, the interface can be extended to accept other implementations + of the value serializer and deserializer, as well as the key encoder. + + Examples: + + .. code-block: python + + from langchain_community.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings + from langchain_community.storage import LocalFileStore + + store = LocalFileStore('./my_cache') + + underlying_embedder = OpenAIEmbeddings() + embedder = CacheBackedEmbeddings.from_bytes_store( + underlying_embedder, store, namespace=underlying_embedder.model + ) + + # Embedding is computed and cached + embeddings = embedder.embed_documents(["hello", "goodbye"]) + + # Embeddings are retrieved from the cache, no computation is done + embeddings = embedder.embed_documents(["hello", "goodbye"]) + """ # noqa: E501 + + def __init__( + self, + underlying_embeddings: Embeddings, + document_embedding_store: BaseStore[str, List[float]], + ) -> None: + """Initialize the embedder. + + Args: + underlying_embeddings: the embedder to use for computing embeddings. + document_embedding_store: The store to use for caching document embeddings. + """ + super().__init__() + self.document_embedding_store = document_embedding_store + self.underlying_embeddings = underlying_embeddings + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Embed a list of texts. + + The method first checks the cache for the embeddings. + If the embeddings are not found, the method uses the underlying embedder + to embed the documents and stores the results in the cache. + + Args: + texts: A list of texts to embed. + + Returns: + A list of embeddings for the given texts. 
+ """ + vectors: List[Union[List[float], None]] = self.document_embedding_store.mget( + texts + ) + missing_indices: List[int] = [ + i for i, vector in enumerate(vectors) if vector is None + ] + missing_texts = [texts[i] for i in missing_indices] + + if missing_texts: + missing_vectors = self.underlying_embeddings.embed_documents(missing_texts) + self.document_embedding_store.mset( + list(zip(missing_texts, missing_vectors)) + ) + for index, updated_vector in zip(missing_indices, missing_vectors): + vectors[index] = updated_vector + + return cast( + List[List[float]], vectors + ) # Nones should have been resolved by now + + def embed_query(self, text: str) -> List[float]: + """Embed query text. + + This method does not support caching at the moment. + + Support for caching queries is easily to implement, but might make + sense to hold off to see the most common patterns. + + If the cache has an eviction policy, we may need to be a bit more careful + about sharing the cache between documents and queries. Generally, + one is OK evicting query caches, but document caches should be kept. + + Args: + text: The text to embed. + + Returns: + The embedding for the given text. + """ + return self.underlying_embeddings.embed_query(text) + + @classmethod + def from_bytes_store( + cls, + underlying_embeddings: Embeddings, + document_embedding_cache: ByteStore, + *, + namespace: str = "", + ) -> CacheBackedEmbeddings: + """On-ramp that adds the necessary serialization and encoding to the store. + + Args: + underlying_embeddings: The embedder to use for embedding. + document_embedding_cache: The cache to use for storing document embeddings. + *, + namespace: The namespace to use for document cache. + This namespace is used to avoid collisions with other caches. + For example, set it to the name of the embedding model used. + """ + namespace = namespace + key_encoder = _create_key_encoder(namespace) + encoder_backed_store = EncoderBackedStore[str, List[float]]( + document_embedding_cache, + key_encoder, + _value_serializer, + _value_deserializer, + ) + return cls(underlying_embeddings, encoder_backed_store) diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py b/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py new file mode 100644 index 0000000000000..84a568866f178 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/embeddings/huggingface.py @@ -0,0 +1,343 @@ +from typing import Any, Dict, List, Optional + +import requests +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra, Field + +DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" +DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" +DEFAULT_BGE_MODEL = "BAAI/bge-large-en" +DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " +DEFAULT_QUERY_INSTRUCTION = ( + "Represent the question for retrieving supporting documents: " +) +DEFAULT_QUERY_BGE_INSTRUCTION_EN = ( + "Represent this question for searching relevant passages: " +) +DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:" + + +class HuggingFaceEmbeddings(BaseModel, Embeddings): + """HuggingFace sentence_transformers embedding models. + + To use, you should have the ``sentence_transformers`` python package installed. + + Example: + .. 
code-block:: python + + from langchain_community.embeddings import HuggingFaceEmbeddings + + model_name = "sentence-transformers/all-mpnet-base-v2" + model_kwargs = {'device': 'cpu'} + encode_kwargs = {'normalize_embeddings': False} + hf = HuggingFaceEmbeddings( + model_name=model_name, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs + ) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_MODEL_NAME + """Model name to use.""" + cache_folder: Optional[str] = None + """Path to store models. + Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass to the model.""" + encode_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass when calling the `encode` method of the model.""" + multi_process: bool = False + """Run encode() on multiple GPUs.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + import sentence_transformers + + except ImportError as exc: + raise ImportError( + "Could not import sentence_transformers python package. " + "Please install it with `pip install sentence-transformers`." + ) from exc + + self.client = sentence_transformers.SentenceTransformer( + self.model_name, cache_folder=self.cache_folder, **self.model_kwargs + ) + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace transformer model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + import sentence_transformers + + texts = list(map(lambda x: x.replace("\n", " "), texts)) + if self.multi_process: + pool = self.client.start_multi_process_pool() + embeddings = self.client.encode_multi_process(texts, pool) + sentence_transformers.SentenceTransformer.stop_multi_process_pool(pool) + else: + embeddings = self.client.encode(texts, **self.encode_kwargs) + + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + return self.embed_documents([text])[0] + + +class HuggingFaceInstructEmbeddings(BaseModel, Embeddings): + """Wrapper around sentence_transformers embedding models. + + To use, you should have the ``sentence_transformers`` + and ``InstructorEmbedding`` python packages installed. + + Example: + .. code-block:: python + + from langchain_community.embeddings import HuggingFaceInstructEmbeddings + + model_name = "hkunlp/instructor-large" + model_kwargs = {'device': 'cpu'} + encode_kwargs = {'normalize_embeddings': True} + hf = HuggingFaceInstructEmbeddings( + model_name=model_name, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs + ) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_INSTRUCT_MODEL + """Model name to use.""" + cache_folder: Optional[str] = None + """Path to store models. 
+ Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass to the model.""" + encode_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass when calling the `encode` method of the model.""" + embed_instruction: str = DEFAULT_EMBED_INSTRUCTION + """Instruction to use for embedding documents.""" + query_instruction: str = DEFAULT_QUERY_INSTRUCTION + """Instruction to use for embedding query.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + from InstructorEmbedding import INSTRUCTOR + + self.client = INSTRUCTOR( + self.model_name, cache_folder=self.cache_folder, **self.model_kwargs + ) + except ImportError as e: + raise ImportError("Dependencies for InstructorEmbedding not found.") from e + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace instruct model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + instruction_pairs = [[self.embed_instruction, text] for text in texts] + embeddings = self.client.encode(instruction_pairs, **self.encode_kwargs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace instruct model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + instruction_pair = [self.query_instruction, text] + embedding = self.client.encode([instruction_pair], **self.encode_kwargs)[0] + return embedding.tolist() + + +class HuggingFaceBgeEmbeddings(BaseModel, Embeddings): + """HuggingFace BGE sentence_transformers embedding models. + + To use, you should have the ``sentence_transformers`` python package installed. + + Example: + .. code-block:: python + + from langchain_community.embeddings import HuggingFaceBgeEmbeddings + + model_name = "BAAI/bge-large-en" + model_kwargs = {'device': 'cpu'} + encode_kwargs = {'normalize_embeddings': True} + hf = HuggingFaceBgeEmbeddings( + model_name=model_name, + model_kwargs=model_kwargs, + encode_kwargs=encode_kwargs + ) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_BGE_MODEL + """Model name to use.""" + cache_folder: Optional[str] = None + """Path to store models. + Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass to the model.""" + encode_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass when calling the `encode` method of the model.""" + query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN + """Instruction to use for embedding query.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + import sentence_transformers + + except ImportError as exc: + raise ImportError( + "Could not import sentence_transformers python package. " + "Please install it with `pip install sentence_transformers`." 
+ ) from exc + + self.client = sentence_transformers.SentenceTransformer( + self.model_name, cache_folder=self.cache_folder, **self.model_kwargs + ) + if "-zh" in self.model_name: + self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace transformer model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + texts = [t.replace("\n", " ") for t in texts] + embeddings = self.client.encode(texts, **self.encode_kwargs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + text = text.replace("\n", " ") + embedding = self.client.encode( + self.query_instruction + text, **self.encode_kwargs + ) + return embedding.tolist() + + +class HuggingFaceInferenceAPIEmbeddings(BaseModel, Embeddings): + """Embed texts using the HuggingFace API. + + Requires a HuggingFace Inference API key and a model name. + """ + + api_key: str + """Your API key for the HuggingFace Inference API.""" + model_name: str = "sentence-transformers/all-MiniLM-L6-v2" + """The name of the model to use for text embeddings.""" + api_url: Optional[str] = None + """Custom inference endpoint url. None for using default public url.""" + + @property + def _api_url(self) -> str: + return self.api_url or self._default_api_url + + @property + def _default_api_url(self) -> str: + return ( + "https://api-inference.huggingface.co" + "/pipeline" + "/feature-extraction" + f"/{self.model_name}" + ) + + @property + def _headers(self) -> dict: + return {"Authorization": f"Bearer {self.api_key}"} + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Get the embeddings for a list of texts. + + Args: + texts (Documents): A list of texts to get embeddings for. + + Returns: + Embedded texts as List[List[float]], where each inner List[float] + corresponds to a single input text. + + Example: + .. code-block:: python + + from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings + + hf_embeddings = HuggingFaceInferenceAPIEmbeddings( + api_key="your_api_key", + model_name="sentence-transformers/all-MiniLM-l6-v2" + ) + texts = ["Hello, world!", "How are you?"] + hf_embeddings.embed_documents(texts) + """ # noqa: E501 + response = requests.post( + self._api_url, + headers=self._headers, + json={ + "inputs": texts, + "options": {"wait_for_model": True, "use_cache": True}, + }, + ) + return response.json() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + return self.embed_documents([text])[0] diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py b/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py new file mode 100644 index 0000000000000..f183efe87b533 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/embeddings/johnsnowlabs.py @@ -0,0 +1,92 @@ +import os +import sys +from typing import Any, List + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra + + +class JohnSnowLabsEmbeddings(BaseModel, Embeddings): + """JohnSnowLabs embedding models + + To use, you should have the ``johnsnowlabs`` python package installed. + Example: + .. code-block:: python + + from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings + + embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert') + output = embedding.embed_query("foo bar") + """ # noqa: E501 + + model: Any = "embed_sentence.bert" + + def __init__( + self, + model: Any = "embed_sentence.bert", + hardware_target: str = "cpu", + **kwargs: Any, + ): + """Initialize the johnsnowlabs model.""" + super().__init__(**kwargs) + # 1) Check imports + try: + from johnsnowlabs import nlp + from nlu.pipe.pipeline import NLUPipeline + except ImportError as exc: + raise ImportError( + "Could not import johnsnowlabs python package. " + "Please install it with `pip install johnsnowlabs`." + ) from exc + + # 2) Start a Spark Session + try: + os.environ["PYSPARK_PYTHON"] = sys.executable + os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable + nlp.start(hardware_target=hardware_target) + except Exception as exc: + raise Exception("Failure starting Spark Session") from exc + + # 3) Load the model + try: + if isinstance(model, str): + self.model = nlp.load(model) + elif isinstance(model, NLUPipeline): + self.model = model + else: + self.model = nlp.to_nlu_pipe(model) + except Exception as exc: + raise Exception("Failure loading model") from exc + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a JohnSnowLabs transformer model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + + df = self.model.predict(texts, output_level="document") + emb_col = None + for c in df.columns: + if "embedding" in c: + emb_col = c + return [vec.tolist() for vec in df[emb_col].tolist()] + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a JohnSnowLabs transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. 
+ """ + return self.embed_documents([text])[0] diff --git a/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py b/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py new file mode 100644 index 0000000000000..0b706532cf230 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py @@ -0,0 +1,168 @@ +import importlib +import logging +from typing import Any, Callable, List, Optional + +from langchain_community.embeddings.self_hosted import SelfHostedEmbeddings + +DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" +DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large" +DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: " +DEFAULT_QUERY_INSTRUCTION = ( + "Represent the question for retrieving supporting documents: " +) + +logger = logging.getLogger(__name__) + + +def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]: + """Inference function to send to the remote hardware. + + Accepts a sentence_transformer model_id and + returns a list of embeddings for each document in the batch. + """ + return client.encode(*args, **kwargs) + + +def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0) -> Any: + """Load the embedding model.""" + if not instruct: + import sentence_transformers + + client = sentence_transformers.SentenceTransformer(model_id) + else: + from InstructorEmbedding import INSTRUCTOR + + client = INSTRUCTOR(model_id) + + if importlib.util.find_spec("torch") is not None: + import torch + + cuda_device_count = torch.cuda.device_count() + if device < -1 or (device >= cuda_device_count): + raise ValueError( + f"Got device=={device}, " + f"device is required to be within [-1, {cuda_device_count})" + ) + if device < 0 and cuda_device_count > 0: + logger.warning( + "Device has %d GPUs available. " + "Provide device={deviceId} to `from_model_id` to use available" + "GPUs for execution. deviceId is -1 for CPU and " + "can be a positive integer associated with CUDA device id.", + cuda_device_count, + ) + + client = client.to(device) + return client + + +class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings): + """HuggingFace embedding models on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another cloud + like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example: + .. 
code-block:: python + + from langchain_community.embeddings import SelfHostedHuggingFaceEmbeddings + import runhouse as rh + model_name = "sentence-transformers/all-mpnet-base-v2" + gpu = rh.cluster(name="rh-a10x", instance_type="A100:1") + hf = SelfHostedHuggingFaceEmbeddings(model_name=model_name, hardware=gpu) + """ + + client: Any #: :meta private: + model_id: str = DEFAULT_MODEL_NAME + """Model name to use.""" + model_reqs: List[str] = ["./", "sentence_transformers", "torch"] + """Requirements to install on hardware to inference the model.""" + hardware: Any + """Remote hardware to send the inference function to.""" + model_load_fn: Callable = load_embedding_model + """Function to load the model remotely on the server.""" + load_fn_kwargs: Optional[dict] = None + """Keyword arguments to pass to the model load function.""" + inference_fn: Callable = _embed_documents + """Inference function to extract the embeddings.""" + + def __init__(self, **kwargs: Any): + """Initialize the remote inference function.""" + load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) + load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME) + load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False) + load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) + super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) + + +class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings): + """HuggingFace InstructEmbedding models on self-hosted remote hardware. + + Supported hardware includes auto-launched instances on AWS, GCP, Azure, + and Lambda, as well as servers specified + by IP address and SSH credentials (such as on-prem, or another + cloud like Paperspace, Coreweave, etc.). + + To use, you should have the ``runhouse`` python package installed. + + Example: + .. code-block:: python + + from langchain_community.embeddings import SelfHostedHuggingFaceInstructEmbeddings + import runhouse as rh + model_name = "hkunlp/instructor-large" + gpu = rh.cluster(name='rh-a10x', instance_type='A100:1') + hf = SelfHostedHuggingFaceInstructEmbeddings( + model_name=model_name, hardware=gpu) + """ # noqa: E501 + + model_id: str = DEFAULT_INSTRUCT_MODEL + """Model name to use.""" + embed_instruction: str = DEFAULT_EMBED_INSTRUCTION + """Instruction to use for embedding documents.""" + query_instruction: str = DEFAULT_QUERY_INSTRUCTION + """Instruction to use for embedding query.""" + model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"] + """Requirements to install on hardware to inference the model.""" + + def __init__(self, **kwargs: Any): + """Initialize the remote inference function.""" + load_fn_kwargs = kwargs.pop("load_fn_kwargs", {}) + load_fn_kwargs["model_id"] = load_fn_kwargs.get( + "model_id", DEFAULT_INSTRUCT_MODEL + ) + load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True) + load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0) + super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace instruct model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + instruction_pairs = [] + for text in texts: + instruction_pairs.append([self.embed_instruction, text]) + embeddings = self.client(self.pipeline_ref, instruction_pairs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace instruct model. 
+ + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + instruction_pair = [self.query_instruction, text] + embedding = self.client(self.pipeline_ref, [instruction_pair])[0] + return embedding.tolist() diff --git a/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py b/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py new file mode 100644 index 0000000000000..be832cf1368c2 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/llms/anthropic.py @@ -0,0 +1,351 @@ +import re +import warnings +from typing import ( + Any, + AsyncIterator, + Callable, + Dict, + Iterator, + List, + Mapping, + Optional, +) + +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models import BaseLanguageModel +from langchain_core.language_models.llms import LLM +from langchain_core.outputs import GenerationChunk +from langchain_core.prompt_values import PromptValue +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator +from langchain_core.utils import ( + check_package_version, + get_from_dict_or_env, + get_pydantic_field_names, +) +from langchain_core.utils.utils import build_extra_kwargs, convert_to_secret_str + + +class _AnthropicCommon(BaseLanguageModel): + client: Any = None #: :meta private: + async_client: Any = None #: :meta private: + model: str = Field(default="claude-2", alias="model_name") + """Model name to use.""" + + max_tokens_to_sample: int = Field(default=256, alias="max_tokens") + """Denotes the number of tokens to predict per generation.""" + + temperature: Optional[float] = None + """A non-negative float that tunes the degree of randomness in generation.""" + + top_k: Optional[int] = None + """Number of most likely tokens to consider at each step.""" + + top_p: Optional[float] = None + """Total probability mass of tokens to consider at each step.""" + + streaming: bool = False + """Whether to stream the results.""" + + default_request_timeout: Optional[float] = None + """Timeout for requests to Anthropic Completion API. Default is 600 seconds.""" + + anthropic_api_url: Optional[str] = None + + anthropic_api_key: Optional[SecretStr] = None + + HUMAN_PROMPT: Optional[str] = None + AI_PROMPT: Optional[str] = None + count_tokens: Optional[Callable[[str], int]] = None + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + + @root_validator(pre=True) + def build_extra(cls, values: Dict) -> Dict: + extra = values.get("model_kwargs", {}) + all_required_field_names = get_pydantic_field_names(cls) + values["model_kwargs"] = build_extra_kwargs( + extra, values, all_required_field_names + ) + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + values["anthropic_api_key"] = convert_to_secret_str( + get_from_dict_or_env(values, "anthropic_api_key", "ANTHROPIC_API_KEY") + ) + # Get custom api url from environment. 
+ values["anthropic_api_url"] = get_from_dict_or_env( + values, + "anthropic_api_url", + "ANTHROPIC_API_URL", + default="https://api.anthropic.com", + ) + + try: + import anthropic + + check_package_version("anthropic", gte_version="0.3") + values["client"] = anthropic.Anthropic( + base_url=values["anthropic_api_url"], + api_key=values["anthropic_api_key"].get_secret_value(), + timeout=values["default_request_timeout"], + ) + values["async_client"] = anthropic.AsyncAnthropic( + base_url=values["anthropic_api_url"], + api_key=values["anthropic_api_key"].get_secret_value(), + timeout=values["default_request_timeout"], + ) + values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT + values["AI_PROMPT"] = anthropic.AI_PROMPT + values["count_tokens"] = values["client"].count_tokens + + except ImportError: + raise ImportError( + "Could not import anthropic python package. " + "Please it install it with `pip install anthropic`." + ) + return values + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Anthropic API.""" + d = { + "max_tokens_to_sample": self.max_tokens_to_sample, + "model": self.model, + } + if self.temperature is not None: + d["temperature"] = self.temperature + if self.top_k is not None: + d["top_k"] = self.top_k + if self.top_p is not None: + d["top_p"] = self.top_p + return {**d, **self.model_kwargs} + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {**{}, **self._default_params} + + def _get_anthropic_stop(self, stop: Optional[List[str]] = None) -> List[str]: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if stop is None: + stop = [] + + # Never want model to invent new turns of Human / Assistant dialog. + stop.extend([self.HUMAN_PROMPT]) + + return stop + + +class Anthropic(LLM, _AnthropicCommon): + """Anthropic large language models. + + To use, you should have the ``anthropic`` python package installed, and the + environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass + it as a named parameter to the constructor. + + Example: + .. code-block:: python + + import anthropic + from langchain_community.llms import Anthropic + + model = Anthropic(model="", anthropic_api_key="my-api-key") + + # Simplest invocation, automatically wrapped with HUMAN_PROMPT + # and AI_PROMPT. + response = model("What are the biggest risks facing humanity?") + + # Or if you want to use the chat mode, build a few-shot-prompt, or + # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT: + raw_prompt = "What are the biggest risks facing humanity?" + prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" + response = model(prompt) + """ + + class Config: + """Configuration for this pydantic object.""" + + allow_population_by_field_name = True + arbitrary_types_allowed = True + + @root_validator() + def raise_warning(cls, values: Dict) -> Dict: + """Raise warning that this class is deprecated.""" + warnings.warn( + "This Anthropic LLM is deprecated. 
" + "Please use `from langchain_community.chat_models import ChatAnthropic` " + "instead" + ) + return values + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "anthropic-llm" + + def _wrap_prompt(self, prompt: str) -> str: + if not self.HUMAN_PROMPT or not self.AI_PROMPT: + raise NameError("Please ensure the anthropic package is loaded") + + if prompt.startswith(self.HUMAN_PROMPT): + return prompt # Already wrapped. + + # Guard against common errors in specifying wrong number of newlines. + corrected_prompt, n_subs = re.subn(r"^\n*Human:", self.HUMAN_PROMPT, prompt) + if n_subs == 1: + return corrected_prompt + + # As a last resort, wrap the prompt ourselves to emulate instruct-style. + return f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here you go:\n" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + r"""Call out to Anthropic's completion endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + prompt = "What are the biggest risks facing humanity?" + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + response = model(prompt) + + """ + if self.streaming: + completion = "" + for chunk in self._stream( + prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + ): + completion += chunk.text + return completion + + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + response = self.client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **params, + ) + return response.completion + + def convert_prompt(self, prompt: PromptValue) -> str: + return self._wrap_prompt(prompt.to_string()) + + async def _acall( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call out to Anthropic's completion endpoint asynchronously.""" + if self.streaming: + completion = "" + async for chunk in self._astream( + prompt=prompt, stop=stop, run_manager=run_manager, **kwargs + ): + completion += chunk.text + return completion + + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + response = await self.async_client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + **params, + ) + return response.completion + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + r"""Call Anthropic completion_stream and return the resulting generator. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Anthropic. + Example: + .. code-block:: python + + prompt = "Write a poem about a stream." 
+ prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + generator = anthropic.stream(prompt) + for token in generator: + yield token + """ + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + for token in self.client.completions.create( + prompt=self._wrap_prompt(prompt), stop_sequences=stop, stream=True, **params + ): + chunk = GenerationChunk(text=token.completion) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + r"""Call Anthropic completion_stream and return the resulting generator. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + Returns: + A generator representing the stream of tokens from Anthropic. + Example: + .. code-block:: python + prompt = "Write a poem about a stream." + prompt = f"\n\nHuman: {prompt}\n\nAssistant:" + generator = anthropic.stream(prompt) + for token in generator: + yield token + """ + stop = self._get_anthropic_stop(stop) + params = {**self._default_params, **kwargs} + + async for token in await self.async_client.completions.create( + prompt=self._wrap_prompt(prompt), + stop_sequences=stop, + stream=True, + **params, + ): + chunk = GenerationChunk(text=token.completion) + yield chunk + if run_manager: + await run_manager.on_llm_new_token(chunk.text, chunk=chunk) + + def get_num_tokens(self, text: str) -> int: + """Calculate number of tokens.""" + if not self.count_tokens: + raise NameError("Please ensure the anthropic package is loaded") + return self.count_tokens(text) diff --git a/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py b/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py new file mode 100644 index 0000000000000..840acdbdb8114 --- /dev/null +++ b/.scripts/community_split/libs/community/langchain_community/llms/cloudflare_workersai.py @@ -0,0 +1,126 @@ +import json +import logging +from typing import Any, Dict, Iterator, List, Optional + +import requests +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models.llms import LLM +from langchain_core.outputs import GenerationChunk + +logger = logging.getLogger(__name__) + + +class CloudflareWorkersAI(LLM): + """Langchain LLM class to help to access Cloudflare Workers AI service. + + To use, you must provide an API token and + account ID to access Cloudflare Workers AI, and + pass it as a named parameter to the constructor. + + Example: + .. 
code-block:: python + + from langchain_community.llms.cloudflare_workersai import CloudflareWorkersAI + + my_account_id = "my_account_id" + my_api_token = "my_secret_api_token" + llm_model = "@cf/meta/llama-2-7b-chat-int8" + + cf_ai = CloudflareWorkersAI( + account_id=my_account_id, + api_token=my_api_token, + model=llm_model + ) + """ # noqa: E501 + + account_id: str + api_token: str + model: str = "@cf/meta/llama-2-7b-chat-int8" + base_url: str = "https://api.cloudflare.com/client/v4/accounts" + streaming: bool = False + endpoint_url: str = "" + + def __init__(self, **kwargs: Any) -> None: + """Initialize the Cloudflare Workers AI class.""" + super().__init__(**kwargs) + + self.endpoint_url = f"{self.base_url}/{self.account_id}/ai/run/{self.model}" + + @property + def _llm_type(self) -> str: + """Return type of LLM.""" + return "cloudflare" + + @property + def _default_params(self) -> Dict[str, Any]: + """Default parameters""" + return {} + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Identifying parameters""" + return { + "account_id": self.account_id, + "api_token": self.api_token, + "model": self.model, + "base_url": self.base_url, + } + + def _call_api(self, prompt: str, params: Dict[str, Any]) -> requests.Response: + """Call Cloudflare Workers API""" + headers = {"Authorization": f"Bearer {self.api_token}"} + data = {"prompt": prompt, "stream": self.streaming, **params} + response = requests.post(self.endpoint_url, headers=headers, json=data) + return response + + def _process_response(self, response: requests.Response) -> str: + """Process API response""" + if response.ok: + data = response.json() + return data["result"]["response"] + else: + raise ValueError(f"Request failed with status {response.status_code}") + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + """Streaming prediction""" + original_steaming: bool = self.streaming + self.streaming = True + _response_prefix_count = len("data: ") + _response_stream_end = b"data: [DONE]" + for chunk in self._call_api(prompt, kwargs).iter_lines(): + if chunk == _response_stream_end: + break + if len(chunk) > _response_prefix_count: + try: + data = json.loads(chunk[_response_prefix_count:]) + except Exception as e: + logger.debug(chunk) + raise e + if data is not None and "response" in data: + yield GenerationChunk(text=data["response"]) + if run_manager: + run_manager.on_llm_new_token(data["response"]) + logger.debug("stream end") + self.streaming = original_steaming + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Regular prediction""" + if self.streaming: + return "".join( + [c.text for c in self._stream(prompt, stop, run_manager, **kwargs)] + ) + else: + response = self._call_api(prompt, kwargs) + return self._process_response(response) diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py index 1228619b2f283..ca896f3ff9870 100644 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py @@ -3,13 +3,12 @@ import os from aiohttp import ClientSession -from 
langchain_core.callbacks import atrace_as_chain_group, trace_as_chain_group -from langchain_core.tracers.context import tracing_v2_enabled +from langchain_core.callbacks.manager import atrace_as_chain_group, trace_as_chain_group +from langchain_core.tracers.context import tracing_v2_enabled, tracing_enabled from langchain_core.prompts import PromptTemplate -from langchain_community.callbacks import tracing_enabled -from langchain_community.chat_models import ChatOpenAI -from langchain_community.llms import OpenAI +from langchain_openai.chat_models import ChatOpenAI +from langchain_openai.llms import OpenAI questions = [ ( diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py index f0908bbf8d052..a63592fd7874a 100644 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_openai_callback.py @@ -3,7 +3,7 @@ from langchain_community.callbacks import get_openai_callback -from langchain_community.llms import OpenAI +from langchain_openai.llms import OpenAI async def test_openai_callback() -> None: diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py index 1ffe61dbdcf07..11f140466e9df 100644 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py @@ -8,7 +8,7 @@ from langchain_community.callbacks.streamlit.streamlit_callback_handler import ( StreamlitCallbackHandler, ) -from langchain_community.llms import OpenAI +from langchain_openai.llms import OpenAI @pytest.mark.requires("streamlit") diff --git a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py index 02f022c62ad7f..e5d6afd8de6c7 100644 --- a/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py +++ b/.scripts/community_split/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py @@ -5,7 +5,7 @@ from aiohttp import ClientSession from langchain_community.callbacks import wandb_tracing_enabled -from langchain_community.llms import OpenAI +from langchain_openai.llms import OpenAI questions = [ ( diff --git a/.scripts/community_split/script_integrations.sh b/.scripts/community_split/script_integrations.sh index 5455aa2248ef4..4905ec0733790 100755 --- a/.scripts/community_split/script_integrations.sh +++ b/.scripts/community_split/script_integrations.sh @@ -123,14 +123,9 @@ mv langchain/tests/integration_tests/utilities community/tests/integration_tests mv langchain/tests/integration_tests/vectorstores community/tests/integration_tests mv langchain/tests/integration_tests/adapters community/tests/integration_tests mv langchain/tests/integration_tests/callbacks community/tests/integration_tests -mv langchain/tests/integration_tests/cache community/tests/integration_tests mv langchain/tests/integration_tests/{test_kuzu,test_nebulagraph}.py community/tests/integration_tests/graphs touch 
community/tests/integration_tests/{chat_message_histories,tools}/__init__.py -mkdir -p langchain/tests/integration_tests/cache -mv community/tests/integration_tests/cache/test_upstash_redis_cache.py langchain/tests/integration_tests/cache/ -touch langchain/tests/integration_tests/cache/__init__.py - git grep -l 'from langchain.utils.json_schema' | xargs sed -i '' 's/from langchain.utils.json_schema/from langchain_core.utils.json_schema/g' git grep -l 'from langchain.utils.html' | xargs sed -i '' 's/from langchain.utils.html/from langchain_core.utils.html/g' @@ -183,6 +178,7 @@ git grep -l 'langchain\.tools' | xargs sed -i '' 's/langchain\.tools/langchain_c git grep -l 'langchain\.llms' | xargs sed -i '' 's/langchain\.llms/langchain_community.llms/g' git grep -l 'import langchain$' | xargs sed -i '' 's/import\ langchain$/import\ langchain_community/g' git grep -l 'from\ langchain\ ' | xargs sed -i '' 's/from\ langchain\ /from\ langchain_community\ /g' +git grep -l 'langchain_core.language_models.llmsten' | xargs sed -i '' 's/langchain_core.language_models.llmsten/langchain_community.llms.baseten/g' cd .. diff --git a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb index 436e2fd14246e..0858988f0dcda 100644 --- a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb +++ b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb @@ -34,13 +34,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "tags": [] }, "outputs": [], "source": [ - "#!pip install langchain google-cloud-aiplatform" + "!pip install -U google-cloud-aiplatform" ] }, { @@ -57,41 +57,27 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [ - "chat = ChatVertexAI()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "system = \"You are a helpful assistant who translate English to French\"\n", - "human = \"Translate this sentence from English to French. I love programming.\"\n", - "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n", - "messages = prompt.format_messages()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)" + "AIMessage(content=\" J'aime la programmation.\")" ] }, - "execution_count": 9, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "chat(messages)" + "system = \"You are a helpful assistant who translate English to French\"\n", + "human = \"Translate this sentence from English to French. 
I love programming.\"\n", + "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n", + "\n", + "chat = ChatVertexAI()\n", + "\n", + "chain = prompt | chat\n", + "chain.invoke({})" ] }, { @@ -103,35 +89,29 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "system = (\n", - " \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n", - ")\n", - "human = \"{text}\"\n", - "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content=' 私はプログラミングが大好きです。', additional_kwargs={}, example=False)" + "AIMessage(content=' プログラミングが大好きです')" ] }, - "execution_count": 13, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "system = (\n", + " \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n", + ")\n", + "human = \"{text}\"\n", + "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n", + "\n", "chain = prompt | chat\n", + "\n", "chain.invoke(\n", " {\n", " \"input_language\": \"English\",\n", @@ -162,20 +142,7 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "chat = ChatVertexAI(\n", - " model_name=\"codechat-bison\", max_output_tokens=1000, temperature=0.5\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": { "tags": [] }, @@ -185,20 +152,39 @@ "output_type": "stream", "text": [ " ```python\n", - "def is_prime(x): \n", - " if (x <= 1): \n", + "def is_prime(n):\n", + " if n <= 1:\n", " return False\n", - " for i in range(2, x): \n", - " if (x % i == 0): \n", + " for i in range(2, n):\n", + " if n % i == 0:\n", " return False\n", " return True\n", + "\n", + "def find_prime_numbers(n):\n", + " prime_numbers = []\n", + " for i in range(2, n + 1):\n", + " if is_prime(i):\n", + " prime_numbers.append(i)\n", + " return prime_numbers\n", + "\n", + "print(find_prime_numbers(100))\n", + "```\n", + "\n", + "Output:\n", + "\n", + "```\n", + "[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n", "```\n" ] } ], "source": [ - "# For simple string in string out usage, we can use the `predict` method:\n", - "print(chat.predict(\"Write a Python function to identify all prime numbers\"))" + "chat = ChatVertexAI(\n", + " model_name=\"codechat-bison\", max_output_tokens=1000, temperature=0.5\n", + ")\n", + "\n", + "message = chat.invoke(\"Write a Python function to identify all prime numbers\")\n", + "print(message.content)" ] }, { @@ -207,66 +193,42 @@ "source": [ "## Asynchronous calls\n", "\n", - "We can make asynchronous calls via the `agenerate` and `ainvoke` methods." 
+ "We can make asynchronous calls via the Runnables [Async Interface](/docs/expression_language/interface)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ + "# for running these examples in the notebook:\n", "import asyncio\n", "\n", - "# import nest_asyncio\n", - "# nest_asyncio.apply()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "LLMResult(generations=[[ChatGeneration(text=\" J'aime la programmation.\", generation_info=None, message=AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('223599ef-38f8-4c79-ac6d-a5013060eb9d'))])" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chat = ChatVertexAI(\n", - " model_name=\"chat-bison\",\n", - " max_output_tokens=1000,\n", - " temperature=0.7,\n", - " top_p=0.95,\n", - " top_k=40,\n", - ")\n", + "import nest_asyncio\n", "\n", - "asyncio.run(chat.agenerate([messages]))" + "nest_asyncio.apply()" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content=' अहं प्रोग्रामिंग प्रेमामि', additional_kwargs={}, example=False)" + "AIMessage(content=' Why do you love programming?')" ] }, - "execution_count": 36, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "chain = prompt | chat\n", + "\n", "asyncio.run(\n", " chain.ainvoke(\n", " {\n", @@ -289,56 +251,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys" - ] - }, - { - "cell_type": "code", - "execution_count": 32, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " 1. China (1,444,216,107)\n", - "2. India (1,393,409,038)\n", - "3. United States (332,403,650)\n", - "4. Indonesia (273,523,615)\n", - "5. Pakistan (220,892,340)\n", - "6. Brazil (212,559,409)\n", - "7. Nigeria (206,139,589)\n", - "8. Bangladesh (164,689,383)\n", - "9. Russia (145,934,462)\n", - "10. Mexico (128,932,488)\n", - "11. Japan (126,476,461)\n", - "12. Ethiopia (115,063,982)\n", - "13. Philippines (109,581,078)\n", - "14. Egypt (102,334,404)\n", - "15. Vietnam (97,338,589)" + " The five most populous countries in the world are:\n", + "1. China (1.4 billion)\n", + "2. India (1.3 billion)\n", + "3. United States (331 million)\n", + "4. Indonesia (273 million)\n", + "5. 
Pakistan (220 million)" ] } ], "source": [ + "import sys\n", + "\n", "prompt = ChatPromptTemplate.from_messages(\n", - " [(\"human\", \"List out the 15 most populous countries in the world\")]\n", + " [(\"human\", \"List out the 5 most populous countries in the world\")]\n", ")\n", - "messages = prompt.format_messages()\n", - "for chunk in chat.stream(messages):\n", + "\n", + "chat = ChatVertexAI()\n", + "\n", + "chain = prompt | chat\n", + "\n", + "for chunk in chain.stream({}):\n", " sys.stdout.write(chunk.content)\n", " sys.stdout.flush()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "poetry-venv", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "poetry-venv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -350,7 +307,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.11.4" }, "vscode": { "interpreter": { diff --git a/libs/community/langchain_community/agent_toolkits/__init__.py b/libs/community/langchain_community/agent_toolkits/__init__.py index 9ab2aef75cf12..655d053746618 100644 --- a/libs/community/langchain_community/agent_toolkits/__init__.py +++ b/libs/community/langchain_community/agent_toolkits/__init__.py @@ -23,7 +23,7 @@ from langchain_community.agent_toolkits.azure_cognitive_services import ( AzureCognitiveServicesToolkit, ) -from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( +from langchain_community.agent_toolkits.conversational_retrieval.openai_functions import ( # noqa: E501 create_conversational_retrieval_agent, ) from langchain_community.agent_toolkits.file_management.toolkit import ( diff --git a/libs/community/langchain_community/chat_models/vertexai.py b/libs/community/langchain_community/chat_models/vertexai.py index e3afed8c1154d..f583ec0d1ffa3 100644 --- a/libs/community/langchain_community/chat_models/vertexai.py +++ b/libs/community/langchain_community/chat_models/vertexai.py @@ -245,7 +245,7 @@ def _stream( ) -> Iterator[ChatGenerationChunk]: question = _get_question(messages) history = _parse_chat_history(messages[:-1]) - params = self._prepare_params(stop=stop, **kwargs) + params = self._prepare_params(stop=stop, stream=True, **kwargs) examples = kwargs.get("examples", None) if examples: params["examples"] = _parse_examples(examples) diff --git a/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py b/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py index 8fe848983378e..0fcdd4438ee46 100644 --- a/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py +++ b/libs/community/langchain_community/document_loaders/blob_loaders/file_system.py @@ -47,7 +47,7 @@ class FileSystemBlobLoader(BlobLoader): loader = FileSystemBlobLoader("/path/to/directory") for blob in loader.yield_blobs(): print(blob) - """ + """ # noqa: E501 def __init__( self, @@ -96,7 +96,7 @@ def __init__( glob="**/*.txt", exclude=["**/*.py", "**/*.pyc"] ) - """ + """ # noqa: E501 if isinstance(path, Path): _path = path elif isinstance(path, str): diff --git a/libs/community/langchain_community/document_loaders/generic.py b/libs/community/langchain_community/document_loaders/generic.py index e6ee4eb04533d..0ec6ca60bdf09 100644 --- a/libs/community/langchain_community/document_loaders/generic.py +++ 
b/libs/community/langchain_community/document_loaders/generic.py @@ -92,7 +92,7 @@ class GenericLoader(BaseLoader): parser=PyPDFParser() ) - """ + """ # noqa: E501 def __init__( self, diff --git a/libs/community/langchain_community/document_loaders/parsers/generic.py b/libs/community/langchain_community/document_loaders/parsers/generic.py index 8e8ef2f7ea45d..6b6b91b93ee08 100644 --- a/libs/community/langchain_community/document_loaders/parsers/generic.py +++ b/libs/community/langchain_community/document_loaders/parsers/generic.py @@ -30,7 +30,7 @@ class MimeTypeBasedParser(BaseBlobParser): }, fallback_parser=..., ) - """ + """ # noqa: E501 def __init__( self, diff --git a/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py b/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py index 3b329873bfdc9..0e2b5d394c2c3 100644 --- a/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py +++ b/libs/community/langchain_community/document_transformers/beautiful_soup_transformer.py @@ -8,10 +8,12 @@ class BeautifulSoupTransformer(BaseDocumentTransformer): Example: .. code-block:: python + from langchain_community.document_transformers import BeautifulSoupTransformer + bs4_transformer = BeautifulSoupTransformer() docs_transformed = bs4_transformer.transform_documents(docs) - """ + """ # noqa: E501 def __init__(self) -> None: """ diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index 5e99e5df0d03c..832b9a8743bcc 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -7,7 +7,7 @@ .. code-block:: - Embeddings --> Embeddings # Examples: BedrockEmbeddings, HuggingFaceEmbeddings + Embeddings --> Embeddings # Examples: CohereEmbeddings, HuggingFaceEmbeddings """ diff --git a/libs/community/langchain_community/embeddings/cache.py b/libs/community/langchain_community/embeddings/cache.py index c400c5eebecc2..b4f807eccd5d5 100644 --- a/libs/community/langchain_community/embeddings/cache.py +++ b/libs/community/langchain_community/embeddings/cache.py @@ -77,7 +77,7 @@ class CacheBackedEmbeddings(Embeddings): # Embeddings are retrieved from the cache, no computation is done embeddings = embedder.embed_documents(["hello", "goodbye"]) - """ + """ # noqa: E501 def __init__( self, diff --git a/libs/community/langchain_community/embeddings/huggingface.py b/libs/community/langchain_community/embeddings/huggingface.py index 6ca23b6bed5db..84a568866f178 100644 --- a/libs/community/langchain_community/embeddings/huggingface.py +++ b/libs/community/langchain_community/embeddings/huggingface.py @@ -320,7 +320,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: ) texts = ["Hello, world!", "How are you?"] hf_embeddings.embed_documents(texts) - """ + """ # noqa: E501 response = requests.post( self._api_url, headers=self._headers, diff --git a/libs/community/langchain_community/embeddings/johnsnowlabs.py b/libs/community/langchain_community/embeddings/johnsnowlabs.py index fb713ba028840..f183efe87b533 100644 --- a/libs/community/langchain_community/embeddings/johnsnowlabs.py +++ b/libs/community/langchain_community/embeddings/johnsnowlabs.py @@ -17,7 +17,7 @@ class JohnSnowLabsEmbeddings(BaseModel, Embeddings): embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert') output = embedding.embed_query("foo bar") - """ + """ # noqa: E501 model: Any 
= "embed_sentence.bert" diff --git a/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py b/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py index 163b456234a2f..0b706532cf230 100644 --- a/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py +++ b/libs/community/langchain_community/embeddings/self_hosted_hugging_face.py @@ -118,7 +118,7 @@ class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings): gpu = rh.cluster(name='rh-a10x', instance_type='A100:1') hf = SelfHostedHuggingFaceInstructEmbeddings( model_name=model_name, hardware=gpu) - """ + """ # noqa: E501 model_id: str = DEFAULT_INSTRUCT_MODEL """Model name to use.""" diff --git a/libs/community/langchain_community/llms/anthropic.py b/libs/community/langchain_community/llms/anthropic.py index 0c8f48c2a3630..be832cf1368c2 100644 --- a/libs/community/langchain_community/llms/anthropic.py +++ b/libs/community/langchain_community/llms/anthropic.py @@ -179,7 +179,8 @@ def raise_warning(cls, values: Dict) -> Dict: """Raise warning that this class is deprecated.""" warnings.warn( "This Anthropic LLM is deprecated. " - "Please use `from langchain_community.chat_models import ChatAnthropic` instead" + "Please use `from langchain_community.chat_models import ChatAnthropic` " + "instead" ) return values diff --git a/libs/community/langchain_community/llms/cloudflare_workersai.py b/libs/community/langchain_community/llms/cloudflare_workersai.py index 2f7187fd112f0..840acdbdb8114 100644 --- a/libs/community/langchain_community/llms/cloudflare_workersai.py +++ b/libs/community/langchain_community/llms/cloudflare_workersai.py @@ -31,7 +31,7 @@ class CloudflareWorkersAI(LLM): api_token=my_api_token, model=llm_model ) - """ + """ # noqa: E501 account_id: str api_token: str diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index 9df979de6dd2b..843498b256130 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1149,7 +1149,7 @@ toml = ["tomli"] name = "cryptography" version = "41.0.7" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-optional = true +optional = false python-versions = ">=3.7" files = [ {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c78451b78313fa81607fa1b3f1ae0a5ddd8014c38a02d9db0616133987b9cdf"}, @@ -7824,6 +7824,20 @@ files = [ {file = "types_protobuf-4.24.0.4-py3-none-any.whl", hash = "sha256:131ab7d0cbc9e444bc89c994141327dcce7bcaeded72b1acb72a94827eb9c7af"}, ] +[[package]] +name = "types-pyopenssl" +version = "23.3.0.0" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.7" +files = [ + {file = "types-pyOpenSSL-23.3.0.0.tar.gz", hash = "sha256:5ffb077fe70b699c88d5caab999ae80e192fe28bf6cda7989b7e79b1e4e2dcd3"}, + {file = "types_pyOpenSSL-23.3.0.0-py3-none-any.whl", hash = "sha256:00171433653265843b7469ddb9f3c86d698668064cc33ef10537822156130ebf"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" + [[package]] name = "types-python-dateutil" version = "2.8.19.14" @@ -7857,6 +7871,21 @@ files = [ {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"}, ] +[[package]] +name = "types-redis" +version = "4.6.0.11" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.7" +files = [ + {file = "types-redis-4.6.0.11.tar.gz", hash = "sha256:c8cfc84635183deca2db4a528966c5566445fd3713983f0034fb0f5a09e0890d"}, + {file = "types_redis-4.6.0.11-py3-none-any.whl", hash = "sha256:94fc61118601fb4f79206b33b9f4344acff7ca1d7bba67834987fb0efcf6a770"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + [[package]] name = "types-requests" version = "2.31.0.6" @@ -8636,4 +8665,4 @@ extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "as [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "ad450a24cd3a74ccb5bd08ecee8d354a9b93e95e809a2676e166f1bd26c3e95d" +content-hash = "0e834d5ec14678eca62929509cbdcf146e85fe317ea4694d7d10520d15a8d5f5" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index c0a1ef018de18..6ecf331b24f9c 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -144,6 +144,7 @@ ruff = "^0.1.5" types-toml = "^0.10.8.1" types-pytz = "^2023.3.0.0" types-chardet = "^5.0.4.6" +types-redis = "^4.3.21.6" mypy-protobuf = "^3.0.0" [tool.poetry.group.typing.dependencies] diff --git a/libs/community/tests/integration_tests/cache/__init__.py b/libs/community/tests/integration_tests/cache/__init__.py deleted file mode 100644 index f75c193f46f68..0000000000000 --- a/libs/community/tests/integration_tests/cache/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""All integration tests for Cache objects.""" diff --git a/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py b/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py index b0ae5201cdc2d..eb76617de267c 100644 --- a/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py +++ b/libs/community/tests/integration_tests/callbacks/test_langchain_tracer.py @@ -3,13 +3,11 @@ import os from aiohttp import ClientSession -from langchain_core.callbacks import atrace_as_chain_group, trace_as_chain_group +from langchain_core.callbacks.manager import atrace_as_chain_group, trace_as_chain_group from langchain_core.prompts import PromptTemplate -from langchain_core.tracers.context import tracing_v2_enabled - -from langchain_community.callbacks import tracing_enabled -from langchain_community.chat_models import ChatOpenAI -from 
langchain_community.llms import OpenAI +from langchain_core.tracers.context import tracing_enabled, tracing_v2_enabled +from langchain_openai.chat_models import ChatOpenAI +from langchain_openai.llms import OpenAI questions = [ ( diff --git a/libs/community/tests/integration_tests/callbacks/test_openai_callback.py b/libs/community/tests/integration_tests/callbacks/test_openai_callback.py index 5112f4dd84e5b..5a9c281f0f5fc 100644 --- a/libs/community/tests/integration_tests/callbacks/test_openai_callback.py +++ b/libs/community/tests/integration_tests/callbacks/test_openai_callback.py @@ -1,8 +1,9 @@ """Integration tests for the langchain tracer module.""" import asyncio +from langchain_openai.llms import OpenAI + from langchain_community.callbacks import get_openai_callback -from langchain_community.llms import OpenAI async def test_openai_callback() -> None: diff --git a/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py b/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py index 13777d0b9fa5e..0fa1a4778aef2 100644 --- a/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py +++ b/libs/community/tests/integration_tests/callbacks/test_streamlit_callback.py @@ -1,6 +1,7 @@ """Integration tests for the StreamlitCallbackHandler module.""" import pytest +from langchain_openai.llms import OpenAI # Import the internal StreamlitCallbackHandler from its module - and not from # the `langchain_community.callbacks.streamlit` package - so that we don't end up using @@ -8,7 +9,6 @@ from langchain_community.callbacks.streamlit.streamlit_callback_handler import ( StreamlitCallbackHandler, ) -from langchain_community.llms import OpenAI @pytest.mark.requires("streamlit") diff --git a/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py b/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py index 7553d3198fe8a..aa94679450e59 100644 --- a/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py +++ b/libs/community/tests/integration_tests/callbacks/test_wandb_tracer.py @@ -3,9 +3,9 @@ import os from aiohttp import ClientSession +from langchain_openai.llms import OpenAI from langchain_community.callbacks import wandb_tracing_enabled -from langchain_community.llms import OpenAI questions = [ ( diff --git a/libs/community/tests/integration_tests/chat_models/test_vertexai.py b/libs/community/tests/integration_tests/chat_models/test_vertexai.py index 38ca90ed603c7..d09cc1e41e5d3 100644 --- a/libs/community/tests/integration_tests/chat_models/test_vertexai.py +++ b/libs/community/tests/integration_tests/chat_models/test_vertexai.py @@ -11,7 +11,12 @@ from unittest.mock import MagicMock, Mock, patch import pytest -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + HumanMessage, + SystemMessage, +) from langchain_core.outputs import LLMResult from langchain_community.chat_models import ChatVertexAI @@ -44,6 +49,7 @@ def test_vertexai_single_call(model_name: str) -> None: assert isinstance(response.content, str) +@pytest.mark.scheduled def test_candidates() -> None: model = ChatVertexAI(model_name="chat-bison@001", temperature=0.3, n=2) message = HumanMessage(content="Hello") @@ -65,6 +71,16 @@ async def test_vertexai_agenerate() -> None: assert response.generations[0][0] == sync_response.generations[0][0] +@pytest.mark.scheduled +async def test_vertexai_stream() -> None: + model = 
ChatVertexAI(temperature=0) + message = HumanMessage(content="Hello") + + sync_response = model.stream([message]) + for chunk in sync_response: + assert isinstance(chunk, AIMessageChunk) + + @pytest.mark.scheduled def test_vertexai_single_call_with_context() -> None: model = ChatVertexAI() diff --git a/libs/community/tests/integration_tests/llms/test_baseten.py b/libs/community/tests/integration_tests/llms/test_baseten.py index f00e3372d23e7..874e6188af1b3 100644 --- a/libs/community/tests/integration_tests/llms/test_baseten.py +++ b/libs/community/tests/integration_tests/llms/test_baseten.py @@ -1,7 +1,7 @@ """Test Baseten API wrapper.""" import os -from langchain_core.language_models.llmsten import Baseten +from langchain_community.llms.baseten import Baseten def test_baseten_call() -> None: diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index 3e6433b46a7f1..f3f468f8b566c 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -2706,4 +2706,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "aa5871a132c7130bae29b10a539d0673754dc1b4d2b57cb6084bf2ecab6b4462" +content-hash = "c8ef67c5f276f64738530de9f654dcd4670dca0aae3c18a7a70d40aea6b0ed1b" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 19deab3dd6276..d3507a65a228d 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -15,6 +15,7 @@ langsmith = "~0.0.63" tenacity = "^8.1.0" jsonpatch = "^1.33" anyio = ">=3,<5" +PyYAML = ">=5.3" [tool.poetry.group.lint.dependencies] ruff = "^0.1.5" diff --git a/libs/langchain/tests/integration_tests/cache/__init__.py b/libs/langchain/tests/integration_tests/cache/__init__.py index e69de29bb2d1d..f75c193f46f68 100644 --- a/libs/langchain/tests/integration_tests/cache/__init__.py +++ b/libs/langchain/tests/integration_tests/cache/__init__.py @@ -0,0 +1 @@ +"""All integration tests for Cache objects.""" diff --git a/libs/community/tests/integration_tests/cache/test_astradb.py b/libs/langchain/tests/integration_tests/cache/test_astradb.py similarity index 96% rename from libs/community/tests/integration_tests/cache/test_astradb.py rename to libs/langchain/tests/integration_tests/cache/test_astradb.py index 29e7a4312dd18..d17a631020ee8 100644 --- a/libs/community/tests/integration_tests/cache/test_astradb.py +++ b/libs/langchain/tests/integration_tests/cache/test_astradb.py @@ -16,8 +16,8 @@ import pytest from langchain_core.outputs import Generation, LLMResult -from langchain_community.cache import AstraDBCache, AstraDBSemanticCache -from langchain_community.globals import get_llm_cache, set_llm_cache +from langchain.cache import AstraDBCache, AstraDBSemanticCache +from langchain.globals import get_llm_cache, set_llm_cache from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings from tests.unit_tests.llms.fake_llm import FakeLLM diff --git a/libs/community/tests/integration_tests/cache/test_cassandra.py b/libs/langchain/tests/integration_tests/cache/test_cassandra.py similarity index 95% rename from libs/community/tests/integration_tests/cache/test_cassandra.py rename to libs/langchain/tests/integration_tests/cache/test_cassandra.py index 941e6d7936e6a..3dc186ed8d599 100644 --- a/libs/community/tests/integration_tests/cache/test_cassandra.py +++ b/libs/langchain/tests/integration_tests/cache/test_cassandra.py @@ -6,8 +6,8 @@ import pytest from langchain_core.outputs import Generation, LLMResult -from 
langchain_community.cache import CassandraCache, CassandraSemanticCache -from langchain_community.globals import get_llm_cache, set_llm_cache +from langchain.cache import CassandraCache, CassandraSemanticCache +from langchain.globals import get_llm_cache, set_llm_cache from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings from tests.unit_tests.llms.fake_llm import FakeLLM diff --git a/libs/community/tests/integration_tests/cache/test_gptcache.py b/libs/langchain/tests/integration_tests/cache/test_gptcache.py similarity index 93% rename from libs/community/tests/integration_tests/cache/test_gptcache.py rename to libs/langchain/tests/integration_tests/cache/test_gptcache.py index 91342fbb500c8..7e1e0c6957d10 100644 --- a/libs/community/tests/integration_tests/cache/test_gptcache.py +++ b/libs/langchain/tests/integration_tests/cache/test_gptcache.py @@ -4,8 +4,8 @@ import pytest from langchain_core.outputs import Generation -from langchain_community.cache import GPTCache -from langchain_community.globals import get_llm_cache, set_llm_cache +from langchain.cache import GPTCache +from langchain.globals import get_llm_cache, set_llm_cache from tests.unit_tests.llms.fake_llm import FakeLLM try: @@ -45,7 +45,7 @@ def init_gptcache_map_with_llm(cache_obj: Any, llm: str) -> None: "init_func", [None, init_gptcache_map, init_gptcache_map_with_llm] ) def test_gptcache_caching( - init_func: Union[Callable[[Any, str], None], Callable[[Any], None], None], + init_func: Union[Callable[[Any, str], None], Callable[[Any], None], None] ) -> None: """Test gptcache default caching behavior.""" set_llm_cache(GPTCache(init_func)) diff --git a/libs/community/tests/integration_tests/cache/test_momento_cache.py b/libs/langchain/tests/integration_tests/cache/test_momento_cache.py similarity index 96% rename from libs/community/tests/integration_tests/cache/test_momento_cache.py rename to libs/langchain/tests/integration_tests/cache/test_momento_cache.py index a42de252f5613..f5ef26ba663a5 100644 --- a/libs/community/tests/integration_tests/cache/test_momento_cache.py +++ b/libs/langchain/tests/integration_tests/cache/test_momento_cache.py @@ -13,8 +13,8 @@ import pytest from langchain_core.outputs import Generation, LLMResult -from langchain_community.cache import MomentoCache -from langchain_community.globals import set_llm_cache +from langchain.cache import MomentoCache +from langchain.globals import set_llm_cache from tests.unit_tests.llms.fake_llm import FakeLLM diff --git a/libs/community/tests/integration_tests/cache/test_redis_cache.py b/libs/langchain/tests/integration_tests/cache/test_redis_cache.py similarity index 97% rename from libs/community/tests/integration_tests/cache/test_redis_cache.py rename to libs/langchain/tests/integration_tests/cache/test_redis_cache.py index 049b5d60480b9..26ad630c5aa71 100644 --- a/libs/community/tests/integration_tests/cache/test_redis_cache.py +++ b/libs/langchain/tests/integration_tests/cache/test_redis_cache.py @@ -8,8 +8,8 @@ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage from langchain_core.outputs import ChatGeneration, Generation, LLMResult -from langchain_community.cache import RedisCache, RedisSemanticCache -from langchain_community.globals import get_llm_cache, set_llm_cache +from langchain.cache import RedisCache, RedisSemanticCache +from langchain.globals import get_llm_cache, set_llm_cache from tests.integration_tests.vectorstores.fake_embeddings import ( ConsistentFakeEmbeddings, FakeEmbeddings,