diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
index 93f743b7..466570ef 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -1,6 +1,4 @@
-"""Embeddings Components Derived from NVEModel/Embeddings"""
-
-from typing import Any, List, Literal, Optional
+from typing import Any, Dict, List, Literal, Optional
 
 from langchain_core.embeddings import Embeddings
 from langchain_core.outputs.llm_result import LLMResult
@@ -28,6 +26,8 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
     - truncate: "NONE", "START", "END", truncate input text if it exceeds
         the model's maximum token length. Default is "NONE", which raises
         an error if an input is too long.
+    - dimensions: int, the number of dimensions for the embeddings. This parameter is
+        not supported by all models.
     """
 
     model_config = ConfigDict(
@@ -47,6 +47,13 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
             "Default is 'NONE', which raises an error if an input is too long."
         ),
     )
+    dimensions: Optional[int] = Field(
+        default=None,
+        description=(
+            "The number of dimensions for the embeddings. This parameter is not "
+            "supported by all models."
+        ),
+    )
     max_batch_size: int = Field(default=_DEFAULT_BATCH_SIZE)
 
     def __init__(self, **kwargs: Any):
@@ -67,6 +74,8 @@ def __init__(self, **kwargs: Any):
             truncate (str): "NONE", "START", "END", truncate input text if it exceeds
                 the model's context length. Default is "NONE", which raises an error
                 if an input is too long.
+            dimensions (int): The number of dimensions for the embeddings. This
+                parameter is not supported by all models.
 
         API Key:
         - The recommended way to provide the API key is through the `NVIDIA_API_KEY`
@@ -125,7 +134,8 @@ def _embed(
         # user: str -- ignored
         # truncate: "NONE" | "START" | "END" -- default "NONE", error raised if
         #           an input is too long
-        payload = {
+        # dimensions: int -- not supported by all models
+        payload: Dict[str, Any] = {
             "input": texts,
             "model": self.model,
             "encoding_format": "float",
@@ -133,6 +143,8 @@ def _embed(
         }
         if self.truncate:
             payload["truncate"] = self.truncate
+        if self.dimensions:
+            payload["dimensions"] = self.dimensions
 
         response = self._client.get_req(
             payload=payload,
diff --git a/libs/ai-endpoints/tests/integration_tests/test_embeddings.py b/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
index e10bfdc7..b2cf374d 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_embeddings.py
@@ -97,5 +97,81 @@ def test_embed_documents_truncate(
     assert len(output) == count
 
 
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_query_with_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    query = "foo bar"
+    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
+    assert len(embedding.embed_query(query)) == dimensions
+
+
+@pytest.mark.parametrize("dimensions", [32, 64, 128, 2048])
+def test_embed_documents_with_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    documents = ["foo bar", "bar foo"]
+    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
+    output = embedding.embed_documents(documents)
+    assert len(output) == len(documents)
+    assert all(len(doc) == dimensions for doc in output)
+
+
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_query_with_large_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    query = "foo bar"
+    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
+    assert 2048 <= len(embedding.embed_query(query)) < dimensions
+
+
+@pytest.mark.parametrize("dimensions", [102400])
+def test_embed_documents_with_large_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    documents = ["foo bar", "bar foo"]
+    embedding = NVIDIAEmbeddings(model=embedding_model, dimensions=dimensions, **mode)
+    output = embedding.embed_documents(documents)
+    assert len(output) == len(documents)
+    assert all(2048 <= len(doc) < dimensions for doc in output)
+
+
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_query_invalid_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    query = "foo bar"
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbeddings(
+            model=embedding_model, dimensions=dimensions, **mode
+        ).embed_query(query)
+    assert "400" in str(exc.value)
+
+
+@pytest.mark.parametrize("dimensions", [-1])
+def test_embed_documents_invalid_dimensions(
+    embedding_model: str, mode: dict, dimensions: int
+) -> None:
+    if embedding_model != "nvidia/llama-3.2-nv-embedqa-1b-v2":
+        pytest.skip("Model does not support custom dimensions.")
+    documents = ["foo bar", "bar foo"]
+    with pytest.raises(Exception) as exc:
+        NVIDIAEmbeddings(
+            model=embedding_model, dimensions=dimensions, **mode
+        ).embed_documents(documents)
+    assert "400" in str(exc.value)
+
+
 # todo: test max_length > max length accepted by the model
 # todo: test max_batch_size > max batch size accepted by the model
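
Usage sketch for review: how the new `dimensions` parameter is exercised from
client code. This is a minimal illustration, assuming `NVIDIA_API_KEY` is set
in the environment and the selected model accepts variable dimensions; the
model name and the value 128 are taken from the integration tests above, not
requirements of the API.

    from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

    # Request 128-dimensional vectors. Per the tests above, invalid values
    # (e.g. -1) are rejected server-side with a 400, and the large-dimensions
    # tests suggest oversized requests are clamped to the model's native size
    # rather than honored.
    embedder = NVIDIAEmbeddings(
        model="nvidia/llama-3.2-nv-embedqa-1b-v2",
        dimensions=128,
    )

    vector = embedder.embed_query("foo bar")
    assert len(vector) == 128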