Merge pull request #125 from mattf/add-retriever-24.12
add support for nvidia/llama-3.2-nv-embedqa-1b-v2 and nvidia/llama-3.2-nv-rerankqa-1b-v2
mattf authored Dec 16, 2024
2 parents c3d8365 + 7fd8109 commit 6dd747a
Showing 3 changed files with 17 additions and 1 deletion.
11 changes: 11 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -624,6 +624,11 @@ def validate_client(self) -> "Model":
        model_type="embedding",
        client="NVIDIAEmbeddings",
    ),
+    "nvidia/llama-3.2-nv-embedqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+        model_type="embedding",
+        client="NVIDIAEmbeddings",
+    ),
}

RANKING_MODEL_TABLE = {
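
For context, a minimal sketch of how the new embedding entry would be exercised once registered. This is a usage sketch, not part of the change: it assumes an NVIDIA_API_KEY is set in the environment and uses the standard LangChain embeddings interface.

    from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

    # Model id taken from the new EMBEDDING_MODEL_TABLE entry above.
    embedder = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2")

    # embed_documents returns one vector per input text.
    vectors = embedder.embed_documents(["What is acceleration?"])
    print(len(vectors[0]))  # embedding dimensionality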
@@ -646,6 +651,12 @@ def validate_client(self) -> "Model":
        client="NVIDIARerank",
        endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking",
    ),
+    "nvidia/llama-3.2-nv-rerankqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-rerankqa-1b-v2",
+        model_type="ranking",
+        client="NVIDIARerank",
+        endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+    ),
}

COMPLETION_MODEL_TABLE = {
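Likewise, a hedged sketch of driving the new reranking entry through NVIDIARerank. compress_documents is the standard LangChain document-compressor interface; an NVIDIA_API_KEY in the environment is assumed, and the relevance_score metadata key is an assumption about how scores are surfaced.

    from langchain_core.documents import Document
    from langchain_nvidia_ai_endpoints import NVIDIARerank

    # Model id and client taken from the new RANKING_MODEL_TABLE entry above.
    reranker = NVIDIARerank(model="nvidia/llama-3.2-nv-rerankqa-1b-v2")

    docs = [Document(page_content=t) for t in ["F = ma", "Plants use sunlight."]]
    ranked = reranker.compress_documents(documents=docs, query="What is acceleration?")
    for d in ranked:
        print(d.metadata.get("relevance_score"), d.page_content)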
5 changes: 5 additions & 0 deletions libs/ai-endpoints/tests/integration_tests/test_chat_models.py
@@ -236,6 +236,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_a(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
        llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "max_tokens" in str(llm._client.last_response.content)

@@ -250,6 +251,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_b(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
        llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    # custom error string -
    # model inference failed -- ValueError: A requested length of the model output
@@ -306,6 +308,7 @@ def test_ai_endpoints_invoke_seed_default(chat_model: str, mode: dict) -> None:
def test_ai_endpoints_invoke_seed_range(chat_model: str, mode: dict, seed: int) -> None:
    llm = ChatNVIDIA(model=chat_model, seed=seed, **mode)
    llm.invoke("What's in a seed?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code == 200


@@ -332,6 +335,7 @@ def test_ai_endpoints_invoke_temperature_negative(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, temperature=temperature, **mode)
        llm.invoke("What's in a temperature?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "temperature" in str(llm._client.last_response.content)

@@ -360,6 +364,7 @@ def test_ai_endpoints_invoke_top_p_negative(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, top_p=top_p, **mode)
        llm.invoke("What's in a top_p?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "top_p" in str(llm._client.last_response.content)

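All five additions in this file apply the same guard: last_response is presumably Optional on the client, so asserting it is not None both fails with a clear message if no HTTP request was ever issued and narrows the type before .status_code is read. A distilled sketch of the pattern (helper name hypothetical):

    import pytest

    def assert_rejected_with_4xx(llm, prompt: str) -> None:
        # Hypothetical helper condensing the test pattern above.
        with pytest.raises(Exception):
            llm.invoke(prompt)
        response = llm._client.last_response
        assert response is not None  # a request was actually sent
        assert response.status_code in (400, 422)  # server rejected the parameter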
2 changes: 1 addition & 1 deletion libs/ai-endpoints/tests/integration_tests/test_ranking.py
@@ -202,7 +202,7 @@ def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None
    query = "What is acceleration?"
    documents = [
        Document(page_content="NVIDIA " * length)
-        for length in [32, 1024, 64, 128, 2048, 256, 512]
+        for length in [32, 1024, 64, 128, 10240, 256, 512]
    ]
    truncate_param = {}
    if truncate:
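The only functional change here is bumping one document length from 2048 to 10240 repetitions of "NVIDIA ", presumably so that at least one document reliably exceeds the context window of the newer 1b-v2 models and truncate="NONE" still fails as the test expects. Rough sizing as a sanity check (7 characters per repetition):

    # Longest document after this change:
    chars = len("NVIDIA " * 10240)  # 7 * 10240 = 71,680 characters
    print(chars)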
