From 04b0d20c784fa301756e46ca22d731eb1771a7a3 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 16 Dec 2024 16:26:26 -0500
Subject: [PATCH 1/2] add support for nvidia/llama-3.2-nv-embedqa-1b-v2 and
 nvidia/llama-3.2-nv-rerankqa-1b-v2

---
 .../langchain_nvidia_ai_endpoints/_statics.py | 11 +++++++++++
 .../tests/integration_tests/test_ranking.py   |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
index 916b6d2e..cf111e49 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -624,6 +624,11 @@ def validate_client(self) -> "Model":
         model_type="embedding",
         client="NVIDIAEmbeddings",
     ),
+    "nvidia/llama-3.2-nv-embedqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+        model_type="embedding",
+        client="NVIDIAEmbeddings",
+    ),
 }
 
 RANKING_MODEL_TABLE = {
@@ -646,6 +651,12 @@ def validate_client(self) -> "Model":
         client="NVIDIARerank",
         endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking",
     ),
+    "nvidia/llama-3.2-nv-rerankqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-rerankqa-1b-v2",
+        model_type="ranking",
+        client="NVIDIARerank",
+        endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+    ),
 }
 
 COMPLETION_MODEL_TABLE = {
diff --git a/libs/ai-endpoints/tests/integration_tests/test_ranking.py b/libs/ai-endpoints/tests/integration_tests/test_ranking.py
index 47fc8438..d067a1e6 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_ranking.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_ranking.py
@@ -202,7 +202,7 @@ def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None
     query = "What is acceleration?"
     documents = [
         Document(page_content="NVIDIA " * length)
-        for length in [32, 1024, 64, 128, 2048, 256, 512]
+        for length in [32, 1024, 64, 128, 10240, 256, 512]
     ]
     truncate_param = {}
     if truncate:

From 7fd810927912d086b539401d5fd1dea4b8b18d37 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 16 Dec 2024 16:37:02 -0500
Subject: [PATCH 2/2] fix lint

---
 .../ai-endpoints/tests/integration_tests/test_chat_models.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libs/ai-endpoints/tests/integration_tests/test_chat_models.py b/libs/ai-endpoints/tests/integration_tests/test_chat_models.py
index ac657085..d81c9e19 100644
--- a/libs/ai-endpoints/tests/integration_tests/test_chat_models.py
+++ b/libs/ai-endpoints/tests/integration_tests/test_chat_models.py
@@ -236,6 +236,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_a(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
         llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
     assert llm._client.last_response.status_code in [400, 422]
     assert "max_tokens" in str(llm._client.last_response.content)
 
@@ -250,6 +251,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_b(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
         llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
     assert llm._client.last_response.status_code in [400, 422]
     # custom error string -
     # model inference failed -- ValueError: A requested length of the model output
@@ -306,6 +308,7 @@ def test_ai_endpoints_invoke_seed_default(chat_model: str, mode: dict) -> None:
 def test_ai_endpoints_invoke_seed_range(chat_model: str, mode: dict, seed: int) -> None:
     llm = ChatNVIDIA(model=chat_model, seed=seed, **mode)
     llm.invoke("What's in a seed?")
+    assert llm._client.last_response is not None
     assert llm._client.last_response.status_code == 200
 
 
@@ -332,6 +335,7 @@ def test_ai_endpoints_invoke_temperature_negative(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, temperature=temperature, **mode)
         llm.invoke("What's in a temperature?")
+    assert llm._client.last_response is not None
     assert llm._client.last_response.status_code in [400, 422]
     assert "temperature" in str(llm._client.last_response.content)
 
@@ -360,6 +364,7 @@ def test_ai_endpoints_invoke_top_p_negative(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, top_p=top_p, **mode)
         llm.invoke("What's in a top_p?")
+    assert llm._client.last_response is not None
     assert llm._client.last_response.status_code in [400, 422]
     assert "top_p" in str(llm._client.last_response.content)
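
Reviewer note (not part of the patch series): a minimal sketch for exercising the two new v2 entries locally, assuming langchain-nvidia-ai-endpoints is installed with these changes applied and NVIDIA_API_KEY is set in the environment. The model names come straight from the tables above; everything else (the sample texts and the printed fields) is illustrative only.

# Sketch: smoke-test the newly registered v2 embedding and reranking models.
# Assumes NVIDIA_API_KEY is exported; calls go to the hosted API endpoints.
from langchain_core.documents import Document
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, NVIDIARerank

# Embedding model registered in EMBEDDING_MODEL_TABLE by this patch.
embedder = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2")
vectors = embedder.embed_documents(["Acceleration is the rate of change of velocity."])
print(len(vectors), len(vectors[0]))  # number of vectors, embedding dimensionality

# Reranking model registered in RANKING_MODEL_TABLE by this patch.
reranker = NVIDIARerank(model="nvidia/llama-3.2-nv-rerankqa-1b-v2")
ranked = reranker.compress_documents(
    documents=[
        Document(page_content="NVIDIA designs GPUs."),
        Document(page_content="Acceleration is the rate of change of velocity."),
    ],
    query="What is acceleration?",
)
print(ranked[0].page_content)  # highest-scoring document first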