Merge pull request #125 from mattf/add-retriever-24.12
add support for nvidia/llama-3.2-nv-embedqa-1b-v2 and nvidia/llama-3.2-nv-rerankqa-1b-v2
mattf authored Dec 16, 2024
2 parents c3d8365 + 7fd8109 commit 6dd747a
Showing 3 changed files with 17 additions and 1 deletion.
11 changes: 11 additions & 0 deletions libs/ai-endpoints/langchain_nvidia_ai_endpoints/_statics.py
@@ -624,6 +624,11 @@ def validate_client(self) -> "Model":
        model_type="embedding",
        client="NVIDIAEmbeddings",
    ),
+    "nvidia/llama-3.2-nv-embedqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+        model_type="embedding",
+        client="NVIDIAEmbeddings",
+    ),
}

RANKING_MODEL_TABLE = {
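
For context, a minimal sketch of how the new embedding entry would be exercised once registered. This is a usage sketch, not part of the change: it assumes an NVIDIA_API_KEY is set in the environment and uses the standard LangChain embeddings interface.

    from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

    # Model id taken from the new EMBEDDING_MODEL_TABLE entry above.
    embedder = NVIDIAEmbeddings(model="nvidia/llama-3.2-nv-embedqa-1b-v2")

    # embed_documents returns one vector per input text.
    vectors = embedder.embed_documents(["What is acceleration?"])
    print(len(vectors[0]))  # embedding dimensionality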
@@ -646,6 +651,12 @@ def validate_client(self) -> "Model":
        client="NVIDIARerank",
        endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v1/reranking",
    ),
+    "nvidia/llama-3.2-nv-rerankqa-1b-v2": Model(
+        id="nvidia/llama-3.2-nv-rerankqa-1b-v2",
+        model_type="ranking",
+        client="NVIDIARerank",
+        endpoint="https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking",
+    ),
}

COMPLETION_MODEL_TABLE = {
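Likewise, a hedged sketch of driving the new reranking entry through NVIDIARerank. compress_documents is the standard LangChain document-compressor interface; an NVIDIA_API_KEY in the environment is assumed, and the relevance_score metadata key is an assumption about how scores are surfaced.

    from langchain_core.documents import Document
    from langchain_nvidia_ai_endpoints import NVIDIARerank

    # Model id and client taken from the new RANKING_MODEL_TABLE entry above.
    reranker = NVIDIARerank(model="nvidia/llama-3.2-nv-rerankqa-1b-v2")

    docs = [Document(page_content=t) for t in ["F = ma", "Plants use sunlight."]]
    ranked = reranker.compress_documents(documents=docs, query="What is acceleration?")
    for d in ranked:
        print(d.metadata.get("relevance_score"), d.page_content)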
5 changes: 5 additions & 0 deletions libs/ai-endpoints/tests/integration_tests/test_chat_models.py
@@ -236,6 +236,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_a(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
        llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "max_tokens" in str(llm._client.last_response.content)

@@ -250,6 +251,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_b(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
        llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    # custom error string -
    # model inference failed -- ValueError: A requested length of the model output
@@ -306,6 +308,7 @@ def test_ai_endpoints_invoke_seed_default(chat_model: str, mode: dict) -> None:
def test_ai_endpoints_invoke_seed_range(chat_model: str, mode: dict, seed: int) -> None:
    llm = ChatNVIDIA(model=chat_model, seed=seed, **mode)
    llm.invoke("What's in a seed?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code == 200


@@ -332,6 +335,7 @@ def test_ai_endpoints_invoke_temperature_negative(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, temperature=temperature, **mode)
        llm.invoke("What's in a temperature?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "temperature" in str(llm._client.last_response.content)

@@ -360,6 +364,7 @@ def test_ai_endpoints_invoke_top_p_negative(
    with pytest.raises(Exception):
        llm = ChatNVIDIA(model=chat_model, top_p=top_p, **mode)
        llm.invoke("What's in a top_p?")
+    assert llm._client.last_response is not None
    assert llm._client.last_response.status_code in [400, 422]
    assert "top_p" in str(llm._client.last_response.content)

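All five additions in this file apply the same guard: last_response is presumably Optional on the client, so asserting it is not None both fails with a clear message if no HTTP request was ever issued and narrows the type before .status_code is read. A distilled sketch of the pattern (helper name hypothetical):

    import pytest

    def assert_rejected_with_4xx(llm, prompt: str) -> None:
        # Hypothetical helper condensing the test pattern above.
        with pytest.raises(Exception):
            llm.invoke(prompt)
        response = llm._client.last_response
        assert response is not None  # a request was actually sent
        assert response.status_code in (400, 422)  # server rejected the parameter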
2 changes: 1 addition & 1 deletion libs/ai-endpoints/tests/integration_tests/test_ranking.py
@@ -202,7 +202,7 @@ def test_truncate_negative(rerank_model: str, mode: dict, truncate: str) -> None
    query = "What is acceleration?"
    documents = [
        Document(page_content="NVIDIA " * length)
-        for length in [32, 1024, 64, 128, 2048, 256, 512]
+        for length in [32, 1024, 64, 128, 10240, 256, 512]
    ]
    truncate_param = {}
    if truncate:
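The only functional change here is bumping one document length from 2048 to 10240 repetitions of "NVIDIA ", presumably so that at least one document reliably exceeds the context window of the newer 1b-v2 models and truncate="NONE" still fails as the test expects. Rough sizing as a sanity check (7 characters per repetition):

    # Longest document after this change:
    chars = len("NVIDIA " * 10240)  # 7 * 10240 = 71,680 characters
    print(chars)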
