Skip to content

Commit

Permalink
pinecone: Review pinecone tests (#29073)
Browse files: browse the repository at this point in the history
Title: langchain-pinecone: improve test structure and async handling

Description: This PR improves the test infrastructure for the
langchain-pinecone package by:
1. Implementing LangChain's standard test patterns for embeddings
2. Adding comprehensive configuration testing
3. Improving async test coverage
4. Fixing integration test issues with namespaces and async markers

The changes make the tests more robust, maintainable, and aligned with
LangChain's testing standards while ensuring proper async behavior in
the embeddings implementation.

Key improvements:
- Added standard EmbeddingsTests implementation
- Split custom configuration tests into a separate test class
- Added proper async test coverage with pytest-asyncio
- Fixed namespace handling in vector store integration tests
- Improved test organization and documentation

Dependencies: None (uses existing test dependencies)

Tests and Documentation:
- ✅ Added standard test implementation following LangChain's patterns
- ✅ Added comprehensive unit tests for configuration and async behavior
- ✅ All tests passing locally
- No documentation changes needed (internal test improvements only)

Twitter handle: N/A

---------

Co-authored-by: Erick Friis <[email protected]>
  • Loading branch information
cwaddingham and efriis authored Jan 7, 2025
1 parent d9c51b7 commit ce9e9f9
Show file tree
Hide file tree
Showing 7 changed files with 616 additions and 519 deletions.
1 change: 0 additions & 1 deletion libs/partners/pinecone/langchain_pinecone/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def validate_environment(self) -> Self:
self._client = client

# Ensure async_client is lazily initialized
_ = self.async_client
return self

def _get_batch_iterator(self, texts: List[str]) -> Iterable:
Expand Down
955 changes: 494 additions & 461 deletions libs/partners/pinecone/poetry.lock

Large diffs are not rendered by default.

17 changes: 11 additions & 6 deletions libs/partners/pinecone/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = [ "poetry-core>=1.0.0",]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
Expand All @@ -24,17 +24,22 @@ langchain-core = "^0.3.21"
pinecone = "^5.4.0"
aiohttp = ">=3.9.5,<3.10"
numpy = ">=1.26.0,<2.0.0"
langchain-tests = "^0.3.7"

[tool.ruff.lint]
select = [ "E", "F", "I", "T201",]
select = ["E", "F", "I", "T201"]

[tool.coverage.run]
omit = [ "tests/*",]
omit = ["tests/*"]

[tool.pytest.ini_options]
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
markers = [ "requires: mark tests as requiring a specific library", "compile: mark placeholder test used to compile integration tests without running them",]
markers = [
"requires: mark tests as requiring a specific library",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

[tool.poetry.group.test]
optional = true
Expand All @@ -52,12 +57,12 @@ optional = true
optional = true

[tool.poetry.group.test.dependencies]
pytest = "^7.3.0"
pytest = "^8"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
syrupy = "^4.0.2"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
pytest-asyncio = ">=0.25.0,<1"

[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0"
Expand Down
22 changes: 16 additions & 6 deletions libs/partners/pinecone/tests/integration_tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
import time
from typing import AsyncGenerator

import pytest
from langchain_core.documents import Document
from pinecone import Pinecone, ServerlessSpec # type: ignore

from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore
from tests.integration_tests.test_vectorstores import DEFAULT_SLEEP

DIMENSION = 1024
INDEX_NAME = "langchain-pinecone-embeddings"
MODEL = "multilingual-e5-large"
NAMESPACE_NAME = "test_namespace"


@pytest.fixture()
def embd_client() -> PineconeEmbeddings:
return PineconeEmbeddings(model=MODEL)
@pytest.fixture(scope="function")
async def embd_client() -> AsyncGenerator[PineconeEmbeddings, None]:
client = PineconeEmbeddings(model=MODEL)
yield client
await client.async_client.close()


@pytest.fixture
Expand Down Expand Up @@ -44,6 +49,7 @@ def test_embed_query(embd_client: PineconeEmbeddings) -> None:
assert len(out) == DIMENSION


@pytest.mark.asyncio
async def test_aembed_query(embd_client: PineconeEmbeddings) -> None:
out = await embd_client.aembed_query("Hello, world!")
assert isinstance(out, list)
Expand All @@ -57,6 +63,7 @@ def test_embed_documents(embd_client: PineconeEmbeddings) -> None:
assert len(out[0]) == DIMENSION


@pytest.mark.asyncio
async def test_aembed_documents(embd_client: PineconeEmbeddings) -> None:
out = await embd_client.aembed_documents(["Hello, world!", "This is a test."])
assert isinstance(out, list)
Expand All @@ -68,7 +75,10 @@ def test_vector_store(
embd_client: PineconeEmbeddings, pc_index: Pinecone.Index
) -> None:
vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embd_client)
vectorstore.add_documents([Document("Hello, world!"), Document("This is a test.")])
time.sleep(5)
resp = vectorstore.similarity_search(query="hello")
vectorstore.add_documents(
[Document("Hello, world!"), Document("This is a test.")],
namespace=NAMESPACE_NAME,
)
time.sleep(DEFAULT_SLEEP) # Increase wait time to ensure indexing is complete
resp = vectorstore.similarity_search(query="hello", namespace=NAMESPACE_NAME)
assert len(resp) == 2
49 changes: 22 additions & 27 deletions libs/partners/pinecone/tests/integration_tests/test_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import pytest # type: ignore[import-not-found]
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings # type: ignore[import-not-found]
from pinecone import PodSpec
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from pinecone import ServerlessSpec
from pytest_mock import MockerFixture # type: ignore[import-not-found]

from langchain_pinecone import PineconeVectorStore
Expand All @@ -20,52 +21,46 @@
DEFAULT_SLEEP = 20


class TestPinecone:
class TestPinecone(VectorStoreIntegrationTests):
index: "pinecone.Index"
pc: "pinecone.Pinecone"

@classmethod
def setup_class(cls) -> None:
def setup_class(self) -> None:
import pinecone

client = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index_list = client.list_indexes()
for i in index_list:
if i["name"] == INDEX_NAME:
client.delete_index(INDEX_NAME)
break
if len(index_list) > 0:
time.sleep(DEFAULT_SLEEP) # prevent race with creation
if INDEX_NAME in [
i["name"] for i in index_list
]: # change to list comprehension
client.delete_index(INDEX_NAME)
time.sleep(DEFAULT_SLEEP) # prevent race with subsequent creation
client.create_index(
name=INDEX_NAME,
dimension=DIMENSION,
metric="cosine",
spec=PodSpec(environment="gcp-starter"),
spec=ServerlessSpec(cloud="aws", region="us-west-2"),
)

cls.index = client.Index(INDEX_NAME)

# insure the index is empty
index_stats = cls.index.describe_index_stats()
assert index_stats["dimension"] == DIMENSION
if index_stats["namespaces"].get(NAMESPACE_NAME) is not None:
assert index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == 0
self.index = client.Index(INDEX_NAME)
self.pc = client

@classmethod
def teardown_class(cls) -> None:
index_stats = cls.index.describe_index_stats()
for _namespace_name in index_stats["namespaces"].keys():
cls.index.delete(delete_all=True, namespace=_namespace_name)
def teardown_class(self) -> None:
self.pc.delete_index()

@pytest.fixture(autouse=True)
def setup(self) -> None:
# delete all the vectors in the index
print("called") # noqa: T201
try:
self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
time.sleep(DEFAULT_SLEEP) # prevent race condition with previous step
except Exception:
# if namespace not found
pass
index_stats = self.index.describe_index_stats()
if index_stats["total_vector_count"] > 0:
try:
self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
except Exception:
# if namespace not found
pass

@pytest.fixture
def embedding_openai(self) -> OpenAIEmbeddings:
Expand Down
87 changes: 71 additions & 16 deletions libs/partners/pinecone/tests/unit_tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,83 @@
from typing import Any, Type
from unittest.mock import patch

import aiohttp
import pytest
from langchain_core.utils import convert_to_secret_str
from langchain_tests.unit_tests.embeddings import EmbeddingsTests

from langchain_pinecone import PineconeEmbeddings

API_KEY = convert_to_secret_str("NOT_A_VALID_KEY")
MODEL_NAME = "multilingual-e5-large"


def test_default_config() -> None:
e = PineconeEmbeddings(
pinecone_api_key=API_KEY, # type: ignore[call-arg]
model=MODEL_NAME,
)
assert e.batch_size == 96
@pytest.fixture(autouse=True)
def mock_pinecone() -> Any:
"""Mock Pinecone client for all tests."""
with patch("langchain_pinecone.embeddings.PineconeClient") as mock:
yield mock


class TestPineconeEmbeddingsStandard(EmbeddingsTests):
"""Standard LangChain embeddings tests."""

@property
def embeddings_class(self) -> Type[PineconeEmbeddings]:
"""Get the class under test."""
return PineconeEmbeddings

@property
def embedding_model_params(self) -> dict:
"""Get the parameters for initializing the embeddings model."""
return {
"model": MODEL_NAME,
"pinecone_api_key": API_KEY,
}


class TestPineconeEmbeddingsConfig:
"""Additional configuration tests for PineconeEmbeddings."""

def test_default_config(self) -> None:
"""Test default configuration is set correctly."""
embeddings = PineconeEmbeddings(model=MODEL_NAME, pinecone_api_key=API_KEY) # type: ignore
assert embeddings.batch_size == 96
assert embeddings.query_params == {"input_type": "query", "truncation": "END"}
assert embeddings.document_params == {
"input_type": "passage",
"truncation": "END",
}
assert embeddings.dimension == 1024

def test_custom_config(self) -> None:
"""Test custom configuration overrides defaults."""
embeddings = PineconeEmbeddings(
model=MODEL_NAME,
api_key=API_KEY,
batch_size=128,
query_params={"custom": "param"},
document_params={"other": "param"},
)
assert embeddings.batch_size == 128
assert embeddings.query_params == {"custom": "param"}
assert embeddings.document_params == {"other": "param"}

def test_default_config_with_api_key() -> None:
e = PineconeEmbeddings(api_key=API_KEY, model=MODEL_NAME)
assert e.batch_size == 96
@pytest.mark.asyncio
async def test_async_client_initialization(self) -> None:
"""Test async client is initialized correctly and only when needed."""
embeddings = PineconeEmbeddings(model=MODEL_NAME, api_key=API_KEY)
assert embeddings._async_client is None

# Access async_client property
client = embeddings.async_client
assert client is not None
assert isinstance(client, aiohttp.ClientSession)

def test_custom_config() -> None:
e = PineconeEmbeddings(
pinecone_api_key=API_KEY, # type: ignore[call-arg]
model=MODEL_NAME,
batch_size=128,
)
assert e.batch_size == 128
# Ensure headers are set correctly
expected_headers = {
"Api-Key": API_KEY.get_secret_value(),
"Content-Type": "application/json",
"X-Pinecone-API-Version": "2024-10",
}
assert client._default_headers == expected_headers
4 changes: 2 additions & 2 deletions libs/partners/pinecone/tests/unit_tests/test_vectorstores.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from unittest.mock import Mock

from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore
from langchain_pinecone.vectorstores import PineconeVectorStore


def test_initialization() -> None:
Expand All @@ -9,7 +9,7 @@ def test_initialization() -> None:
index = Mock()
embedding = Mock()
text_key = "xyz"
Pinecone(index, embedding, text_key)
PineconeVectorStore(index, embedding, text_key)


def test_id_prefix() -> None:
Expand Down

0 comments on commit ce9e9f9

Please sign in to comment.