pinecone: Review pinecone tests (#29073)

Title: langchain-pinecone: improve test structure and async handling

Description: This PR improves the test infrastructure for the langchain-pinecone package by:
1. Implementing LangChain's standard test patterns for embeddings
2. Adding comprehensive configuration testing
3. Improving async test coverage
4. Fixing integration test issues with namespaces and async markers

The changes make the tests more robust, maintainable, and aligned with LangChain's testing standards while ensuring proper async behavior in the embeddings implementation.

Key improvements:
- Added standard EmbeddingsTests implementation
- Split custom configuration tests into a separate test class
- Added proper async test coverage with pytest-asyncio
- Fixed namespace handling in vector store integration tests
- Improved test organization and documentation

Dependencies: None (uses existing test dependencies)

Tests and Documentation:
- ✅ Added standard test implementation following LangChain's patterns
- ✅ Added comprehensive unit tests for configuration and async behavior
- ✅ All tests passing locally
- No documentation changes needed (internal test improvements only)

Twitter handle: N/A

---------

Co-authored-by: Erick Friis <[email protected]>
langchain-ai · Jan 7, 2025 · ce9e9f9 · ce9e9f9
1 parent d9c51b7
commit ce9e9f9
Show file tree

Hide file tree

Showing 7 changed files with 616 additions and 519 deletions.
diff --git a/libs/partners/pinecone/langchain_pinecone/embeddings.py b/libs/partners/pinecone/langchain_pinecone/embeddings.py
@@ -106,7 +106,6 @@ def validate_environment(self) -> Self:
         self._client = client
 
         # Ensure async_client is lazily initialized
-        _ = self.async_client
         return self
 
     def _get_batch_iterator(self, texts: List[str]) -> Iterable:

diff --git a/libs/partners/pinecone/poetry.lock b/libs/partners/pinecone/poetry.lock
diff --git a/libs/partners/pinecone/pyproject.toml b/libs/partners/pinecone/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = [ "poetry-core>=1.0.0",]
+requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
@@ -24,17 +24,22 @@ langchain-core = "^0.3.21"
 pinecone = "^5.4.0"
 aiohttp = ">=3.9.5,<3.10"
 numpy = ">=1.26.0,<2.0.0"
+langchain-tests = "^0.3.7"
 
 [tool.ruff.lint]
-select = [ "E", "F", "I", "T201",]
+select = ["E", "F", "I", "T201"]
 
 [tool.coverage.run]
-omit = [ "tests/*",]
+omit = ["tests/*"]
 
 [tool.pytest.ini_options]
 addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
-markers = [ "requires: mark tests as requiring a specific library", "compile: mark placeholder test used to compile integration tests without running them",]
+markers = [
+    "requires: mark tests as requiring a specific library",
+    "compile: mark placeholder test used to compile integration tests without running them",
+]
 asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
 
 [tool.poetry.group.test]
 optional = true
@@ -52,12 +57,12 @@ optional = true
 optional = true
 
 [tool.poetry.group.test.dependencies]
-pytest = "^7.3.0"
+pytest = "^8"
 freezegun = "^1.2.2"
 pytest-mock = "^3.10.0"
 syrupy = "^4.0.2"
 pytest-watcher = "^0.3.4"
-pytest-asyncio = "^0.21.1"
+pytest-asyncio = ">=0.25.0,<1"
 
 [tool.poetry.group.codespell.dependencies]
 codespell = "^2.2.0"

diff --git a/libs/partners/pinecone/tests/integration_tests/test_embeddings.py b/libs/partners/pinecone/tests/integration_tests/test_embeddings.py
@@ -1,19 +1,24 @@
 import time
+from typing import AsyncGenerator
 
 import pytest
 from langchain_core.documents import Document
 from pinecone import Pinecone, ServerlessSpec  # type: ignore
 
 from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore
+from tests.integration_tests.test_vectorstores import DEFAULT_SLEEP
 
 DIMENSION = 1024
 INDEX_NAME = "langchain-pinecone-embeddings"
 MODEL = "multilingual-e5-large"
+NAMESPACE_NAME = "test_namespace"
 
 
-@pytest.fixture()
-def embd_client() -> PineconeEmbeddings:
-    return PineconeEmbeddings(model=MODEL)
+@pytest.fixture(scope="function")
+async def embd_client() -> AsyncGenerator[PineconeEmbeddings, None]:
+    client = PineconeEmbeddings(model=MODEL)
+    yield client
+    await client.async_client.close()
 
 
 @pytest.fixture
@@ -44,6 +49,7 @@ def test_embed_query(embd_client: PineconeEmbeddings) -> None:
     assert len(out) == DIMENSION
 
 
+@pytest.mark.asyncio
 async def test_aembed_query(embd_client: PineconeEmbeddings) -> None:
     out = await embd_client.aembed_query("Hello, world!")
     assert isinstance(out, list)
@@ -57,6 +63,7 @@ def test_embed_documents(embd_client: PineconeEmbeddings) -> None:
     assert len(out[0]) == DIMENSION
 
 
+@pytest.mark.asyncio
 async def test_aembed_documents(embd_client: PineconeEmbeddings) -> None:
     out = await embd_client.aembed_documents(["Hello, world!", "This is a test."])
     assert isinstance(out, list)
@@ -68,7 +75,10 @@ def test_vector_store(
     embd_client: PineconeEmbeddings, pc_index: Pinecone.Index
 ) -> None:
     vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embd_client)
-    vectorstore.add_documents([Document("Hello, world!"), Document("This is a test.")])
-    time.sleep(5)
-    resp = vectorstore.similarity_search(query="hello")
+    vectorstore.add_documents(
+        [Document("Hello, world!"), Document("This is a test.")],
+        namespace=NAMESPACE_NAME,
+    )
+    time.sleep(DEFAULT_SLEEP)  # Increase wait time to ensure indexing is complete
+    resp = vectorstore.similarity_search(query="hello", namespace=NAMESPACE_NAME)
     assert len(resp) == 2
diff --git a/libs/partners/pinecone/tests/integration_tests/test_vectorstores.py b/libs/partners/pinecone/tests/integration_tests/test_vectorstores.py
@@ -8,7 +8,8 @@
 import pytest  # type: ignore[import-not-found]
 from langchain_core.documents import Document
 from langchain_openai import OpenAIEmbeddings  # type: ignore[import-not-found]
-from pinecone import PodSpec
+from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
+from pinecone import ServerlessSpec
 from pytest_mock import MockerFixture  # type: ignore[import-not-found]
 
 from langchain_pinecone import PineconeVectorStore
@@ -20,52 +21,46 @@
 DEFAULT_SLEEP = 20
 
 
-class TestPinecone:
+class TestPinecone(VectorStoreIntegrationTests):
     index: "pinecone.Index"
+    pc: "pinecone.Pinecone"
 
     @classmethod
-    def setup_class(cls) -> None:
+    def setup_class(self) -> None:
         import pinecone
 
         client = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
         index_list = client.list_indexes()
-        for i in index_list:
-            if i["name"] == INDEX_NAME:
-                client.delete_index(INDEX_NAME)
-                break
-        if len(index_list) > 0:
-            time.sleep(DEFAULT_SLEEP)  # prevent race with creation
+        if INDEX_NAME in [
+            i["name"] for i in index_list
+        ]:  # change to list comprehension
+            client.delete_index(INDEX_NAME)
+            time.sleep(DEFAULT_SLEEP)  # prevent race with subsequent creation
         client.create_index(
             name=INDEX_NAME,
             dimension=DIMENSION,
             metric="cosine",
-            spec=PodSpec(environment="gcp-starter"),
+            spec=ServerlessSpec(cloud="aws", region="us-west-2"),
         )
 
-        cls.index = client.Index(INDEX_NAME)
-
-        # insure the index is empty
-        index_stats = cls.index.describe_index_stats()
-        assert index_stats["dimension"] == DIMENSION
-        if index_stats["namespaces"].get(NAMESPACE_NAME) is not None:
-            assert index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == 0
+        self.index = client.Index(INDEX_NAME)
+        self.pc = client
 
     @classmethod
-    def teardown_class(cls) -> None:
-        index_stats = cls.index.describe_index_stats()
-        for _namespace_name in index_stats["namespaces"].keys():
-            cls.index.delete(delete_all=True, namespace=_namespace_name)
+    def teardown_class(self) -> None:
+        self.pc.delete_index()
 
     @pytest.fixture(autouse=True)
     def setup(self) -> None:
         # delete all the vectors in the index
         print("called")  # noqa: T201
-        try:
-            self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
-            time.sleep(DEFAULT_SLEEP)  # prevent race condition with previous step
-        except Exception:
-            # if namespace not found
-            pass
+        index_stats = self.index.describe_index_stats()
+        if index_stats["total_vector_count"] > 0:
+            try:
+                self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
+            except Exception:
+                # if namespace not found
+                pass
 
     @pytest.fixture
     def embedding_openai(self) -> OpenAIEmbeddings:

diff --git a/libs/partners/pinecone/tests/unit_tests/test_embeddings.py b/libs/partners/pinecone/tests/unit_tests/test_embeddings.py
@@ -1,28 +1,83 @@
+from typing import Any, Type
+from unittest.mock import patch
+
+import aiohttp
+import pytest
 from langchain_core.utils import convert_to_secret_str
+from langchain_tests.unit_tests.embeddings import EmbeddingsTests
 
 from langchain_pinecone import PineconeEmbeddings
 
 API_KEY = convert_to_secret_str("NOT_A_VALID_KEY")
 MODEL_NAME = "multilingual-e5-large"
 
 
-def test_default_config() -> None:
-    e = PineconeEmbeddings(
-        pinecone_api_key=API_KEY,  # type: ignore[call-arg]
-        model=MODEL_NAME,
-    )
-    assert e.batch_size == 96
+@pytest.fixture(autouse=True)
+def mock_pinecone() -> Any:
+    """Mock Pinecone client for all tests."""
+    with patch("langchain_pinecone.embeddings.PineconeClient") as mock:
+        yield mock
+
+
+class TestPineconeEmbeddingsStandard(EmbeddingsTests):
+    """Standard LangChain embeddings tests."""
+
+    @property
+    def embeddings_class(self) -> Type[PineconeEmbeddings]:
+        """Get the class under test."""
+        return PineconeEmbeddings
+
+    @property
+    def embedding_model_params(self) -> dict:
+        """Get the parameters for initializing the embeddings model."""
+        return {
+            "model": MODEL_NAME,
+            "pinecone_api_key": API_KEY,
+        }
+
+
+class TestPineconeEmbeddingsConfig:
+    """Additional configuration tests for PineconeEmbeddings."""
+
+    def test_default_config(self) -> None:
+        """Test default configuration is set correctly."""
+        embeddings = PineconeEmbeddings(model=MODEL_NAME, pinecone_api_key=API_KEY)  # type: ignore
+        assert embeddings.batch_size == 96
+        assert embeddings.query_params == {"input_type": "query", "truncation": "END"}
+        assert embeddings.document_params == {
+            "input_type": "passage",
+            "truncation": "END",
+        }
+        assert embeddings.dimension == 1024
 
+    def test_custom_config(self) -> None:
+        """Test custom configuration overrides defaults."""
+        embeddings = PineconeEmbeddings(
+            model=MODEL_NAME,
+            api_key=API_KEY,
+            batch_size=128,
+            query_params={"custom": "param"},
+            document_params={"other": "param"},
+        )
+        assert embeddings.batch_size == 128
+        assert embeddings.query_params == {"custom": "param"}
+        assert embeddings.document_params == {"other": "param"}
 
-def test_default_config_with_api_key() -> None:
-    e = PineconeEmbeddings(api_key=API_KEY, model=MODEL_NAME)
-    assert e.batch_size == 96
+    @pytest.mark.asyncio
+    async def test_async_client_initialization(self) -> None:
+        """Test async client is initialized correctly and only when needed."""
+        embeddings = PineconeEmbeddings(model=MODEL_NAME, api_key=API_KEY)
+        assert embeddings._async_client is None
 
+        # Access async_client property
+        client = embeddings.async_client
+        assert client is not None
+        assert isinstance(client, aiohttp.ClientSession)
 
-def test_custom_config() -> None:
-    e = PineconeEmbeddings(
-        pinecone_api_key=API_KEY,  # type: ignore[call-arg]
-        model=MODEL_NAME,
-        batch_size=128,
-    )
-    assert e.batch_size == 128
+        # Ensure headers are set correctly
+        expected_headers = {
+            "Api-Key": API_KEY.get_secret_value(),
+            "Content-Type": "application/json",
+            "X-Pinecone-API-Version": "2024-10",
+        }
+        assert client._default_headers == expected_headers
diff --git a/libs/partners/pinecone/tests/unit_tests/test_vectorstores.py b/libs/partners/pinecone/tests/unit_tests/test_vectorstores.py
@@ -1,6 +1,6 @@
 from unittest.mock import Mock
 
-from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore
+from langchain_pinecone.vectorstores import PineconeVectorStore
 
 
 def test_initialization() -> None:
@@ -9,7 +9,7 @@ def test_initialization() -> None:
     index = Mock()
     embedding = Mock()
     text_key = "xyz"
-    Pinecone(index, embedding, text_key)
+    PineconeVectorStore(index, embedding, text_key)
 
 
 def test_id_prefix() -> None: