Skip to content

Commit

Permalink
x
Browse files Browse the repository at this point in the history
  • Loading branch information
efriis committed Jan 31, 2024
1 parent fa4ba0c commit 1007e2c
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 21 deletions.
3 changes: 0 additions & 3 deletions libs/partners/pinecone/langchain_pinecone/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,6 @@ def similarity_search_by_vector_with_score(
namespace=namespace,
filter=filter,
)
import pdb

pdb.set_trace()
for res in results["matches"]:
metadata = res["metadata"]
if self._text_key in metadata:
Expand Down
2 changes: 1 addition & 1 deletion libs/partners/pinecone/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-pinecone"
version = "0.0.1"
version = "0.0.1rc0"
description = "An integration package connecting Pinecone and LangChain"
authors = []
readme = "README.md"
Expand Down
44 changes: 27 additions & 17 deletions libs/partners/pinecone/tests/integration_tests/test_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
NAMESPACE_NAME = "langchain-test-namespace" # name of the namespace
DIMENSION = 1536 # dimension of the embeddings

DEFAULT_SLEEP = 20


class TestPinecone:
index: "pinecone.Index"
Expand Down Expand Up @@ -56,10 +58,12 @@ def teardown_class(cls) -> None:
def setup(self) -> None:
# delete all the vectors in the index
print("called")
index_stats = self.index.describe_index_stats()
for _namespace_name in index_stats["namespaces"].keys():
self.index.delete(delete_all=True, namespace=_namespace_name)
time.sleep(20) # prevent race condition with previous step
self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
# index_stats = self.index.describe_index_stats()
# for _namespace_name in index_stats["namespaces"].keys():
# self.index.delete(delete_all=True, namespace=_namespace_name)
time.sleep(DEFAULT_SLEEP) # prevent race condition with previous step
# index_stats = self.index.describe_index_stats

@pytest.fixture
def embedding_openai(self) -> OpenAIEmbeddings:
Expand All @@ -83,7 +87,7 @@ def test_from_texts(
index_name=INDEX_NAME,
namespace=NAMESPACE_NAME,
)
time.sleep(20) # prevent race condition
time.sleep(DEFAULT_SLEEP) # prevent race condition
output = docsearch.similarity_search(unique_id, k=1, namespace=NAMESPACE_NAME)
assert output == [Document(page_content=needs)]

Expand All @@ -94,7 +98,7 @@ def test_from_texts_with_metadatas(

unique_id = uuid.uuid4().hex
needs = f"foobuu {unique_id} booo"
texts.insert(0, needs)
texts = [needs] + texts

metadatas = [{"page": i} for i in range(len(texts))]
docsearch = Pinecone.from_texts(
Expand All @@ -104,7 +108,7 @@ def test_from_texts_with_metadatas(
metadatas=metadatas,
namespace=NAMESPACE_NAME,
)
time.sleep(20) # prevent race condition
time.sleep(DEFAULT_SLEEP) # prevent race condition
output = docsearch.similarity_search(needs, k=1, namespace=NAMESPACE_NAME)

# TODO: why metadata={"page": 0.0}) instead of {"page": 0}?
Expand All @@ -114,6 +118,7 @@ def test_from_texts_with_scores(self, embedding_openai: OpenAIEmbeddings) -> Non
"""Test end to end construction and search with scores and IDs."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": i} for i in range(len(texts))]
print("metadatas", metadatas)
docsearch = Pinecone.from_texts(
texts,
embedding_openai,
Expand All @@ -122,13 +127,14 @@ def test_from_texts_with_scores(self, embedding_openai: OpenAIEmbeddings) -> Non
namespace=NAMESPACE_NAME,
)
print(texts)
time.sleep(20) # prevent race condition
time.sleep(DEFAULT_SLEEP) # prevent race condition
output = docsearch.similarity_search_with_score(
"foo", k=3, namespace=NAMESPACE_NAME
)
docs = [o[0] for o in output]
scores = [o[1] for o in output]
sorted_documents = sorted(docs, key=lambda x: x.metadata["page"])
print(sorted_documents)

# TODO: why metadata={"page": 0.0}) instead of {"page": 0}, etc???
assert sorted_documents == [
Expand Down Expand Up @@ -164,7 +170,7 @@ def test_from_existing_index_with_namespaces(
namespace=f"{INDEX_NAME}-2",
)

time.sleep(20) # prevent race condition
time.sleep(DEFAULT_SLEEP) # prevent race condition

# Search with namespace
docsearch = Pinecone.from_existing_index(
Expand All @@ -187,25 +193,28 @@ def test_add_documents_with_ids(
ids=ids,
embedding=embedding_openai,
index_name=INDEX_NAME,
namespace=INDEX_NAME,
namespace=NAMESPACE_NAME,
)
time.sleep(DEFAULT_SLEEP) # prevent race condition
index_stats = self.index.describe_index_stats()
time.sleep(20) # prevent race condition
assert index_stats["namespaces"][INDEX_NAME]["vector_count"] == len(texts)
assert index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == len(texts)

ids_1 = [uuid.uuid4().hex for _ in range(len(texts))]
Pinecone.from_texts(
texts=texts,
texts=[t + "-1" for t in texts],
ids=ids_1,
embedding=embedding_openai,
index_name=INDEX_NAME,
namespace=INDEX_NAME,
namespace=NAMESPACE_NAME,
)
time.sleep(20) # prevent race condition
time.sleep(DEFAULT_SLEEP) # prevent race condition
index_stats = self.index.describe_index_stats()
assert index_stats["namespaces"][INDEX_NAME]["vector_count"] == len(texts) * 2
assert (
index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == len(texts) * 2
)
assert index_stats["total_vector_count"] == len(texts) * 2

@pytest.mark.xfail(reason="relevance score just over 1")
def test_relevance_score_bound(self, embedding_openai: OpenAIEmbeddings) -> None:
"""Ensures all relevance scores are between 0 and 1."""
texts = ["foo", "bar", "baz"]
Expand All @@ -217,8 +226,9 @@ def test_relevance_score_bound(self, embedding_openai: OpenAIEmbeddings) -> None
metadatas=metadatas,
)
# wait for the index to be ready
time.sleep(20)
time.sleep(DEFAULT_SLEEP)
output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
print(output)
assert all(
(1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
)
Expand Down

0 comments on commit 1007e2c

Please sign in to comment.