Skip to content

Commit

Permalink
Changed FakeEmbeddingsWithOsDimension to make embeddings in integrati…
Browse files Browse the repository at this point in the history
…on tests more distinct from each oher (#11)
  • Loading branch information
willtai authored Nov 26, 2024
1 parent cb12a54 commit 0ab565b
Showing 1 changed file with 24 additions and 25 deletions.
49 changes: 24 additions & 25 deletions libs/neo4j/tests/integration_tests/vectorstores/test_neo4jvector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Test Neo4jVector functionality."""

import os
from math import isclose
from typing import Any, Dict, List, cast

from langchain_core.documents import Document
Expand Down Expand Up @@ -60,14 +59,18 @@ class FakeEmbeddingsWithOsDimension(FakeEmbeddings):

def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
"""Return simple embeddings."""
return [
[float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(i + 1)]
embedding = [
[float(1.0)] * (OS_TOKEN_COUNT - 1) + [100 * float(i + 1)]
for i in range(len(embedding_texts))
]
return embedding

def embed_query(self, text: str) -> List[float]:
"""Return simple embeddings."""
return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(texts.index(text) + 1)]
embedding = [float(1.0)] * (OS_TOKEN_COUNT - 1) + [
100 * float(texts.index(text) + 1)
]
return embedding


def test_neo4jvector() -> None:
Expand Down Expand Up @@ -225,20 +228,15 @@ def test_neo4jvector_relevance_score() -> None:
)

output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
expected_output = [
(Document(page_content="foo", metadata={"page": "0"}), 1.0),
(Document(page_content="bar", metadata={"page": "1"}), 0.9998160600662231),
(Document(page_content="baz", metadata={"page": "2"}), 0.9996607303619385),
]
output_texts = [doc.page_content for doc, _ in output]

# Check if the length of the outputs matches
assert len(output) == len(expected_output)

# Check if each document and its relevance score is close to the expected value
for (doc, score), (expected_doc, expected_score) in zip(output, expected_output):
assert doc.page_content == expected_doc.page_content
assert doc.metadata == expected_doc.metadata
assert isclose(score, expected_score, rel_tol=1e-5)
expected_order = ["foo", "It is the end of the world. Take shelter!", "baz"]
assert output_texts == expected_order
relevance_scores = [score for _, score in output]
assert all(
earlier >= later
for earlier, later in zip(relevance_scores, relevance_scores[1:])
)

drop_vector_indexes(docsearch)

Expand All @@ -258,9 +256,10 @@ def test_neo4jvector_retriever_search_threshold() -> None:

retriever = docsearch.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={"k": 3, "score_threshold": 0.9999},
search_kwargs={"k": 3, "score_threshold": 0.999},
)
output = retriever.invoke("foo")

assert output == [
Document(page_content="foo", metadata={"page": "0"}),
]
Expand Down Expand Up @@ -399,9 +398,10 @@ def test_neo4jvector_hybrid_deduplicate() -> None:
search_type=SearchType.HYBRID,
)
output = docsearch.similarity_search("foo", k=3)

assert output == [
Document(page_content="foo"),
Document(page_content="bar"),
Document(page_content="It is the end of the world. Take shelter!"),
Document(page_content="baz"),
]

Expand Down Expand Up @@ -662,15 +662,14 @@ def test_neo4jvector_special_character() -> None:
pre_delete_collection=True,
search_type=SearchType.HYBRID,
)
docsearch.similarity_search(
output = docsearch.similarity_search(
"It is the end of the world. Take shelter!",
k=1,
)
# assert output == [
# Document(
# page_content="It is the end of the world. Take shelter!", metadata={}
# )
# ]

assert output == [
Document(page_content="It is the end of the world. Take shelter!", metadata={})
]

drop_vector_indexes(docsearch)

Expand Down

0 comments on commit 0ab565b

Please sign in to comment.