Skip to content

Commit

Permalink
NIFI-13764 Embedding model name property is ignored in multiple vector db processor (#9)
Browse files Browse the repository at this point in the history

Signed-off-by: Mark Bathori <[email protected]>
  • Loading branch information
mark-bathori authored Sep 20, 2024
1 parent afe8b3c commit ef0d85a
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 78 deletions.
23 changes: 21 additions & 2 deletions src/extensions/vectorstores/EmbeddingUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,23 @@
default_value=OPENAI,
required=True,
)
OPENAI_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The name of the OpenAI model to use",
default_value="text-embedding-ada-002",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)

PROPERTIES = [
EMBEDDING_FUNCTION,
HUGGING_FACE_MODEL_NAME,
Expand Down Expand Up @@ -160,6 +177,8 @@ def create_embedding_service(context):

if embedding_service == OPENAI:
openai_api_key = context.getProperty(OPENAI_API_KEY).getValue()
return OpenAIEmbeddings(openai_api_key=openai_api_key)
openai_model = context.getProperty(OPENAI_MODEL).getValue()
return OpenAIEmbeddings(openai_api_key=openai_api_key, model=openai_model)
huggingface_api_key = context.getProperty(HUGGING_FACE_API_KEY).getValue()
return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key)
huggingface_model = context.getProperty(HUGGING_FACE_MODEL).getValue()
return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key, model_name=huggingface_model)
16 changes: 0 additions & 16 deletions src/extensions/vectorstores/OpenSearchVectorUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,6 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
default_value="sentence-transformers/all-MiniLM-L6-v2",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings",
Expand All @@ -33,14 +25,6 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
default_value="text-embedding-ada-002",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HTTP_HOST = PropertyDescriptor(
name="HTTP Host",
description="URL where OpenSearch is hosted.",
Expand Down
6 changes: 2 additions & 4 deletions src/extensions/vectorstores/PutOpenSearchVector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: Apache-2.0

from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_MODEL, create_embedding_service
from langchain.vectorstores import OpenSearchVectorSearch
from nifiapi.documentation import use_case
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
Expand All @@ -9,13 +9,11 @@
COSINESIMIL,
HTTP_HOST,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
INDEX_NAME,
L1,
L2,
LINF,
OPENAI_API_KEY,
OPENAI_API_MODEL,
PASSWORD,
TEXT_FIELD,
USERNAME,
Expand Down Expand Up @@ -184,7 +182,7 @@ class ProcessorDetails:
properties = [
EMBEDDING_MODEL,
OPENAI_API_KEY,
OPENAI_API_MODEL,
OPENAI_MODEL,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
HTTP_HOST,
Expand Down
27 changes: 9 additions & 18 deletions src/extensions/vectorstores/PutPinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
import json

import langchain.vectorstores
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_MODEL,
create_embedding_service,
)
from nifiapi.documentation import use_case
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
Expand Down Expand Up @@ -77,14 +84,6 @@ class ProcessorDetails:
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings",
Expand All @@ -93,14 +92,6 @@ class ProcessorDetails:
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
PINECONE_ENV = PropertyDescriptor(
name="Pinecone Environment",
description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",
Expand Down Expand Up @@ -144,7 +135,7 @@ class ProcessorDetails:
PINECONE_API_KEY,
EMBEDDING_MODEL,
OPENAI_API_KEY,
OPENAI_API_MODEL,
OPENAI_MODEL,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
PINECONE_ENV,
Expand Down
20 changes: 3 additions & 17 deletions src/extensions/vectorstores/QdrantUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_MODEL,
)
from nifiapi.properties import (
ExpressionLanguageScope,
Expand Down Expand Up @@ -68,14 +70,6 @@
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use.",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_API_KEY = PropertyDescriptor(
name="OpenAI API Key",
description="The API Key for OpenAI in order to create embeddings.",
Expand All @@ -84,21 +78,13 @@
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
OPENAI_API_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The name of the OpenAI model to use.",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)

EMBEDDING_MODEL_PROPERTIES = [
EMBEDDING_MODEL,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
OPENAI_API_KEY,
OPENAI_API_MODEL,
OPENAI_MODEL,
]


Expand Down
6 changes: 2 additions & 4 deletions src/extensions/vectorstores/QueryOpenSearchVector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@

import json

from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_MODEL, create_embedding_service
from langchain.vectorstores import OpenSearchVectorSearch
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
from OpenSearchVectorUtils import (
COSINESIMIL,
HTTP_HOST,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
INDEX_NAME,
L1,
L2,
LINF,
OPENAI_API_KEY,
OPENAI_API_MODEL,
PASSWORD,
TEXT_FIELD,
USERNAME,
Expand Down Expand Up @@ -134,7 +132,7 @@ class ProcessorDetails:
properties = [
EMBEDDING_MODEL,
OPENAI_API_KEY,
OPENAI_API_MODEL,
OPENAI_MODEL,
HUGGING_FACE_API_KEY,
HUGGING_FACE_MODEL,
HTTP_HOST,
Expand Down
25 changes: 8 additions & 17 deletions src/extensions/vectorstores/QueryPinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@

import langchain.vectorstores
import QueryUtils
from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
from EmbeddingUtils import (
EMBEDDING_MODEL,
HUGGING_FACE,
HUGGING_FACE_MODEL,
OPENAI,
OPENAI_MODEL,
create_embedding_service,
)
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
from pinecone import Pinecone
Expand Down Expand Up @@ -54,22 +61,6 @@ class ProcessorDetails:
sensitive=True,
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
OPENAI_MODEL = PropertyDescriptor(
name="OpenAI Model",
description="The API Key for OpenAI in order to create embeddings",
required=True,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
default_value="text-embedding-ada-002",
dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
)
HUGGING_FACE_MODEL = PropertyDescriptor(
name="HuggingFace Model",
description="The name of the HuggingFace model to use",
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
required=True,
default_value="sentence-transformers/all-MiniLM-L6-v2",
dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
)
PINECONE_ENV = PropertyDescriptor(
name="Pinecone Environment",
description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",
Expand Down

0 comments on commit ef0d85a

Please sign in to comment.