diff --git a/src/extensions/vectorstores/EmbeddingUtils.py b/src/extensions/vectorstores/EmbeddingUtils.py
index df45086..1875a2d 100644
--- a/src/extensions/vectorstores/EmbeddingUtils.py
+++ b/src/extensions/vectorstores/EmbeddingUtils.py
@@ -101,6 +101,23 @@
     default_value=OPENAI,
     required=True,
 )
+OPENAI_MODEL = PropertyDescriptor(
+    name="OpenAI Model",
+    description="The name of the OpenAI model to use",
+    default_value="text-embedding-ada-002",
+    required=True,
+    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
+    dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
+)
+HUGGING_FACE_MODEL = PropertyDescriptor(
+    name="HuggingFace Model",
+    description="The name of the HuggingFace model to use",
+    default_value="sentence-transformers/all-MiniLM-L6-v2",
+    required=True,
+    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
+    dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
+)
+
 PROPERTIES = [
     EMBEDDING_FUNCTION,
     HUGGING_FACE_MODEL_NAME,
@@ -160,6 +177,8 @@ def create_embedding_service(context):
 
     if embedding_service == OPENAI:
         openai_api_key = context.getProperty(OPENAI_API_KEY).getValue()
-        return OpenAIEmbeddings(openai_api_key=openai_api_key)
+        openai_model = context.getProperty(OPENAI_MODEL).getValue()
+        return OpenAIEmbeddings(openai_api_key=openai_api_key, model=openai_model)
     huggingface_api_key = context.getProperty(HUGGING_FACE_API_KEY).getValue()
-    return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key)
+    huggingface_model = context.getProperty(HUGGING_FACE_MODEL).getValue()
+    return HuggingFaceInferenceAPIEmbeddings(api_key=huggingface_api_key, model_name=huggingface_model)
diff --git a/src/extensions/vectorstores/OpenSearchVectorUtils.py b/src/extensions/vectorstores/OpenSearchVectorUtils.py
index 4f527e0..0acea68 100644
--- a/src/extensions/vectorstores/OpenSearchVectorUtils.py
+++ b/src/extensions/vectorstores/OpenSearchVectorUtils.py
@@ -17,14 +17,6 @@
     validators=[StandardValidators.NON_EMPTY_VALIDATOR],
     dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
 )
-HUGGING_FACE_MODEL = PropertyDescriptor(
-    name="HuggingFace Model",
-    description="The name of the HuggingFace model to use",
-    default_value="sentence-transformers/all-MiniLM-L6-v2",
-    required=True,
-    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-    dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
-)
 OPENAI_API_KEY = PropertyDescriptor(
     name="OpenAI API Key",
     description="The API Key for OpenAI in order to create embeddings",
@@ -33,14 +25,6 @@
     validators=[StandardValidators.NON_EMPTY_VALIDATOR],
     dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
 )
-OPENAI_API_MODEL = PropertyDescriptor(
-    name="OpenAI Model",
-    description="The API Key for OpenAI in order to create embeddings",
-    default_value="text-embedding-ada-002",
-    required=True,
-    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-    dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
-)
 HTTP_HOST = PropertyDescriptor(
     name="HTTP Host",
     description="URL where OpenSearch is hosted.",
diff --git a/src/extensions/vectorstores/PutOpenSearchVector.py b/src/extensions/vectorstores/PutOpenSearchVector.py
index 92821db..329795e 100644
--- a/src/extensions/vectorstores/PutOpenSearchVector.py
+++ b/src/extensions/vectorstores/PutOpenSearchVector.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
+from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_MODEL, create_embedding_service
 from langchain.vectorstores import OpenSearchVectorSearch
 from nifiapi.documentation import use_case
 from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
@@ -9,13 +9,11 @@
     COSINESIMIL,
     HTTP_HOST,
     HUGGING_FACE_API_KEY,
-    HUGGING_FACE_MODEL,
     INDEX_NAME,
     L1,
     L2,
     LINF,
     OPENAI_API_KEY,
-    OPENAI_API_MODEL,
     PASSWORD,
     TEXT_FIELD,
     USERNAME,
@@ -184,7 +182,7 @@ class ProcessorDetails:
     properties = [
         EMBEDDING_MODEL,
         OPENAI_API_KEY,
-        OPENAI_API_MODEL,
+        OPENAI_MODEL,
         HUGGING_FACE_API_KEY,
         HUGGING_FACE_MODEL,
         HTTP_HOST,
diff --git a/src/extensions/vectorstores/PutPinecone.py b/src/extensions/vectorstores/PutPinecone.py
index 8625082..bc5baa7 100644
--- a/src/extensions/vectorstores/PutPinecone.py
+++ b/src/extensions/vectorstores/PutPinecone.py
@@ -3,7 +3,14 @@
 import json
 
 import langchain.vectorstores
-from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
+from EmbeddingUtils import (
+    EMBEDDING_MODEL,
+    HUGGING_FACE,
+    HUGGING_FACE_MODEL,
+    OPENAI,
+    OPENAI_MODEL,
+    create_embedding_service,
+)
 from nifiapi.documentation import use_case
 from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
 from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
@@ -77,14 +84,6 @@ class ProcessorDetails:
         sensitive=True,
         dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
     )
-    HUGGING_FACE_MODEL = PropertyDescriptor(
-        name="HuggingFace Model",
-        description="The name of the HuggingFace model to use",
-        validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-        required=True,
-        default_value="sentence-transformers/all-MiniLM-L6-v2",
-        dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
-    )
     OPENAI_API_KEY = PropertyDescriptor(
         name="OpenAI API Key",
         description="The API Key for OpenAI in order to create embeddings",
@@ -93,14 +92,6 @@ class ProcessorDetails:
         validators=[StandardValidators.NON_EMPTY_VALIDATOR],
         dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
     )
-    OPENAI_API_MODEL = PropertyDescriptor(
-        name="OpenAI Model",
-        description="The API Key for OpenAI in order to create embeddings",
-        required=True,
-        validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-        default_value="text-embedding-ada-002",
-        dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
-    )
     PINECONE_ENV = PropertyDescriptor(
         name="Pinecone Environment",
         description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",
@@ -144,7 +135,7 @@ class ProcessorDetails:
         PINECONE_API_KEY,
         EMBEDDING_MODEL,
         OPENAI_API_KEY,
-        OPENAI_API_MODEL,
+        OPENAI_MODEL,
         HUGGING_FACE_API_KEY,
         HUGGING_FACE_MODEL,
         PINECONE_ENV,
diff --git a/src/extensions/vectorstores/QdrantUtils.py b/src/extensions/vectorstores/QdrantUtils.py
index 7435124..24211e8 100644
--- a/src/extensions/vectorstores/QdrantUtils.py
+++ b/src/extensions/vectorstores/QdrantUtils.py
@@ -5,7 +5,9 @@
 from EmbeddingUtils import (
     EMBEDDING_MODEL,
     HUGGING_FACE,
+    HUGGING_FACE_MODEL,
     OPENAI,
+    OPENAI_MODEL,
 )
 from nifiapi.properties import (
     ExpressionLanguageScope,
@@ -68,14 +70,6 @@
     sensitive=True,
     dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
 )
-HUGGING_FACE_MODEL = PropertyDescriptor(
-    name="HuggingFace Model",
-    description="The name of the HuggingFace model to use.",
-    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-    required=True,
-    default_value="sentence-transformers/all-MiniLM-L6-v2",
-    dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
-)
 OPENAI_API_KEY = PropertyDescriptor(
     name="OpenAI API Key",
     description="The API Key for OpenAI in order to create embeddings.",
@@ -84,21 +78,13 @@
     validators=[StandardValidators.NON_EMPTY_VALIDATOR],
     dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
 )
-OPENAI_API_MODEL = PropertyDescriptor(
-    name="OpenAI Model",
-    description="The name of the OpenAI model to use.",
-    required=True,
-    validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-    default_value="text-embedding-ada-002",
-    dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
-)
 
 EMBEDDING_MODEL_PROPERTIES = [
     EMBEDDING_MODEL,
     HUGGING_FACE_API_KEY,
     HUGGING_FACE_MODEL,
     OPENAI_API_KEY,
-    OPENAI_API_MODEL,
+    OPENAI_MODEL,
 ]
 
 
diff --git a/src/extensions/vectorstores/QueryOpenSearchVector.py b/src/extensions/vectorstores/QueryOpenSearchVector.py
index 365f824..54f3f41 100644
--- a/src/extensions/vectorstores/QueryOpenSearchVector.py
+++ b/src/extensions/vectorstores/QueryOpenSearchVector.py
@@ -2,7 +2,7 @@
 
 import json
 
-from EmbeddingUtils import EMBEDDING_MODEL, create_embedding_service
+from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE_MODEL, OPENAI_MODEL, create_embedding_service
 from langchain.vectorstores import OpenSearchVectorSearch
 from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
 from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
@@ -10,13 +10,11 @@
     COSINESIMIL,
     HTTP_HOST,
     HUGGING_FACE_API_KEY,
-    HUGGING_FACE_MODEL,
     INDEX_NAME,
     L1,
     L2,
     LINF,
     OPENAI_API_KEY,
-    OPENAI_API_MODEL,
     PASSWORD,
     TEXT_FIELD,
     USERNAME,
@@ -134,7 +132,7 @@ class ProcessorDetails:
     properties = [
         EMBEDDING_MODEL,
         OPENAI_API_KEY,
-        OPENAI_API_MODEL,
+        OPENAI_MODEL,
         HUGGING_FACE_API_KEY,
         HUGGING_FACE_MODEL,
         HTTP_HOST,
diff --git a/src/extensions/vectorstores/QueryPinecone.py b/src/extensions/vectorstores/QueryPinecone.py
index aeb416a..6b42e66 100644
--- a/src/extensions/vectorstores/QueryPinecone.py
+++ b/src/extensions/vectorstores/QueryPinecone.py
@@ -4,7 +4,14 @@
 
 import langchain.vectorstores
 import QueryUtils
-from EmbeddingUtils import EMBEDDING_MODEL, HUGGING_FACE, OPENAI, create_embedding_service
+from EmbeddingUtils import (
+    EMBEDDING_MODEL,
+    HUGGING_FACE,
+    HUGGING_FACE_MODEL,
+    OPENAI,
+    OPENAI_MODEL,
+    create_embedding_service,
+)
 from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
 from nifiapi.properties import ExpressionLanguageScope, PropertyDependency, PropertyDescriptor, StandardValidators
 from pinecone import Pinecone
@@ -54,22 +61,6 @@ class ProcessorDetails:
         sensitive=True,
         dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
     )
-    OPENAI_MODEL = PropertyDescriptor(
-        name="OpenAI Model",
-        description="The API Key for OpenAI in order to create embeddings",
-        required=True,
-        validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-        default_value="text-embedding-ada-002",
-        dependencies=[PropertyDependency(EMBEDDING_MODEL, OPENAI)],
-    )
-    HUGGING_FACE_MODEL = PropertyDescriptor(
-        name="HuggingFace Model",
-        description="The name of the HuggingFace model to use",
-        validators=[StandardValidators.NON_EMPTY_VALIDATOR],
-        required=True,
-        default_value="sentence-transformers/all-MiniLM-L6-v2",
-        dependencies=[PropertyDependency(EMBEDDING_MODEL, HUGGING_FACE)],
-    )
     PINECONE_ENV = PropertyDescriptor(
         name="Pinecone Environment",
         description="The name of the Pinecone Environment. This can be found in the Pinecone console next to the API Key.",