Skip to content

Commit

Permalink
Add support for MongoDB (#1043)
Browse files Browse the repository at this point in the history
  • Loading branch information
abhahn authored Aug 27, 2024
1 parent 52ad823 commit 7dae506
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 2 deletions.
16 changes: 16 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,19 @@ PROMPTFLOW_RESPONSE_TIMEOUT=120
PROMPTFLOW_REQUEST_FIELD_NAME=query
PROMPTFLOW_RESPONSE_FIELD_NAME=reply
PROMPTFLOW_CITATIONS_FIELD_NAME=documents
# Chat with data: MongoDB database
MONGODB_ENDPOINT=
MONGODB_USERNAME=
MONGODB_PASSWORD=
MONGODB_DATABASE_NAME=
MONGODB_COLLECTION_NAME=
MONGODB_APP_NAME=
MONGODB_INDEX_NAME=
MONGODB_TOP_K=
MONGODB_STRICTNESS=
MONGODB_ENABLE_IN_DOMAIN=
MONGODB_CONTENT_COLUMNS=
MONGODB_FILENAME_COLUMN=
MONGODB_TITLE_COLUMN=
MONGODB_URL_COLUMN=
MONGODB_VECTOR_COLUMNS=
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This repo contains sample code for a simple chat webapp that integrates with Azu
- Elasticsearch index (preview)
- Pinecone index (private preview)
- Azure SQL Server (private preview)
- Mongo DB (preview)

## Configure the app

Expand Down Expand Up @@ -283,6 +284,34 @@ Note: RBAC assignments can take a few minutes before becoming effective.
- `AZURE_OPENAI_EMBEDDING_NAME`: the name of your Ada (text-embedding-ada-002) model deployment on your Azure OpenAI resource.
- `PINECONE_VECTOR_COLUMNS`: the vector columns in your index to use when searching. Join them with `|` like `contentVector|titleVector`.

#### Chat with your data using Mongo DB (Private Preview)

1. Update the `AZURE_OPENAI_*` environment variables as described in the [basic chat experience](#basic-chat-experience) above.

2. To connect to your data, you need to specify a MongoDB database configuration. Learn more about [MongoDB](https://www.mongodb.com/).

3. Configure data source settings as described in the table below.

| App Setting | Required? | Default Value | Note |
| --- | --- | --- | ------------- |
|DATASOURCE_TYPE|Yes||Must be set to `MongoDB`|
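|MONGODB_ENDPOINT|Yes||The endpoint of your Mongo DB cluster|
|MONGODB_USERNAME|Yes||The username used to authenticate to your Mongo DB instance|
|MONGODB_PASSWORD|Yes||The password used to authenticate to your Mongo DB instance|
|MONGODB_DATABASE_NAME|Yes||The name of your Mongo DB database|
|MONGODB_COLLECTION_NAME|Yes||The name of your Mongo DB collection|
|MONGODB_APP_NAME|Yes||The name of your Mongo DB application|
|MONGODB_INDEX_NAME|Yes||The name of your Mongo DB vector index|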
|MONGODB_TOP_K|No|5|The number of documents to retrieve when querying your search index.|
|MONGODB_ENABLE_IN_DOMAIN|No|True|Limits responses to only queries relating to your data.|
|MONGODB_STRICTNESS|No|3|Integer from 1 to 5 specifying the strictness for the model limiting responses to your data.|
|MONGODB_CONTENT_COLUMNS|No||List of fields in your search index that contains the text content of your documents to use when formulating a bot response. Represent these as a string joined with "|", e.g. `"product_description|product_manual"`|
|MONGODB_FILENAME_COLUMN|No|| Field from your search index that gives a unique identifier of the source of your data to display in the UI.|
|MONGODB_TITLE_COLUMN|No||Field from your search index that gives a relevant title or header for your data content to display in the UI.|
|MONGODB_URL_COLUMN|No||Field from your search index that contains a URL for the document, e.g. an Azure Blob Storage URI. This value is not currently used.|
|MONGODB_VECTOR_COLUMNS|No||List of fields in your search index that contain vector embeddings of your documents to use when formulating a bot response. Represent these as a string joined with "|", e.g. `"contentVector|titleVector"`|

MongoDB uses vector search by default, so ensure these settings are configured on your app:
- `AZURE_OPENAI_EMBEDDING_NAME`: the name of your Ada (text-embedding-ada-002) model deployment on your Azure OpenAI resource.
- `MONGODB_VECTOR_COLUMNS`: the vector columns in your index to use when searching. Join them with `|` like `contentVector|titleVector`.

#### Chat with your data using Azure SQL Server (Private Preview)

1. Update the `AZURE_OPENAI_*` environment variables as described in the [basic chat experience](#basic-chat-experience) above.
Expand Down
87 changes: 85 additions & 2 deletions backend/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,8 @@ class _AzureSqlServerSettings(BaseSettings, DatasourcePayloadConstructor):
model_config = SettingsConfigDict(
env_prefix="AZURE_SQL_SERVER_",
env_file=DOTENV_PATH,
extra="ignore"
extra="ignore",
env_ignore_empty=True
)
_type: Literal["azure_sql_server"] = PrivateAttr(default="azure_sql_server")

Expand Down Expand Up @@ -658,7 +659,84 @@ def construct_payload_configuration(
"parameters": parameters
}


class _MongoDbSettings(BaseSettings, DatasourcePayloadConstructor):
    """Settings for the MongoDB "chat with your data" datasource.

    Values are loaded from ``MONGODB_``-prefixed environment variables or from
    the dotenv file at ``DOTENV_PATH``. Because ``env_ignore_empty`` is set,
    variables that are present but empty are treated as unset, so optional
    fields fall back to their defaults instead of failing validation.
    """

    model_config = SettingsConfigDict(
        env_prefix="MONGODB_",
        env_file=DOTENV_PATH,
        extra="ignore",
        env_ignore_empty=True
    )
    _type: Literal["mongo_db"] = PrivateAttr(default="mongo_db")

    # Required connection settings.
    endpoint: str
    # Credentials are excluded from the top-level dump; they are emitted only
    # inside ``authentication`` (see construct_authentication).
    username: str = Field(exclude=True)
    password: str = Field(exclude=True)
    database_name: str
    collection_name: str
    app_name: str
    index_name: str

    # Query behaviour. The aliases are the key names used when the model is
    # dumped with ``by_alias=True`` in construct_payload_configuration.
    query_type: Literal["vector"] = "vector"
    top_k: int = Field(default=5, serialization_alias="top_n_documents")
    strictness: int = 3
    enable_in_domain: bool = Field(default=True, serialization_alias="in_scope")

    # Raw column settings. Excluded from the dump because they are emitted
    # through ``fields_mapping`` instead (see set_fields_mapping).
    content_columns: Optional[List[str]] = Field(default=None, exclude=True)
    vector_columns: Optional[List[str]] = Field(default=None, exclude=True)
    title_column: Optional[str] = Field(default=None, exclude=True)
    url_column: Optional[str] = Field(default=None, exclude=True)
    filename_column: Optional[str] = Field(default=None, exclude=True)

    # Constructed fields
    authentication: Optional[dict] = None
    embedding_dependency: Optional[dict] = None
    fields_mapping: Optional[dict] = None

    @field_validator('content_columns', 'vector_columns', mode="before")
    @classmethod
    def split_columns(cls, comma_separated_string: str) -> Optional[List[str]]:
        """Turn a delimited column string into a list of column names.

        Runs before field validation. A non-empty string is handed to
        ``parse_multi_columns``; any other input (empty string, ``None``,
        non-string value) yields ``None``.

        NOTE(review): despite the parameter name, the README describes these
        values as joined with ``|``; the accepted delimiter(s) are decided by
        ``parse_multi_columns``, which is defined elsewhere.
        """
        if isinstance(comma_separated_string, str) and comma_separated_string:
            return parse_multi_columns(comma_separated_string)

        return None

    @model_validator(mode="after")
    def set_fields_mapping(self) -> Self:
        """Build ``fields_mapping`` from the individual ``*_column(s)`` settings."""
        self.fields_mapping = {
            "content_fields": self.content_columns,
            "title_field": self.title_column,
            "url_field": self.url_column,
            "filepath_field": self.filename_column,
            "vector_fields": self.vector_columns
        }
        return self

    @model_validator(mode="after")
    def construct_authentication(self) -> Self:
        """Build the ``authentication`` dict from the username and password."""
        self.authentication = {
            "type": "username_and_password",
            "username": self.username,
            "password": self.password
        }
        return self

    def construct_payload_configuration(
        self,
        *args,
        **kwargs
    ):
        """Return the datasource payload as ``{"type": ..., "parameters": ...}``.

        Positional and keyword arguments are accepted but not used here.
        ``self._settings`` is not defined in this class; presumably it is the
        application settings object supplied via
        ``DatasourcePayloadConstructor`` -- confirm against that base class.
        """
        # Resolved at call time rather than at validation time.
        self.embedding_dependency = \
            self._settings.azure_openai.extract_embedding_dependency()

        parameters = self.model_dump(exclude_none=True, by_alias=True)
        # Keys from the shared search settings override same-named keys above.
        parameters.update(self._settings.search.model_dump(exclude_none=True, by_alias=True))

        return {
            "type": self._type,
            "parameters": parameters
        }


class _BaseSettings(BaseSettings):
model_config = SettingsConfigDict(
env_file=DOTENV_PATH,
Expand Down Expand Up @@ -729,15 +807,20 @@ def set_datasource_settings(self) -> Self:
elif self.base_settings.datasource_type == "AzureSqlServer":
self.datasource = _AzureSqlServerSettings(settings=self, _env_file=DOTENV_PATH)
logging.debug("Using SQL Server")

elif self.base_settings.datasource_type == "MongoDB":
self.datasource = _MongoDbSettings(settings=self, _env_file=DOTENV_PATH)
logging.debug("Using Mongo DB")

else:
self.datasource = None
logging.warning("No datasource configuration found in the environment -- calls will be made to Azure OpenAI without grounding data.")

return self

except ValidationError:
except ValidationError as e:
logging.warning("No datasource configuration found in the environment -- calls will be made to Azure OpenAI without grounding data.")
logging.warning(e.errors())


app_settings = _AppSettings()

0 comments on commit 7dae506

Please sign in to comment.