Merge branch 'develop' into main
clemlesne committed Jul 7, 2023
2 parents f69f6f9 + 2589998 commit f633b84
Showing 14 changed files with 399 additions and 251 deletions.
12 changes: 8 additions & 4 deletions README.md
@@ -10,11 +10,12 @@ Includes:
- Deployable on any Kubernetes cluster, with its Helm chart
- Manage users effortlessly with OpenID Connect
- More than 150 tones and personalities (accountant, advisor, debater, excel sheet, instructor, logistician, etc.) to better help employees in their specific daily tasks
-- Plug and play with any storage system, including [Azure Cosmos DB](https://learn.microsoft.com/en-us/azure/cosmos-db/), [Redis](https://github.com/redis/redis) and [Qdrant](https://github.com/qdrant/qdrant).
+- Plug and play storage system, including [Azure Cosmos DB](https://learn.microsoft.com/en-us/azure/cosmos-db/), [Redis](https://github.com/redis/redis) and [Qdrant](https://github.com/qdrant/qdrant).
- Possibility to send temporary messages, for confidentiality
- Scalable system based on stateless APIs, cache, progressive web app and events
- Search engine for conversations, based on semantic similarity and AI embeddings
-- Unlimited conversation history
+- Unlimited conversation history and number of users
+- Usage tracking, for better understanding of your employees' usage

![Application screenshot](docs/main.png)

@@ -35,6 +36,9 @@ store = "cosmos"
# Enum: "redis"
stream = "redis"

+[api]
+root_path = ""

[openai]
ada_deploy_id = "ada"
ada_max_tokens = 2049
@@ -49,7 +53,7 @@ max_length = 1000

[logging]
app_level = "DEBUG"
sys_level = "INFO"
sys_level = "WARN"

[oidc]
algorithms = ["RS256"]
@@ -65,7 +69,7 @@ db = 0
host = "localhost"

[cosmos]
# Containers "conversation" (/user_id), "message" (/conversation_id) and "user" (/dummy) must exist
# Containers "conversation" (/user_id), "message" (/conversation_id), "user" (/dummy), "usage" (/user_id) must exist
url = "https://private-gpt.documents.azure.com:443"
database = "private-gpt"
```
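The new `[api]` `root_path` setting is presumably what the Helm chart fills in when the API is served behind a path prefix. A minimal sketch of how it could be consumed when the FastAPI app is created; the wiring itself is not part of the files shown here, and `get_config`'s signature is assumed from its other uses in this commit:

```python
# Hypothetical sketch, not part of this commit: feed [api] root_path to FastAPI
# so generated URLs stay correct when the API runs behind a path prefix such as
# /<release>-conversation-api (see the Helm template below).
from fastapi import FastAPI

from utils import get_config  # same helper the new modules use

app = FastAPI(
    root_path=get_config("api", "root_path", str, default="", required=True),
)
```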
9 changes: 9 additions & 0 deletions cicd/helm/private-gpt/templates/conversation-api-config.yaml
@@ -7,6 +7,11 @@ metadata:
    app.kubernetes.io/component: conversation-api
data:
  config.toml: |
    [persistence]
    search = "qdrant"
    store = "cosmos"
    stream = "redis"
    [api]
    root_path = "/{{ include "private-gpt.fullname" . }}-conversation-api"
@@ -37,3 +42,7 @@ data:
    [redis]
    db = {{ .Values.redis.db | int }}
    host = "{{ include "common.names.fullname" .Subcharts.redis }}-master"
    [cosmos]
    url = {{ .Values.cosmos.url | quote }}
    database = {{ .Values.cosmos.database | quote }}
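These rendered `[cosmos]` values are presumably what the Cosmos DB store backend reads at startup. A hedged sketch of how the containers listed in the README comment might be opened; the actual store code is not in this diff, and authenticating with `DefaultAzureCredential` is an assumption mirroring the OpenAI setup further down:

```python
# Hypothetical sketch, not part of this commit: open the Cosmos DB containers
# named in the README ("conversation", "message", "user", "usage") from the
# [cosmos] settings. AAD auth via DefaultAzureCredential is an assumption.
from azure.cosmos import CosmosClient
from azure.identity import DefaultAzureCredential

from utils import get_config

client = CosmosClient(
    get_config("cosmos", "url", str, required=True),
    credential=DefaultAzureCredential(),
)
database = client.get_database_client(get_config("cosmos", "database", str, required=True))
conversations = database.get_container_client("conversation")  # partitioned on /user_id
messages = database.get_container_client("message")  # partitioned on /conversation_id
users = database.get_container_client("user")  # partitioned on /dummy
usage = database.get_container_client("usage")  # partitioned on /user_id
```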
5 changes: 5 additions & 0 deletions cicd/helm/private-gpt/values.yaml
@@ -42,6 +42,11 @@ api:
base: null
gpt_deploy_id: gpt-35-turbo

cosmos:
  # https://[db].documents.azure.com
  url: null
  database: null

redis:
  auth:
    enabled: false
79 changes: 79 additions & 0 deletions src/conversation-api/ai/contentsafety.py
@@ -0,0 +1,79 @@
# Import utils
from utils import (build_logger, get_config)

# Import misc
from azure.core.credentials import AzureKeyCredential
from fastapi import HTTPException, status
from tenacity import retry, stop_after_attempt, wait_random_exponential
import azure.ai.contentsafety as azure_cs
import azure.core.exceptions as azure_exceptions


###
# Init misc
###

logger = build_logger(__name__)

###
# Init Azure Content Safety
###

# Scores are as follows: 0 - Safe, 2 - Low, 4 - Medium, 6 - High
# See: https://review.learn.microsoft.com/en-us/azure/cognitive-services/content-safety/concepts/harm-categories?branch=release-build-content-safety#severity-levels
ACS_SEVERITY_THRESHOLD = 2
ACS_API_BASE = get_config("acs", "api_base", str, required=True)
ACS_API_TOKEN = get_config("acs", "api_token", str, required=True)
ACS_MAX_LENGTH = get_config("acs", "max_length", int, required=True)
logger.info(f"Connected Azure Content Safety to {ACS_API_BASE}")
acs_client = azure_cs.ContentSafetyClient(
    ACS_API_BASE, AzureKeyCredential(ACS_API_TOKEN)
)


class ContentSafety:
    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def is_moderated(self, prompt: str) -> bool:
        logger.debug(f"Checking moderation for text: {prompt}")

        if len(prompt) > ACS_MAX_LENGTH:
            logger.info(f"Message ({len(prompt)}) too long for moderation")
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Message too long",
            )

        req = azure_cs.models.AnalyzeTextOptions(
            text=prompt,
            categories=[
                azure_cs.models.TextCategory.HATE,
                azure_cs.models.TextCategory.SELF_HARM,
                azure_cs.models.TextCategory.SEXUAL,
                azure_cs.models.TextCategory.VIOLENCE,
            ],
        )

        try:
            res = acs_client.analyze_text(req)
        except azure_exceptions.ClientAuthenticationError as e:
            logger.exception(e)
            return False

        is_moderated = any(
            cat.severity >= ACS_SEVERITY_THRESHOLD
            for cat in [
                res.hate_result,
                res.self_harm_result,
                res.sexual_result,
                res.violence_result,
            ]
        )
        if is_moderated:
            logger.info(f"Message is moderated: {prompt}")
            logger.debug(f"Moderation result: {res}")

        return is_moderated
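For context, a minimal sketch of how this new moderation gate might be called from a FastAPI route; the route, request model, and rejection status code below are hypothetical, not taken from this commit:

```python
# Hypothetical usage sketch, not part of this commit: block a prompt before it
# reaches the model when Azure Content Safety flags it.
from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel

from ai.contentsafety import ContentSafety  # assumed import path under src/conversation-api

app = FastAPI()
content_safety = ContentSafety()


class MessageIn(BaseModel):
    prompt: str


@app.post("/message")  # hypothetical route
async def post_message(body: MessageIn) -> dict:
    # is_moderated() already raises HTTP 400 when the prompt exceeds acs.max_length;
    # here we only handle the "flagged" case.
    if await content_safety.is_moderated(body.prompt):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Message rejected by content moderation",
        )
    return {"status": "accepted"}
```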
134 changes: 134 additions & 0 deletions src/conversation-api/ai/openai.py
@@ -0,0 +1,134 @@
# Import utils
from uuid import UUID
from utils import (build_logger, get_config, hash_token)

# Import misc
from azure.identity import DefaultAzureCredential
from models.user import UserModel
from tenacity import retry, stop_after_attempt, wait_random_exponential
from typing import Any, Dict, List, AsyncGenerator, Union
import asyncio
import openai


###
# Init misc
###

logger = build_logger(__name__)
loop = asyncio.get_running_loop()


###
# Init OpenAI
###

async def refresh_oai_token_background():
    """
    Refresh the OpenAI token every 15 minutes.
    The OpenAI SDK does not support token refresh, so we do it manually and pass the token to the SDK ourselves. Azure AD tokens are valid for 30 minutes, but we refresh every 15 minutes to be safe.
    See: https://github.com/openai/openai-python/pull/350#issuecomment-1489813285
    """
    while True:
        logger.info("Refreshing OpenAI token")
        oai_cred = DefaultAzureCredential()
        oai_token = oai_cred.get_token("https://cognitiveservices.azure.com/.default")
        openai.api_key = oai_token.token
        # Refresh every 15 minutes
        await asyncio.sleep(15 * 60)


openai.api_base = get_config("openai", "api_base", str, required=True)
openai.api_type = "azure_ad"
openai.api_version = "2023-05-15"
logger.info(f"Using Aure private service ({openai.api_base})")
loop.create_task(refresh_oai_token_background())

OAI_GPT_DEPLOY_ID = get_config("openai", "gpt_deploy_id", str, required=True)
OAI_GPT_MAX_TOKENS = get_config("openai", "gpt_max_tokens", int, required=True)
OAI_GPT_MODEL = get_config(
    "openai", "gpt_model", str, default="gpt-3.5-turbo", required=True
)
logger.info(
    f'Using OpenAI GPT model "{OAI_GPT_MODEL}" ({OAI_GPT_DEPLOY_ID}) with {OAI_GPT_MAX_TOKENS} tokens max'
)

OAI_ADA_DEPLOY_ID = get_config("openai", "ada_deploy_id", str, required=True)
OAI_ADA_MAX_TOKENS = get_config("openai", "ada_max_tokens", int, required=True)
OAI_ADA_MODEL = get_config(
    "openai", "ada_model", str, default="text-embedding-ada-002", required=True
)
logger.info(
    f'Using OpenAI ADA model "{OAI_ADA_MODEL}" ({OAI_ADA_DEPLOY_ID}) with {OAI_ADA_MAX_TOKENS} tokens max'
)


class OpenAI:
    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def vector_from_text(self, prompt: str, user_id: UUID) -> List[float]:
        logger.debug(f"Getting vector for text: {prompt}")
        try:
            res = openai.Embedding.create(
                deployment_id=OAI_ADA_DEPLOY_ID,
                input=prompt,
                model=OAI_ADA_MODEL,
                user=user_id.hex,
            )
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return []

        return res.data[0].embedding

    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def completion(self, messages: List[Dict[str, str]], current_user: UserModel) -> Union[str, None]:
        try:
            # Use chat completion to get a more natural response and lower the usage cost
            completion = openai.ChatCompletion.create(
                deployment_id=OAI_GPT_DEPLOY_ID,
                messages=messages,
                model=OAI_GPT_MODEL,
                presence_penalty=1,  # Increase the model's likelihood to talk about new topics
                user=hash_token(current_user.id.bytes).hex,
            )
            content = completion["choices"][0].message.content
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return

        return content

    @retry(
        reraise=True,
        stop=stop_after_attempt(3),
        wait=wait_random_exponential(multiplier=0.5, max=30),
    )
    async def completion_stream(self, messages: List[Dict[str, str]], current_user: UserModel) -> AsyncGenerator[Any, None]:
        try:
            # Use chat completion to get a more natural response and lower the usage cost
            chunks = openai.ChatCompletion.create(
                deployment_id=OAI_GPT_DEPLOY_ID,
                messages=messages,
                model=OAI_GPT_MODEL,
                presence_penalty=1,  # Increase the model's likelihood to talk about new topics
                stream=True,
                user=hash_token(current_user.id.bytes).hex,
            )
        except openai.error.AuthenticationError as e:
            logger.exception(e)
            return

        for chunk in chunks:
            content = chunk["choices"][0].get("delta", {}).get("content")
            if content is not None:
                yield content
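And a hedged sketch of how the two completion helpers might be consumed; note that this module calls `asyncio.get_running_loop()` at import time, so it can only be imported once an event loop is running. The `UserModel` construction below is a guess for illustration:

```python
# Hypothetical usage sketch, not part of this commit: get a full completion,
# then stream one token by token.
import asyncio
from uuid import uuid4


async def main() -> None:
    # ai.openai needs a running event loop at import time (it schedules the
    # token-refresh task), so import it inside the coroutine.
    from ai.openai import OpenAI
    from models.user import UserModel

    client = OpenAI()
    user = UserModel(id=uuid4())  # assumes UserModel only needs an id here
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize our meeting notes."},
    ]

    # Blocking-style call: returns the whole answer (or None on auth failure).
    answer = await client.completion(messages, user)
    print(answer)

    # Streaming call: yields content deltas as they arrive.
    async for token in client.completion_stream(messages, user):
        print(token, end="", flush=True)


asyncio.run(main())
```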