diff --git a/assets/schema/dbgpt.sql b/assets/schema/dbgpt.sql index 0b3609b03..462c09644 100644 --- a/assets/schema/dbgpt.sql +++ b/assets/schema/dbgpt.sql @@ -32,7 +32,7 @@ CREATE TABLE IF NOT EXISTS `knowledge_document` `id` int NOT NULL AUTO_INCREMENT COMMENT 'auto increment id', `doc_name` varchar(100) NOT NULL COMMENT 'document path name', `doc_type` varchar(50) NOT NULL COMMENT 'doc type', - `doc_token` varchar(100) NOT NULL COMMENT 'doc token', + `doc_token` varchar(100) NULL COMMENT 'doc token', `space` varchar(50) NOT NULL COMMENT 'knowledge space', `chunk_size` int NOT NULL COMMENT 'chunk size', `last_sync` TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT 'last sync time', @@ -56,7 +56,7 @@ CREATE TABLE IF NOT EXISTS `document_chunk` `document_id` int NOT NULL COMMENT 'document parent id', `content` longtext NOT NULL COMMENT 'chunk content', `questions` text NULL COMMENT 'chunk related questions', - `meta_info` varchar(200) NOT NULL COMMENT 'metadata info', + `meta_info` text NOT NULL COMMENT 'metadata info', `gmt_created` timestamp NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'created time', `gmt_modified` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time', PRIMARY KEY (`id`), diff --git a/dbgpt/app/initialization/serve_initialization.py b/dbgpt/app/initialization/serve_initialization.py index 146412ecb..95ae873b0 100644 --- a/dbgpt/app/initialization/serve_initialization.py +++ b/dbgpt/app/initialization/serve_initialization.py @@ -122,3 +122,11 @@ def register_serve_apps(system_app: SystemApp, cfg: Config, webserver_port: int) system_app.register(FileServe) # ################################ File Serve Register End ######################################## + + # ################################ Evaluate Serve Register Begin ####################################### + from dbgpt.serve.evaluate.serve import Serve as EvaluateServe + + # Register serve Evaluate + system_app.register(EvaluateServe) + + # ################################ Evaluate Serve Register End ######################################## diff --git a/dbgpt/app/knowledge/api.py b/dbgpt/app/knowledge/api.py index 6b1a37d38..cb5c8b370 100644 --- a/dbgpt/app/knowledge/api.py +++ b/dbgpt/app/knowledge/api.py @@ -447,7 +447,7 @@ def chunk_list( "doc_type": query_request.doc_type, "content": query_request.content, } - chunk_res = service.get_chunk_list( + chunk_res = service.get_chunk_list_page( query, query_request.page, query_request.page_size ) res = ChunkQueryResponse( diff --git a/dbgpt/client/evaluation.py b/dbgpt/client/evaluation.py new file mode 100644 index 000000000..5d6f80813 --- /dev/null +++ b/dbgpt/client/evaluation.py @@ -0,0 +1,28 @@ +"""Evaluation.""" +from typing import List + +from dbgpt.core.schema.api import Result + +from ..core.interface.evaluation import EvaluationResult +from ..serve.evaluate.api.schemas import EvaluateServeRequest +from .client import Client, ClientException + + +async def run_evaluation( + client: Client, request: EvaluateServeRequest +) -> List[EvaluationResult]: + """Run evaluation. + + Args: + client (Client): The dbgpt client. + request (EvaluateServeRequest): The Evaluate Request. 
+ """ + try: + res = await client.post("/evaluate/evaluation", request.dict()) + result: Result = res.json() + if result["success"]: + return list(result["data"]) + else: + raise ClientException(status=result["err_code"], reason=result) + except Exception as e: + raise ClientException(f"Failed to run evaluation: {e}") diff --git a/dbgpt/core/interface/evaluation.py b/dbgpt/core/interface/evaluation.py index d358bb84d..55776f08b 100644 --- a/dbgpt/core/interface/evaluation.py +++ b/dbgpt/core/interface/evaluation.py @@ -287,7 +287,7 @@ def __init__(self): def register_metric(self, cls: Type[EvaluationMetric]): """Register metric.""" - self.metrics[cls.name] = cls + self.metrics[cls.name()] = cls def get_by_name(self, name: str) -> Type[EvaluationMetric]: """Get by name.""" @@ -308,4 +308,4 @@ def all_metric_infos(self): return result -metric_mange = MetricManage() +metric_manage = MetricManage() diff --git a/dbgpt/rag/evaluation/answer.py b/dbgpt/rag/evaluation/answer.py index 119437064..852824aba 100644 --- a/dbgpt/rag/evaluation/answer.py +++ b/dbgpt/rag/evaluation/answer.py @@ -287,7 +287,7 @@ async def _do_evaluation( contexts=contexts, passing=result.passing, raw_dataset=raw_dataset, - metric_name=metric.name, + metric_name=metric.name(), feedback=result.feedback, ) ) diff --git a/dbgpt/rag/index/base.py b/dbgpt/rag/index/base.py index bc47fd161..c1fdccf17 100644 --- a/dbgpt/rag/index/base.py +++ b/dbgpt/rag/index/base.py @@ -184,6 +184,7 @@ async def aload_document_with_limit( max_threads, ) + @abstractmethod def similar_search( self, text: str, topk: int, filters: Optional[MetadataFilters] = None ) -> List[Chunk]: @@ -196,16 +197,26 @@ def similar_search( Return: List[Chunk]: The similar documents. """ - return self.similar_search_with_scores(text, topk, 0.0, filters) + + async def asimilar_search( + self, + query: str, + topk: int, + filters: Optional[MetadataFilters] = None, + ) -> List[Chunk]: + """Async similar_search in vector database.""" + return await blocking_func_to_async_no_executor( + self.similar_search, query, topk, filters + ) async def asimilar_search_with_scores( self, - doc: str, + query: str, topk: int, score_threshold: float, filters: Optional[MetadataFilters] = None, ) -> List[Chunk]: - """Aynsc similar_search_with_score in vector database.""" + """Async similar_search_with_score in vector database.""" return await blocking_func_to_async_no_executor( - self.similar_search_with_scores, doc, topk, score_threshold, filters + self.similar_search_with_scores, query, topk, score_threshold, filters ) diff --git a/dbgpt/rag/operators/evaluation.py b/dbgpt/rag/operators/evaluation.py index 71c0aff1a..6218d8270 100644 --- a/dbgpt/rag/operators/evaluation.py +++ b/dbgpt/rag/operators/evaluation.py @@ -54,7 +54,7 @@ async def _do_evaluation( contexts=contexts, passing=result.passing, raw_dataset=raw_dataset, - metric_name=metric.name, + metric_name=metric.name(), ) ) return results diff --git a/dbgpt/rag/retriever/embedding.py b/dbgpt/rag/retriever/embedding.py index 29026e60c..96aa86199 100644 --- a/dbgpt/rag/retriever/embedding.py +++ b/dbgpt/rag/retriever/embedding.py @@ -10,7 +10,6 @@ from dbgpt.rag.retriever.rewrite import QueryRewrite from dbgpt.storage.vector_store.filters import MetadataFilters from dbgpt.util.chat_util import run_async_tasks -from dbgpt.util.executor_utils import blocking_func_to_async_no_executor from dbgpt.util.tracer import root_tracer @@ -241,9 +240,7 @@ async def _similarity_search( "query": query, }, ): - return await 
blocking_func_to_async_no_executor( - self._index_store.similar_search, query, self._top_k, filters - ) + return await self._index_store.asimilar_search(query, self._top_k, filters) async def _run_async_tasks(self, tasks) -> List[Chunk]: """Run async tasks.""" diff --git a/dbgpt/serve/agent/agents/controller.py b/dbgpt/serve/agent/agents/controller.py index 7bd5e09f9..9e7f598cf 100644 --- a/dbgpt/serve/agent/agents/controller.py +++ b/dbgpt/serve/agent/agents/controller.py @@ -21,6 +21,7 @@ DefaultAWELLayoutManager, GptsMemory, LLMConfig, + ResourceType, ShortTermMemory, UserProxyAgent, get_agent_manager, @@ -43,6 +44,7 @@ from dbgpt.util.json_utils import serialize from dbgpt.util.tracer import TracerManager +from ...rag.retriever.knowledge_space import KnowledgeSpaceRetriever from ..db import GptsMessagesDao from ..db.gpts_app import GptsApp, GptsAppDao, GptsAppQuery from ..db.gpts_conversations_db import GptsConversationsDao, GptsConversationsEntity @@ -602,5 +604,26 @@ async def topic_terminate( last_gpts_conversation.conv_id, Status.COMPLETE.value ) + async def get_knowledge_resources(self, app_code: str, question: str): + """Get the knowledge resources.""" + context = [] + app: GptsApp = self.get_app(app_code) + if app and app.details and len(app.details) > 0: + for detail in app.details: + if detail and detail.resources and len(detail.resources) > 0: + for resource in detail.resources: + if resource.type == ResourceType.Knowledge: + retriever = KnowledgeSpaceRetriever( + space_id=str(resource.value), + top_k=CFG.KNOWLEDGE_SEARCH_TOP_SIZE, + ) + chunks = await retriever.aretrieve_with_scores( + question, score_threshold=0.3 + ) + context.extend([chunk.content for chunk in chunks]) + else: + continue + return context + multi_agents = MultiAgents(system_app) diff --git a/dbgpt/serve/agent/evaluation/evaluation.py b/dbgpt/serve/agent/evaluation/evaluation.py index e81f61a80..5562b983f 100644 --- a/dbgpt/serve/agent/evaluation/evaluation.py +++ b/dbgpt/serve/agent/evaluation/evaluation.py @@ -116,8 +116,9 @@ async def _do_evaluation( contexts=contexts, passing=result.passing, raw_dataset=raw_dataset, - metric_name=metric.name, + metric_name=metric.name(), prediction_cost=prediction_cost, + feedback=result.feedback, ) ) return results diff --git a/dbgpt/serve/agent/evaluation/evaluation_metric.py b/dbgpt/serve/agent/evaluation/evaluation_metric.py index c85d2dc85..4d29dcb11 100644 --- a/dbgpt/serve/agent/evaluation/evaluation_metric.py +++ b/dbgpt/serve/agent/evaluation/evaluation_metric.py @@ -6,7 +6,13 @@ from dbgpt.core.interface.evaluation import ( BaseEvaluationResult, EvaluationMetric, - metric_mange, + metric_manage, +) +from dbgpt.rag.evaluation.answer import AnswerRelevancyMetric +from dbgpt.rag.evaluation.retriever import ( + RetrieverHitRateMetric, + RetrieverMRRMetric, + RetrieverSimilarityMetric, ) logger = logging.getLogger(__name__) @@ -116,5 +122,7 @@ def sync_compute( ) -metric_mange.register_metric(IntentMetric) -metric_mange.register_metric(AppLinkMetric) +metric_manage.register_metric(RetrieverHitRateMetric) +metric_manage.register_metric(RetrieverMRRMetric) +metric_manage.register_metric(RetrieverSimilarityMetric) +metric_manage.register_metric(AnswerRelevancyMetric) diff --git a/dbgpt/serve/evaluate/__init__.py b/dbgpt/serve/evaluate/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dbgpt/serve/evaluate/api/__init__.py b/dbgpt/serve/evaluate/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/dbgpt/serve/evaluate/api/endpoints.py b/dbgpt/serve/evaluate/api/endpoints.py new file mode 100644 index 000000000..a7b893c23 --- /dev/null +++ b/dbgpt/serve/evaluate/api/endpoints.py @@ -0,0 +1,155 @@ +import logging +from functools import cache +from typing import List, Optional + +from fastapi import APIRouter, Depends, HTTPException +from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer + +from dbgpt.component import ComponentType, SystemApp +from dbgpt.core.interface.evaluation import metric_manage +from dbgpt.model.cluster import BaseModelController, WorkerManager, WorkerManagerFactory +from dbgpt.rag.evaluation.answer import AnswerRelevancyMetric +from dbgpt.serve.core import Result +from dbgpt.serve.evaluate.api.schemas import EvaluateServeRequest, EvaluateServeResponse +from dbgpt.serve.evaluate.config import SERVE_SERVICE_COMPONENT_NAME +from dbgpt.serve.evaluate.service.service import Service + +from ...prompt.service.service import Service as PromptService + +router = APIRouter() + +# Add your API endpoints here + +global_system_app: Optional[SystemApp] = None +logger = logging.getLogger(__name__) + + +def get_service() -> Service: + """Get the service instance""" + return global_system_app.get_component(SERVE_SERVICE_COMPONENT_NAME, Service) + + +def get_prompt_service() -> PromptService: + return global_system_app.get_component("dbgpt_serve_prompt_service", PromptService) + + +def get_worker_manager() -> WorkerManager: + worker_manager = global_system_app.get_component( + ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory + ).create() + return worker_manager + + +def get_model_controller() -> BaseModelController: + controller = global_system_app.get_component( + ComponentType.MODEL_CONTROLLER, BaseModelController + ) + return controller + + +get_bearer_token = HTTPBearer(auto_error=False) + + +@cache +def _parse_api_keys(api_keys: str) -> List[str]: + """Parse the string api keys to a list + + Args: + api_keys (str): The string api keys + + Returns: + List[str]: The list of api keys + """ + if not api_keys: + return [] + return [key.strip() for key in api_keys.split(",")] + + +async def check_api_key( + auth: Optional[HTTPAuthorizationCredentials] = Depends(get_bearer_token), + service: Service = Depends(get_service), +) -> Optional[str]: + """Check the api key + + If the api key is not set, allow all. + + Your can pass the token in you request header like this: + + .. 
code-block:: python + + import requests + + client_api_key = "your_api_key" + headers = {"Authorization": "Bearer " + client_api_key} + res = requests.get("http://test/hello", headers=headers) + assert res.status_code == 200 + + """ + if service.config.api_keys: + api_keys = _parse_api_keys(service.config.api_keys) + if auth is None or (token := auth.credentials) not in api_keys: + raise HTTPException( + status_code=401, + detail={ + "error": { + "message": "", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + } + }, + ) + return token + else: + # api_keys not set; allow all + return None + + +@router.get("/health", dependencies=[Depends(check_api_key)]) +async def health(): + """Health check endpoint""" + return {"status": "ok"} + + +@router.get("/test_auth", dependencies=[Depends(check_api_key)]) +async def test_auth(): + """Test auth endpoint""" + return {"status": "ok"} + + +@router.get("/scenes") +async def get_scenes(): + scene_list = [{"recall": "召回评测"}, {"app": "应用评测"}] + + return Result.succ(scene_list) + + +@router.post("/evaluation") +async def evaluation( + request: EvaluateServeRequest, + service: Service = Depends(get_service), +) -> Result: + """Evaluate results by the scene + + Args: + request (EvaluateServeRequest): The request + service (Service): The service + Returns: + ServerResponse: The response + """ + return Result.succ( + await service.run_evaluation( + request.scene_key, + request.scene_value, + request.datasets, + request.context, + request.evaluate_metrics, + ) + ) + + +def init_endpoints(system_app: SystemApp) -> None: + """Initialize the endpoints""" + global global_system_app + system_app.register(Service) + global_system_app = system_app diff --git a/dbgpt/serve/evaluate/api/schemas.py b/dbgpt/serve/evaluate/api/schemas.py new file mode 100644 index 000000000..76a782186 --- /dev/null +++ b/dbgpt/serve/evaluate/api/schemas.py @@ -0,0 +1,63 @@ +from enum import Enum +from typing import Any, Dict, List, Optional + +from dbgpt._private.pydantic import BaseModel, Field + +from ..config import SERVE_APP_NAME_HUMP + + +class EvaluationScene(Enum): + RECALL = "recall" + APP = "app" + + +class DatasetStorageType(Enum): + OSS = "oss" + DB = "db" + + +class EvaluateServeRequest(BaseModel): + evaluate_code: Optional[str] = Field(None, description="evaluation code") + scene_key: Optional[str] = Field(None, description="evaluation scene key") + scene_value: Optional[str] = Field(None, description="evaluation scene value") + datasets_name: Optional[str] = Field(None, description="evaluation datasets name") + datasets: Optional[List[dict]] = Field(None, description="datasets") + evaluate_metrics: Optional[List[str]] = Field( + None, description="evaluation metrics" + ) + context: Optional[dict] = Field(None, description="The context of the evaluate") + user_name: Optional[str] = Field(None, description="user name") + user_id: Optional[str] = Field(None, description="user id") + sys_code: Optional[str] = Field(None, description="system code") + parallel_num: Optional[int] = Field(None, description="system code") + state: Optional[str] = Field(None, description="evaluation state") + result: Optional[str] = Field(None, description="evaluation result") + storage_type: Optional[str] = Field(None, comment="datasets storage type") + average_score: Optional[str] = Field(None, description="evaluation average score") + log_info: Optional[str] = Field(None, description="evaluation log_info") + gmt_create: Optional[str] = Field(None, 
description="create time") + gmt_modified: Optional[str] = Field(None, description="create time") + + +class EvaluateServeResponse(EvaluateServeRequest): + class Config: + title = f"EvaluateServeResponse for {SERVE_APP_NAME_HUMP}" + + +class DatasetServeRequest(BaseModel): + code: Optional[str] = Field(None, description="dataset code") + name: Optional[str] = Field(None, description="dataset name") + file_type: Optional[str] = Field(None, description="dataset file type") + storage_type: Optional[str] = Field(None, comment="datasets storage type") + storage_position: Optional[str] = Field(None, comment="datasets storage position") + datasets_count: Optional[int] = Field(None, comment="datasets row count") + have_answer: Optional[bool] = Field(None, comment="datasets have answer") + members: Optional[str] = Field(None, comment="datasets manager members") + user_name: Optional[str] = Field(None, description="user name") + user_id: Optional[str] = Field(None, description="user id") + sys_code: Optional[str] = Field(None, description="system code") + + +class DatasetServeResponse(DatasetServeRequest): + gmt_create: Optional[str] = Field(None, description="create time") + gmt_modified: Optional[str] = Field(None, description="create time") diff --git a/dbgpt/serve/evaluate/config.py b/dbgpt/serve/evaluate/config.py new file mode 100644 index 000000000..45c86f43b --- /dev/null +++ b/dbgpt/serve/evaluate/config.py @@ -0,0 +1,31 @@ +from dataclasses import dataclass, field +from typing import Optional + +from dbgpt.serve.core import BaseServeConfig + +APP_NAME = "evaluate" +SERVE_APP_NAME = "dbgpt_serve_evaluate" +SERVE_APP_NAME_HUMP = "dbgpt_serve_evaluate" +SERVE_CONFIG_KEY_PREFIX = "dbgpt.serve.evaluate." +SERVE_SERVICE_COMPONENT_NAME = f"{SERVE_APP_NAME}_service" +# Database table name +SERVER_APP_TABLE_NAME = "dbgpt_serve_evaluate" + + +@dataclass +class ServeConfig(BaseServeConfig): + """Parameters for the serve command""" + + # TODO: add your own parameters here + api_keys: Optional[str] = field( + default=None, metadata={"help": "API keys for the endpoint, if None, allow all"} + ) + + default_user: Optional[str] = field( + default=None, + metadata={"help": "Default user name for evaluate"}, + ) + default_sys_code: Optional[str] = field( + default=None, + metadata={"help": "Default system code for evaluate"}, + ) diff --git a/dbgpt/serve/evaluate/dependencies.py b/dbgpt/serve/evaluate/dependencies.py new file mode 100644 index 000000000..8598ecd97 --- /dev/null +++ b/dbgpt/serve/evaluate/dependencies.py @@ -0,0 +1 @@ +# Define your dependencies here diff --git a/dbgpt/serve/evaluate/models/__init__.py b/dbgpt/serve/evaluate/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dbgpt/serve/evaluate/models/models.py b/dbgpt/serve/evaluate/models/models.py new file mode 100644 index 000000000..94b399503 --- /dev/null +++ b/dbgpt/serve/evaluate/models/models.py @@ -0,0 +1,157 @@ +"""This is an auto-generated model file +You can define your own models and DAOs here +""" +import json +import uuid +from datetime import datetime +from typing import Any, Dict, Union + +from sqlalchemy import Column, DateTime, Index, Integer, String, Text, UniqueConstraint + +from dbgpt.agent.core.schema import Status +from dbgpt.storage.metadata import BaseDao, Model, db + +from ..api.schemas import EvaluateServeRequest, EvaluateServeResponse +from ..config import SERVER_APP_TABLE_NAME, ServeConfig + + +class ServeEntity(Model): + __tablename__ = "evaluate_manage" + __table_args__ = ( + 
UniqueConstraint( + "evaluate_code", + name="uk_evaluate_code", + ), + ) + id = Column(Integer, primary_key=True, comment="Auto increment id") + evaluate_code = Column(String(256), comment="evaluate Code") + scene_key = Column(String(100), comment="evaluate scene key") + scene_value = Column(String(256), comment="evaluate scene value") + context = Column(Text, comment="evaluate scene run context") + evaluate_metrics = Column(String(599), comment="evaluate metrics") + datasets_name = Column(String(256), comment="datasets name") + datasets = Column(Text, comment="datasets") + storage_type = Column(String(256), comment="datasets storage type") + parallel_num = Column(Integer, comment="datasets run parallel num") + state = Column(String(100), comment="evaluate state") + result = Column(Text, comment="evaluate result") + log_info = Column(Text, comment="evaluate log info") + average_score = Column(Text, comment="evaluate average score") + user_id = Column(String(100), index=True, nullable=True, comment="User id") + user_name = Column(String(128), index=True, nullable=True, comment="User name") + sys_code = Column(String(128), index=True, nullable=True, comment="System code") + gmt_create = Column(DateTime, default=datetime.now, comment="Record creation time") + gmt_modified = Column( + DateTime, + default=datetime.now, + onupdate=datetime.now, + comment="Record update time", + ) + + def __repr__(self): + return f"ServeEntity(id={self.id}, evaluate_code='{self.evaluate_code}', scene_key='{self.scene_key}', scene_value='{self.scene_value}', datasets='{self.datasets}', user_id='{self.user_id}', user_name='{self.user_name}', sys_code='{self.sys_code}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')" + + +class ServeDao(BaseDao[ServeEntity, EvaluateServeRequest, EvaluateServeResponse]): + """The DAO class for Prompt""" + + def __init__(self, serve_config: ServeConfig): + super().__init__() + self._serve_config = serve_config + + def from_request( + self, request: Union[EvaluateServeRequest, Dict[str, Any]] + ) -> ServeEntity: + """Convert the request to an entity + + Args: + request (Union[EvaluateServeRequest, Dict[str, Any]]): The request + + Returns: + T: The entity + """ + request_dict = ( + request.dict() if isinstance(request, EvaluateServeRequest) else request + ) + entity = ServeEntity( + evaluate_code=request_dict.get("evaluate_code", None), + scene_key=request_dict.get("scene_key", None), + scene_value=request_dict.get("scene_value", None), + context=json.dumps(request_dict.get("context", None)) + if request_dict.get("context", None) + else None, + evaluate_metrics=request_dict.get("evaluate_metrics", None), + datasets_name=request_dict.get("datasets_name", None), + datasets=request_dict.get("datasets", None), + storage_type=request_dict.get("storage_type", None), + parallel_num=request_dict.get("parallel_num", 1), + state=request_dict.get("state", Status.TODO.value), + result=request_dict.get("result", None), + average_score=request_dict.get("average_score", None), + log_info=request_dict.get("log_info", None), + user_id=request_dict.get("user_id", None), + user_name=request_dict.get("user_name", None), + sys_code=request_dict.get("sys_code", None), + ) + if not entity.evaluate_code: + entity.evaluate_code = uuid.uuid1().hex + return entity + + def to_request(self, entity: ServeEntity) -> EvaluateServeRequest: + """Convert the entity to a request + + Args: + entity (T): The entity + + Returns: + REQ: The request + """ + + return EvaluateServeRequest( + 
evaluate_code=entity.evaluate_code, + scene_key=entity.scene_key, + scene_value=entity.scene_value, + datasets_name=entity.datasets_name, + datasets=entity.datasets, + storage_type=entity.storage_type, + evaluate_metrics=entity.evaluate_metrics, + context=json.loads(entity.context) if entity.context else None, + user_name=entity.user_name, + user_id=entity.user_id, + sys_code=entity.sys_code, + state=entity.state, + result=entity.result, + average_score=entity.average_score, + log_info=entity.log_info, + ) + + def to_response(self, entity: ServeEntity) -> EvaluateServeResponse: + """Convert the entity to a response + + Args: + entity (T): The entity + + Returns: + RES: The response + """ + gmt_created_str = entity.gmt_create.strftime("%Y-%m-%d %H:%M:%S") + gmt_modified_str = entity.gmt_modified.strftime("%Y-%m-%d %H:%M:%S") + return EvaluateServeResponse( + evaluate_code=entity.evaluate_code, + scene_key=entity.scene_key, + scene_value=entity.scene_value, + datasets_name=entity.datasets_name, + datasets=entity.datasets, + storage_type=entity.storage_type, + evaluate_metrics=entity.evaluate_metrics, + context=json.loads(entity.context) if entity.context else None, + user_name=entity.user_name, + user_id=entity.user_id, + sys_code=entity.sys_code, + state=entity.state, + result=entity.result, + average_score=entity.average_score, + log_info=entity.log_info, + gmt_create=gmt_created_str, + gmt_modified=gmt_modified_str, + ) diff --git a/dbgpt/serve/evaluate/models/models_dataset.py b/dbgpt/serve/evaluate/models/models_dataset.py new file mode 100644 index 000000000..bc1d495d0 --- /dev/null +++ b/dbgpt/serve/evaluate/models/models_dataset.py @@ -0,0 +1,122 @@ +from datetime import datetime +from typing import Any, Dict, Union + +from sqlalchemy import Column, DateTime, Index, Integer, String, Text, UniqueConstraint + +from dbgpt.storage.metadata import BaseDao, Model, db + +from ..api.schemas import DatasetServeRequest, DatasetServeResponse +from ..config import SERVER_APP_TABLE_NAME, ServeConfig + + +class DatasetServeEntity(Model): + __tablename__ = "evaluate_datasets" + __table_args__ = ( + UniqueConstraint( + "code", + name="uk_dataset", + ), + UniqueConstraint( + "name", + name="uk_dataset_name", + ), + ) + id = Column(Integer, primary_key=True, comment="Auto increment id") + code = Column(String(256), comment="evaluate datasets Code") + name = Column(String(1000), comment="evaluate datasets Name") + file_type = Column(String(256), comment="datasets file type") + storage_type = Column(String(256), comment="datasets storage type") + storage_position = Column(Text, comment="datasets storage position") + datasets_count = Column(Integer, comment="datasets row count") + have_answer = Column(String(10), comment="datasets have answer") + members = Column(String(1000), comment="evaluate datasets members") + user_id = Column(String(100), index=True, nullable=True, comment="User id") + user_name = Column(String(128), index=True, nullable=True, comment="User name") + sys_code = Column(String(128), index=True, nullable=True, comment="System code") + gmt_create = Column(DateTime, default=datetime.now, comment="Record creation time") + gmt_modified = Column( + DateTime, + default=datetime.now, + onupdate=datetime.now, + comment="Record update time", + ) + + def __repr__(self): + return f"ServeEntity(id={self.id}, code='{self.code}', name='{self.name}', file_type='{self.file_type}', storage_type='{self.storage_type}', storage_position='{self.storage_position}', datasets_count='{self.datasets_count}', 
user_id='{self.user_id}', user_name='{self.user_name}', sys_code='{self.sys_code}', gmt_create='{self.gmt_create}', gmt_modified='{self.gmt_modified}')" + + +class DatasetServeDao( + BaseDao[DatasetServeEntity, DatasetServeRequest, DatasetServeResponse] +): + """The DAO class for Prompt""" + + def __init__(self, serve_config: ServeConfig): + super().__init__() + self._serve_config = serve_config + + def from_request( + self, request: Union[DatasetServeRequest, Dict[str, Any]] + ) -> DatasetServeEntity: + """Convert the request to an entity + + Args: + request (Union[DatasetServeRequest, Dict[str, Any]]): The request + + Returns: + T: The entity + """ + request_dict = ( + request.dict() if isinstance(request, DatasetServeRequest) else request + ) + entity = DatasetServeEntity(**request_dict) + return entity + + def to_request(self, entity: DatasetServeEntity) -> DatasetServeRequest: + """Convert the entity to a request + + Args: + entity (T): The entity + + Returns: + REQ: The request + """ + return DatasetServeRequest( + code=entity.code, + name=entity.name, + file_type=entity.file_type, + storage_type=entity.storage_type, + storage_position=entity.storage_position, + datasets_count=entity.datasets_count, + have_answer=entity.have_answer, + members=entity.members, + user_name=entity.user_name, + user_id=entity.user_id, + sys_code=entity.sys_code, + ) + + def to_response(self, entity: DatasetServeEntity) -> DatasetServeResponse: + """Convert the entity to a response + + Args: + entity (T): The entity + + Returns: + RES: The response + """ + gmt_created_str = entity.gmt_create.strftime("%Y-%m-%d %H:%M:%S") + gmt_modified_str = entity.gmt_modified.strftime("%Y-%m-%d %H:%M:%S") + return DatasetServeResponse( + code=entity.code, + name=entity.name, + file_type=entity.file_type, + storage_type=entity.storage_type, + storage_position=entity.storage_position, + datasets_count=entity.datasets_count, + have_answer=entity.have_answer, + members=entity.members, + user_name=entity.user_name, + user_id=entity.user_id, + sys_code=entity.sys_code, + gmt_create=gmt_created_str, + gmt_modified=gmt_modified_str, + ) diff --git a/dbgpt/serve/evaluate/serve.py b/dbgpt/serve/evaluate/serve.py new file mode 100644 index 000000000..6c9a7f224 --- /dev/null +++ b/dbgpt/serve/evaluate/serve.py @@ -0,0 +1,119 @@ +import logging +from typing import List, Optional, Union + +from sqlalchemy import URL + +from dbgpt.component import SystemApp +from dbgpt.serve.core import BaseServe +from dbgpt.storage.metadata import DatabaseManager + +from .api.endpoints import init_endpoints, router +from .config import APP_NAME, SERVE_APP_NAME, SERVE_APP_NAME_HUMP + +logger = logging.getLogger(__name__) + + +class Serve(BaseServe): + """Serve component + + Examples: + + Register the serve component to the system app + + .. code-block:: python + + from fastapi import FastAPI + from dbgpt import SystemApp + from dbgpt.core import PromptTemplate + from dbgpt.serve.prompt.serve import Serve, SERVE_APP_NAME + + app = FastAPI() + system_app = SystemApp(app) + system_app.register(Serve, api_prefix="/api/v1/prompt") + system_app.on_init() + # Run before start hook + system_app.before_start() + + prompt_serve = system_app.get_component(SERVE_APP_NAME, Serve) + + # Get the prompt manager + prompt_manager = prompt_serve.prompt_manager + prompt_manager.save( + PromptTemplate(template="Hello {name}", input_variables=["name"]), + prompt_name="prompt_name", + ) + + With your database url + + .. 
code-block:: python + + from fastapi import FastAPI + from dbgpt import SystemApp + from dbgpt.core import PromptTemplate + from dbgpt.serve.prompt.serve import Serve, SERVE_APP_NAME + + app = FastAPI() + system_app = SystemApp(app) + system_app.register( + Serve, + api_prefix="/api/v1/prompt", + db_url_or_db="sqlite:///:memory:", + try_create_tables=True, + ) + system_app.on_init() + # Run before start hook + system_app.before_start() + + prompt_serve = system_app.get_component(SERVE_APP_NAME, Serve) + + # Get the prompt manager + prompt_manager = prompt_serve.prompt_manager + prompt_manager.save( + PromptTemplate(template="Hello {name}", input_variables=["name"]), + prompt_name="prompt_name", + ) + + """ + + name = SERVE_APP_NAME + + def __init__( + self, + system_app: SystemApp, + api_prefix: Optional[List[str]] = None, + api_tags: Optional[List[str]] = None, + db_url_or_db: Union[str, URL, DatabaseManager] = None, + try_create_tables: Optional[bool] = False, + ): + if api_prefix is None: + api_prefix = [f"/api/v1/{APP_NAME}", f"/api/v2/serve/{APP_NAME}"] + if api_tags is None: + api_tags = [SERVE_APP_NAME_HUMP] + super().__init__( + system_app, api_prefix, api_tags, db_url_or_db, try_create_tables + ) + + def init_app(self, system_app: SystemApp): + if self._app_has_initiated: + return + self._system_app = system_app + for prefix in self._api_prefix: + self._system_app.app.include_router( + router, prefix=prefix, tags=self._api_tags + ) + init_endpoints(self._system_app) + self._app_has_initiated = True + + def on_init(self): + """Called before the start of the application. + + You can do some initialization here. + """ + # import your own module here to ensure the module is loaded before the application starts + + def before_start(self): + """Called before the start of the application. + + You can do some initialization here. 
+ """ + # import your own module here to ensure the module is loaded before the application starts diff --git a/dbgpt/serve/evaluate/service/__init__.py b/dbgpt/serve/evaluate/service/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dbgpt/serve/evaluate/service/service.py b/dbgpt/serve/evaluate/service/service.py new file mode 100644 index 000000000..2e94a2621 --- /dev/null +++ b/dbgpt/serve/evaluate/service/service.py @@ -0,0 +1,186 @@ +import asyncio +import io +import json +import logging +from concurrent.futures import ThreadPoolExecutor +from typing import List, Optional + +from dbgpt._private.config import Config +from dbgpt.component import ComponentType, SystemApp +from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG +from dbgpt.core.interface.evaluation import ( + EVALUATE_FILE_COL_ANSWER, + EvaluationResult, + metric_manage, +) +from dbgpt.model import DefaultLLMClient +from dbgpt.model.cluster import WorkerManagerFactory +from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory +from dbgpt.rag.evaluation import RetrieverEvaluator +from dbgpt.rag.evaluation.answer import AnswerRelevancyMetric +from dbgpt.rag.evaluation.retriever import RetrieverSimilarityMetric +from dbgpt.serve.core import BaseService +from dbgpt.serve.rag.operators.knowledge_space import SpaceRetrieverOperator +from dbgpt.storage.metadata import BaseDao +from dbgpt.storage.vector_store.base import VectorStoreConfig + +from ...agent.agents.controller import multi_agents +from ...agent.evaluation.evaluation import AgentEvaluator, AgentOutputOperator +from ...agent.evaluation.evaluation_metric import IntentMetric +from ...prompt.service.service import Service as PromptService +from ...rag.connector import VectorStoreConnector +from ...rag.service.service import Service as RagService +from ..api.schemas import EvaluateServeRequest, EvaluateServeResponse, EvaluationScene +from ..config import SERVE_CONFIG_KEY_PREFIX, SERVE_SERVICE_COMPONENT_NAME, ServeConfig +from ..models.models import ServeDao, ServeEntity + +logger = logging.getLogger(__name__) + +CFG = Config() +executor = ThreadPoolExecutor(max_workers=5) + + +def get_rag_service(system_app) -> RagService: + return system_app.get_component("dbgpt_rag_service", RagService) + + +def get_prompt_service(system_app) -> PromptService: + return system_app.get_component("dbgpt_serve_prompt_service", PromptService) + + +class Service(BaseService[ServeEntity, EvaluateServeRequest, EvaluateServeResponse]): + """The service class for Evaluate""" + + name = SERVE_SERVICE_COMPONENT_NAME + + def __init__(self, system_app: SystemApp, dao: Optional[ServeDao] = None): + self._system_app = None + self._serve_config: ServeConfig = None + self._dao: ServeDao = dao + super().__init__(system_app) + self.rag_service = get_rag_service(system_app) + self.prompt_service = get_prompt_service(system_app) + + def init_app(self, system_app: SystemApp) -> None: + """Initialize the service + + Args: + system_app (SystemApp): The system app + """ + self._serve_config = ServeConfig.from_app_config( + system_app.config, SERVE_CONFIG_KEY_PREFIX + ) + self._dao = self._dao or ServeDao(self._serve_config) + self._system_app = system_app + + @property + def dao(self) -> BaseDao[ServeEntity, EvaluateServeRequest, EvaluateServeResponse]: + """Returns the internal DAO.""" + return self._dao + + @property + def config(self) -> ServeConfig: + """Returns the internal ServeConfig.""" + return self._serve_config + + async def run_evaluation( + self, + scene_key, + 
scene_value, + datasets: List[dict], + context: Optional[dict] = None, + evaluate_metrics: Optional[List[str]] = None, + parallel_num: Optional[int] = 1, + ) -> List[List[EvaluationResult]]: + """Evaluate results + + Args: + scene_key (str): The scene_key + scene_value (str): The scene_value + datasets (List[dict]): The datasets + context (Optional[dict]): The run context + evaluate_metrics (Optional[str]): The metric_names + parallel_num (Optional[int]): The parallel_num + + Returns: + List[List[EvaluationResult]]: The response + """ + + results = [] + if EvaluationScene.RECALL.value == scene_key: + embedding_factory = CFG.SYSTEM_APP.get_component( + "embedding_factory", EmbeddingFactory + ) + embeddings = embedding_factory.create( + EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] + ) + + config = VectorStoreConfig( + name=scene_value, + embedding_fn=embeddings, + ) + vector_store_connector = VectorStoreConnector( + vector_store_type=CFG.VECTOR_STORE_TYPE, + vector_store_config=config, + ) + evaluator = RetrieverEvaluator( + operator_cls=SpaceRetrieverOperator, + embeddings=embeddings, + operator_kwargs={ + "space_id": str(scene_value), + "top_k": CFG.KNOWLEDGE_SEARCH_TOP_SIZE, + "vector_store_connector": vector_store_connector, + }, + ) + metrics = [] + metric_name_list = evaluate_metrics + for name in metric_name_list: + if name == "RetrieverSimilarityMetric": + metrics.append(RetrieverSimilarityMetric(embeddings=embeddings)) + else: + metrics.append(metric_manage.get_by_name(name)()) + + for dataset in datasets: + chunks = self.rag_service.get_chunk_list( + {"doc_name": dataset.get("doc_name")} + ) + contexts = [chunk.content for chunk in chunks] + dataset["contexts"] = contexts + results = await evaluator.evaluate( + datasets, metrics=metrics, parallel_num=parallel_num + ) + elif EvaluationScene.APP.value == scene_key: + evaluator = AgentEvaluator( + operator_cls=AgentOutputOperator, + operator_kwargs={ + "app_code": scene_value, + }, + ) + + metrics = [] + metric_name_list = evaluate_metrics + for name in metric_name_list: + if name == AnswerRelevancyMetric.name(): + worker_manager = CFG.SYSTEM_APP.get_component( + ComponentType.WORKER_MANAGER_FACTORY, WorkerManagerFactory + ).create() + llm_client = DefaultLLMClient(worker_manager=worker_manager) + prompt = self.prompt_service.get_template(context.get("prompt")) + metrics.append( + AnswerRelevancyMetric( + llm_client=llm_client, + model_name=context.get("model"), + prompt_template=prompt.template, + ) + ) + for dataset in datasets: + context = await multi_agents.get_knowledge_resources( + app_code=scene_value, question=dataset.get("query") + ) + dataset[EVALUATE_FILE_COL_ANSWER] = context + else: + metrics.append(metric_manage.get_by_name(name)()) + results = await evaluator.evaluate( + dataset=datasets, metrics=metrics, parallel_num=parallel_num + ) + return results diff --git a/dbgpt/serve/rag/operators/knowledge_space.py b/dbgpt/serve/rag/operators/knowledge_space.py index 3d2e1d846..f719547ff 100644 --- a/dbgpt/serve/rag/operators/knowledge_space.py +++ b/dbgpt/serve/rag/operators/knowledge_space.py @@ -23,10 +23,8 @@ ) from dbgpt.core.awel.task.base import IN, OUT from dbgpt.core.interface.operators.prompt_operator import BasePromptBuilderOperator -from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory -from dbgpt.rag.retriever.embedding import EmbeddingRetriever -from dbgpt.serve.rag.connector import VectorStoreConnector -from dbgpt.storage.vector_store.base import VectorStoreConfig +from 
dbgpt.core.interface.operators.retriever import RetrieverOperator +from dbgpt.serve.rag.retriever.knowledge_space import KnowledgeSpaceRetriever from dbgpt.util.function_utils import rearrange_args_by_type from dbgpt.util.i18n_utils import _ @@ -40,7 +38,7 @@ def _load_space_name() -> List[OptionValue]: ] -class SpaceRetrieverOperator(MapOperator[IN, OUT]): +class SpaceRetrieverOperator(RetrieverOperator[IN, OUT]): """knowledge space retriever operator.""" metadata = ViewMetadata( @@ -71,64 +69,48 @@ class SpaceRetrieverOperator(MapOperator[IN, OUT]): documentation_url="https://github.com/openai/openai-python", ) - def __init__(self, space_name: str, recall_score: Optional[float] = 0.3, **kwargs): + def __init__( + self, + space_id: str, + top_k: Optional[int] = 5, + recall_score: Optional[float] = 0.3, + **kwargs, + ): """ Args: - space_name (str): The space name. + space_id (str): The space name. + top_k (Optional[int]): top k. recall_score (Optional[float], optional): The recall score. Defaults to 0.3. """ - self._space_name = space_name + self._space_id = space_id + self._top_k = top_k self._recall_score = recall_score self._service = KnowledgeService() - embedding_factory = CFG.SYSTEM_APP.get_component( - "embedding_factory", EmbeddingFactory - ) - embedding_fn = embedding_factory.create( - model_name=EMBEDDING_MODEL_CONFIG[CFG.EMBEDDING_MODEL] - ) - config = VectorStoreConfig(name=self._space_name, embedding_fn=embedding_fn) - self._vector_store_connector = VectorStoreConnector( - vector_store_type=CFG.VECTOR_STORE_TYPE, - vector_store_config=config, - ) super().__init__(**kwargs) - async def map(self, query: IN) -> OUT: + def retrieve(self, query: IN) -> OUT: """Map input value to output value. Args: - input_value (IN): The input value. + query (IN): The input value. Returns: OUT: The output value. 
""" - space_context = self._service.get_space_context(self._space_name) - top_k = ( - CFG.KNOWLEDGE_SEARCH_TOP_SIZE - if space_context is None - else int(space_context["embedding"]["topk"]) - ) - recall_score = ( - CFG.KNOWLEDGE_SEARCH_RECALL_SCORE - if space_context is None - else float(space_context["embedding"]["recall_score"]) - ) - embedding_retriever = EmbeddingRetriever( - top_k=top_k, - vector_store_connector=self._vector_store_connector, + space_retriever = KnowledgeSpaceRetriever( + space_id=self._space_id, + top_k=self._top_k, ) if isinstance(query, str): - candidates = await embedding_retriever.aretrieve_with_scores( - query, recall_score - ) + candidates = space_retriever.retrieve_with_scores(query, self._recall_score) elif isinstance(query, list): candidates = [ - await embedding_retriever.aretrieve_with_scores(q, recall_score) + space_retriever.retrieve_with_scores(q, self._recall_score) for q in query ] candidates = reduce(lambda x, y: x + y, candidates) - return [candidate.content for candidate in candidates] + return candidates class KnowledgeSpacePromptBuilderOperator( diff --git a/dbgpt/serve/rag/service/service.py b/dbgpt/serve/rag/service/service.py index dbc308d99..890d58a52 100644 --- a/dbgpt/serve/rag/service/service.py +++ b/dbgpt/serve/rag/service/service.py @@ -320,6 +320,10 @@ def update_document(self, request: DocumentServeRequest): entity = self._document_dao.from_response(document) if request.doc_name: entity.doc_name = request.doc_name + update_chunk = self._chunk_dao.get_one({"document_id": entity.id}) + if update_chunk: + update_chunk.doc_name = request.doc_name + self._chunk_dao.update_chunk(update_chunk) if len(request.questions) == 0: request.questions = "" questions = [ @@ -411,13 +415,20 @@ def get_document_list( """ return self._document_dao.get_list_page(request, page, page_size) - def get_chunk_list(self, request: QUERY_SPEC, page: int, page_size: int): - """get document chunks + def get_chunk_list_page(self, request: QUERY_SPEC, page: int, page_size: int): + """get document chunks with page Args: - request: QUERY_SPEC """ return self._chunk_dao.get_list_page(request, page, page_size) + def get_chunk_list(self, request: QUERY_SPEC): + """get document chunks + Args: + - request: QUERY_SPEC + """ + return self._chunk_dao.get_list(request) + def update_chunk(self, request: ChunkServeRequest): """update knowledge document chunk""" if not request.id: diff --git a/docs/docs/api/evaluation.md b/docs/docs/api/evaluation.md new file mode 100644 index 000000000..571d32e95 --- /dev/null +++ b/docs/docs/api/evaluation.md @@ -0,0 +1,205 @@ +# Evaluation + +Get started with the Evaluation API + + +### Create Evaluation + +```python +POST /api/v2/serve/evaluate/evaluation +``` +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + + + + +```shell +DBGPT_API_KEY=dbgpt +SPACE_ID={YOUR_SPACE_ID} + +curl -X POST "http://localhost:5670/api/v2/serve/evaluate/evaluation" +-H "Authorization: Bearer $DBGPT_API_KEY" \ +-H "accept: application/json" \ +-H "Content-Type: application/json" \ +-d '{ + "scene_key": "recall", + "scene_value":147, + "context":{"top_k":5}, + "sys_code":"xx", + "evaluate_metrics":["RetrieverHitRateMetric","RetrieverMRRMetric","RetrieverSimilarityMetric"], + "datasets": [{ + "query": "what awel talked about", + "doc_name":"awel.md" + }] +}' + +``` + + + + + +```python +from dbgpt.client import Client +from dbgpt.client.evaluation import run_evaluation +from dbgpt.serve.evaluate.api.schemas import EvaluateServeRequest + 
+DBGPT_API_KEY = "dbgpt"
+client = Client(api_key=DBGPT_API_KEY)
+request = EvaluateServeRequest(
+    # The scene type of the evaluation, e.g. support app, recall
+    scene_key="recall",
+    # e.g. app id(when scene_key is app), space id(when scene_key is recall)
+    scene_value="147",
+    context={"top_k": 5},
+    evaluate_metrics=[
+        "RetrieverHitRateMetric",
+        "RetrieverMRRMetric",
+        "RetrieverSimilarityMetric",
+    ],
+    datasets=[
+        {
+            "query": "what awel talked about",
+            "doc_name": "awel.md",
+        }
+    ],
+)
+data = await run_evaluation(client, request=request)
+
+```
+
+
+
+
+#### Request body
+The request body is an Evaluation Request object.
+
+When `scene_key` is `app`, the request body looks like this:
+```json
+
+{
+  "scene_key": "app",
+  "scene_value":"2c76eea2-83b6-11ef-b482-acde48001122",
+  "context":{"top_k":5, "prompt":"942acd7e33b54ce28565f89f9b278044","model":"zhipu_proxyllm"},
+  "sys_code":"xx",
+  "evaluate_metrics":["AnswerRelevancyMetric"],
+  "datasets": [{
+    "query": "what awel talked about",
+    "doc_name":"awel.md"
+  }]
+}
+```
+
+When `scene_key` is `recall`, the request body looks like this:
+```json
+
+{
+  "scene_key": "recall",
+  "scene_value":"2c76eea2-83b6-11ef-b482-acde48001122",
+  "context":{"top_k":5, "prompt":"942acd7e33b54ce28565f89f9b278044","model":"zhipu_proxyllm"},
+  "evaluate_metrics":["RetrieverHitRateMetric", "RetrieverMRRMetric", "RetrieverSimilarityMetric"],
+  "datasets": [{
+    "query": "what awel talked about",
+    "doc_name":"awel.md"
+  }]
+}
+```
+
+#### Response body
+Returns a list of Evaluation Result objects.
+
+
+### The Evaluation Request Object
+
+________
+scene_key string Required
+
+The scene type of the evaluation; supported values are `app` and `recall`
+
+--------
+scene_value string Required
+
+The scene value of the evaluation, e.g. the app id (when scene_key is app) or the space id (when scene_key is recall)
+
+--------
+context object Required
+
+The context of the evaluation
+- top_k int Required
+- prompt string The prompt code
+- model string The LLM model name
+
+--------
+evaluate_metrics array Required
+
+The evaluation metrics to run,
+e.g.
+- AnswerRelevancyMetric: the answer relevancy metric (when scene_key is app)
+- RetrieverHitRateMetric: Hit rate calculates the fraction of queries where the correct answer is found
+  within the top-k retrieved documents. In simpler terms, it’s about how often our
+  system gets it right within the top few guesses. (when scene_key is recall)
+- RetrieverMRRMetric: For each query, MRR evaluates the system’s accuracy by looking at the rank of the
+  highest-placed relevant document. Specifically, it’s the average of the reciprocals
+  of these ranks across all the queries. So, if the first relevant document is the
+  top result, the reciprocal rank is 1; if it’s second, the reciprocal rank is 1/2,
+  and so on. (when scene_key is recall)
+- RetrieverSimilarityMetric: Embedding Similarity Metric (when scene_key is recall)
+
+--------
+datasets array Required
+
+
+The datasets to evaluate. Each item contains a `query`, plus a `doc_name` when `scene_key` is `recall`.
+
+
+--------
+
+
+### The Evaluation Result
+
+________
+prediction string
+
+The prediction result
+________
+contexts string
+
+The contexts of the retrieved RAG chunks
+________
+score float
+
+The score of the prediction
+________
+passing bool
+
+Whether the prediction passes the evaluation
+________
+metric_name string
+
+The metric name of the evaluation
+________
+prediction_cost int
+
+The prediction cost of the evaluation
+________
+query string
+
+The query of the evaluation
+________
+raw_dataset object
+
+The raw dataset of the evaluation
+________
+feedback string
+
+The feedback from the LLM evaluation
+________
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 75751fbd7..a02c870e3 100755
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -421,6 +421,9 @@ const sidebars = {
       },{
         type: 'doc',
         id: 'api/datasource'
+      },{
+        type: 'doc',
+        id: 'api/evaluation'
       },
     ],
     link: {
diff --git a/examples/client/client_evaluation.py b/examples/client/client_evaluation.py
new file mode 100644
index 000000000..7ecd6aab8
--- /dev/null
+++ b/examples/client/client_evaluation.py
@@ -0,0 +1,91 @@
+"""Client: run evaluation example.
+
+This example demonstrates how to use the dbgpt client to evaluate with the rag recall
+and app answer.
+
+Example:
+    .. code-block:: python
+
+        DBGPT_API_KEY = "dbgpt"
+        client = Client(api_key=DBGPT_API_KEY)
+
+        # 1. evaluate with rag recall
+        request = EvaluateServeRequest(
+            # The scene type of the evaluation, e.g. support app, recall
+            scene_key="recall",
+            # e.g. app id(when scene_key is app), space id(when scene_key is recall)
+            scene_value="147",
+            context={"top_k": 5},
+            evaluate_metrics=[
+                "RetrieverHitRateMetric",
+                "RetrieverMRRMetric",
+                "RetrieverSimilarityMetric",
+            ],
+            datasets=[
+                {
+                    "query": "what awel talked about",
+                    "doc_name": "awel.md",
+                }
+            ],
+        )
+        # 2. evaluate with app answer
+        request = EvaluateServeRequest(
+            # The scene type of the evaluation, e.g. support app, recall
+            scene_key="app",
+            # e.g. app id(when scene_key is app), space id(when scene_key is recall)
+            scene_value="2c76eea2-83b6-11ef-b482-acde48001122",
+            context={
+                "top_k": 5,
+                "prompt": "942acd7e33b54ce28565f89f9b278044",
+                "model": "zhipu_proxyllm",
+            },
+            evaluate_metrics=[
+                "AnswerRelevancyMetric",
+            ],
+            datasets=[
+                {
+                    "query": "what awel talked about",
+                    "doc_name": "awel.md",
+                }
+            ],
+        )
+        data = await run_evaluation(client, request=request)
+        print(data)
+"""
+
+import asyncio
+
+from dbgpt.client import Client
+from dbgpt.client.evaluation import run_evaluation
+from dbgpt.serve.evaluate.api.schemas import EvaluateServeRequest
+
+
+async def main():
+    # initialize client
+    DBGPT_API_KEY = "dbgpt"
+    SPACE_ID = "147"
+    client = Client(api_key=DBGPT_API_KEY)
+    request = EvaluateServeRequest(
+        # The scene type of the evaluation, e.g. support app, recall
+        scene_key="recall",
+        # e.g. app id(when scene_key is app), space id(when scene_key is recall)
+        scene_value=SPACE_ID,
+        context={"top_k": 5},
+        evaluate_metrics=[
+            "RetrieverHitRateMetric",
+            "RetrieverMRRMetric",
+            "RetrieverSimilarityMetric",
+        ],
+        datasets=[
+            {
+                "query": "what awel talked about",
+                "doc_name": "awel.md",
+            }
+        ],
+    )
+    data = await run_evaluation(client, request=request)
+    print(data)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
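
The example above ends with a plain `print(data)`. A minimal sketch of how the returned data could be inspected instead, assuming the response keeps the nested `List[List[EvaluationResult]]` shape produced by `Service.run_evaluation` and that each result arrives as a dict carrying the fields documented in `docs/docs/api/evaluation.md` (`metric_name`, `score`, `passing`, `feedback`); the helper name is illustrative only:

```python
from typing import Any, Dict, List


def summarize_results(data: List[List[Dict[str, Any]]]) -> None:
    """Print a one-line summary per evaluation result.

    Illustrative helper; assumes one inner list per evaluated metric and
    dict-serialized EvaluationResult fields as documented above.
    """
    for metric_results in data:
        for result in metric_results:
            print(
                f"metric={result.get('metric_name')} "
                f"score={result.get('score')} passing={result.get('passing')}"
            )
            if result.get("feedback"):
                print(f"  feedback: {result['feedback']}")
```

Calling `summarize_results(data)` in place of the final `print(data)` in `main()` would surface the per-metric scores directly.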