Merge branch 'main' into devs/peiwen/fix_chatgroup_run

microsoft · May 11, 2024 · b233cf7 · b233cf7
2 parents 47922fe + 2cf0fbe
commit b233cf7
Show file tree

Hide file tree

Showing 15 changed files with 294 additions and 24 deletions.
diff --git a/docs/reference/pf-command-reference.md b/docs/reference/pf-command-reference.md
@@ -294,6 +294,7 @@ pf flow serve --source
               [--verbose]
               [--debug]
               [--skip-open-browser]
+              [--engine]
 ```
 
 #### Examples
@@ -310,6 +311,12 @@ Serve flow as an endpoint with specific port and host.
 pf flow serve --source <path-to-flow> --port <port> --host <host> --environment-variables key1="`${my_connection.api_key}`" key2="value2"
 ```
 
+Serve flow as an endpoint with specific port, host, environment-variables and fastapi serving engine.
+
+```bash
+pf flow serve --source <path-to-flow> --port <port> --host <host> --environment-variables key1="`${my_connection.api_key}`" key2="value2" --engine fastapi
+```
+
 #### Required Parameter
 
 `--source`
@@ -342,6 +349,10 @@ Show debug information during serve.
 
 Skip opening browser after serve. Store true parameter.
 
+`--engine`
+
+Switch python serving engine between `flask` amd `fastapi`, default to `flask`.
+
 ## pf connection
 
 Manage prompt flow connections.

diff --git a/src/promptflow-core/promptflow/executor/_prompty_executor.py b/src/promptflow-core/promptflow/executor/_prompty_executor.py
@@ -66,3 +66,6 @@ def _init_input_sign(self):
         self._inputs_sign = flow.inputs
         # The init signature only used for flex flow, so we set the _init_sign to empty dict for prompty flow.
         self._init_sign = {}
+
+    def get_inputs_definition(self):
+        return self._inputs
diff --git a/src/promptflow-core/promptflow/executor/_service/apis/batch.py b/src/promptflow-core/promptflow/executor/_service/apis/batch.py
@@ -24,13 +24,13 @@
 @router.post("/initialize")
 def initialize(request: InitializationRequest):
     with get_log_context(request, enable_service_logger=True):
-        # validate request and get operation context
+        # Validate request and get operation context.
         request.validate_request()
         operation_context = update_and_get_operation_context(request.operation_context)
         service_logger.info(f"Received batch init request, executor version: {operation_context.get_user_agent()}.")
-        # resolve environment variables
+        # Resolve environment variables.
         set_environment_variables(request.environment_variables)
-        # init batch coordinator to validate flow and create process pool
+        # Init batch coordinator to validate flow and create process pool.
         batch_coordinator = BatchCoordinator(
             working_dir=request.working_dir,
             flow_file=request.flow_file,
@@ -42,8 +42,8 @@ def initialize(request: InitializationRequest):
             init_kwargs=request.init_kwargs,
         )
         batch_coordinator.start()
-        # return json response
-        return {"status": "initialized"}
+        # Return some flow infos including the flow inputs definition and whether it has aggregation nodes.
+        return batch_coordinator.get_flow_infos()
 
 
 @router.post("/execution")

diff --git a/src/promptflow-core/promptflow/executor/_service/utils/batch_coordinator.py b/src/promptflow-core/promptflow/executor/_service/utils/batch_coordinator.py
@@ -79,6 +79,12 @@ def get_instance(cls):
     def get_log_context(self):
         return self._log_context
 
+    def get_flow_infos(self):
+        return {
+            "inputs_definition": self._flow_executor.get_inputs_definition(),
+            "has_aggregation": self._flow_executor.has_aggregation_node,
+        }
+
     def start(self):
         """Start the process pool."""
         self._process_pool.start()

diff --git a/src/promptflow-core/promptflow/executor/flow_executor.py b/src/promptflow-core/promptflow/executor/flow_executor.py
@@ -681,6 +681,9 @@ def _exec_in_thread(self, args) -> LineResult:
         self._completed_idx[line_number] = thread_name
         return results
 
+    def get_inputs_definition(self):
+        return self._flow.inputs
+
     def exec_line(
         self,
         inputs: Mapping[str, Any],

diff --git a/src/promptflow-devkit/CHANGELOG.md b/src/promptflow-devkit/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Improvements
 - Interactive browser credential is excluded by default when using Azure AI connections, user could set `PF_NO_INTERACTIVE_LOGIN=False` to enable it.
 - Visualize flex flow run(s) switches to trace UI page.
+- Add new `--engine` parameter for `pf flow serve`. This parameter can be used to switch python serving engine between `flask` and `fastapi`, currently it defaults to `flask`.
 - Return the secrets in the connection object by default to improve flex flow experience.
   - Behaviors not changed: 'pf connection' command will scrub secrets.
   - New behavior: connection object by `client.connection.get` will have real secrets. `print(connection_obj)` directly will scrub those secrets. `print(connection_obj.api_key)` or `print(connection_obj.secrets)` will print the REAL secrets.

diff --git a/src/promptflow-devkit/promptflow/_cli/_pf/_flow.py b/src/promptflow-devkit/promptflow/_cli/_pf/_flow.py
@@ -199,6 +199,9 @@ def add_parser_serve_flow(subparsers):
     add_param_skip_browser = lambda parser: parser.add_argument(  # noqa: E731
         "--skip-open-browser", action="store_true", default=False, help="Skip open browser for flow serving."
     )
+    add_param_engine = lambda parser: parser.add_argument(  # noqa: E731
+        "--engine", type=str, default="flask", help="The engine to serve the flow, can be flask or fastapi."
+    )
     activate_action(
         name="serve",
         description="Serving a flow as an endpoint.",
@@ -207,6 +210,7 @@ def add_parser_serve_flow(subparsers):
             add_param_source,
             add_param_port,
             add_param_host,
+            add_param_engine,
             add_param_static_folder,
             add_param_environment_variables,
             add_param_config,
@@ -595,6 +599,7 @@ def serve_flow(args):
         host=args.host,
         port=args.port,
         skip_open_browser=args.skip_open_browser,
+        engine=args.engine,
     )
     logger.info("Promptflow app ended")
 

diff --git a/src/promptflow-devkit/promptflow/_sdk/_utilities/serve_utils.py b/src/promptflow-devkit/promptflow/_sdk/_utilities/serve_utils.py
@@ -58,6 +58,7 @@ def start_flow_service(
     environment_variables: Dict[str, str] = None,
     init: Dict[str, Any] = None,
     skip_open_browser: bool = True,
+    engine: str = "flask",
 ):
     logger.info(
         "Start promptflow server with port %s",
@@ -72,6 +73,11 @@ def start_flow_service(
                 message_format="Support directory `source` for Python flow only for now, but got {source}.",
                 source=source,
             )
+        if engine not in ["flask", "fastapi"]:
+            raise UserErrorException(
+                message_format="Unsupported engine {engine} for Python flow, only support 'flask' and 'fastapi'.",
+                engine=engine,
+            )
         serve_python_flow(
             flow_file_name=flow_file_name,
             flow_dir=flow_dir,
@@ -82,6 +88,7 @@ def start_flow_service(
             config=config or {},
             environment_variables=environment_variables or {},
             skip_open_browser=skip_open_browser,
+            engine=engine,
         )
     else:
         serve_csharp_flow(
@@ -103,6 +110,7 @@ def serve_python_flow(
     environment_variables,
     init,
     skip_open_browser: bool,
+    engine,
 ):
     from promptflow._sdk._configuration import Configuration
     from promptflow.core._serving.app import create_app
@@ -121,13 +129,24 @@ def serve_python_flow(
         environment_variables=environment_variables,
         connection_provider=connection_provider,
         init=init,
+        engine=engine,
     )
     if not skip_open_browser:
         target = f"http://{host}:{port}"
         logger.info(f"Opening browser {target}...")
         webbrowser.open(target)
     # Debug is not supported for now as debug will rerun command, and we changed working directory.
-    app.run(port=port, host=host)
+    if engine == "flask":
+        app.run(port=port, host=host)
+    else:
+        try:
+            import uvicorn
+
+            uvicorn.run(app, host=host, port=port, access_log=False, log_config=None)
+        except ImportError:
+            raise UserErrorException(
+                message_format="FastAPI engine requires uvicorn, please install uvicorn by `pip install uvicorn`."
+            )
 
 
 @contextlib.contextmanager

diff --git a/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_flow_serve_cli.py b/src/promptflow-devkit/tests/sdk_cli_test/unittests/test_flow_serve_cli.py
@@ -49,6 +49,17 @@ def test_flow_serve(self, source: Path):
                 "--skip-open-browser",
             )
             mock_run.assert_called_once_with(port=8080, host="localhost")
+        with mock.patch("uvicorn.run") as mock_run:
+            run_pf_command(
+                "flow",
+                "serve",
+                "--source",
+                source.as_posix(),
+                "--skip-open-browser",
+                "--engine",
+                "fastapi",
+            )
+            mock_run.assert_called_once()
 
     @pytest.mark.parametrize(
         "source",
@@ -71,3 +82,25 @@ def test_flow_serve_failed(self, source: Path, capsys):
             "pf.flow.serve failed with UserErrorException: Support directory `source` for Python flow only for now"
             in out
         )
+
+    @pytest.mark.parametrize(
+        "source",
+        [
+            pytest.param(EAGER_FLOWS_DIR / "simple_with_yaml", id="simple_with_yaml_file"),
+            pytest.param(FLOWS_DIR / "simple_hello_world", id="simple_hello_world_file"),
+        ],
+    )
+    def test_flow_serve_invalid_engine(self, source: Path, capsys):
+        invalid_engine = "invalid_engine"
+        with pytest.raises(SystemExit):
+            run_pf_command(
+                "flow",
+                "serve",
+                "--source",
+                source.as_posix(),
+                "--skip-open-browser",
+                "--engine",
+                invalid_engine,
+            )
+        out, err = capsys.readouterr()
+        assert f"Unsupported engine {invalid_engine} for Python flow, only support 'flask' and 'fastapi'." in out
diff --git a/src/promptflow-tracing/promptflow/tracing/_span_enricher.py b/src/promptflow-tracing/promptflow/tracing/_span_enricher.py
@@ -0,0 +1,49 @@
+from typing import Dict
+
+from .contracts.trace import TraceType
+
+
+class SpanEnricher:
+    def __init__(self):
+        pass
+
+    def enrich(self, span, inputs, output):
+        """This method is used to enrich the span with the inputs and output of the traced function.
+        Note that this method is called after the function is called, so some inputs related logic is not here.
+        """
+        #  TODO: Also move input related logic here.
+        from ._trace import enrich_span_with_output
+
+        enrich_span_with_output(span, output)
+
+
+class SpanEnricherManager:
+    _instance = None
+
+    def __init__(self):
+        self._type2enricher: Dict[str, SpanEnricher] = {}
+        self._base_enricher = SpanEnricher()
+
+    @classmethod
+    def get_instance(cls) -> "SpanEnricherManager":
+        if cls._instance is None:
+            cls._instance = SpanEnricherManager()
+        return cls._instance
+
+    @classmethod
+    def register(cls, trace_type, enricher: SpanEnricher):
+        cls.get_instance()._register(trace_type, enricher)
+
+    @classmethod
+    def enrich(cls, span, inputs, output, trace_type):
+        cls.get_instance()._enrich(span, inputs, output, trace_type)
+
+    def _register(self, trace_type, enricher: SpanEnricher):
+        self._type2enricher[trace_type] = enricher
+
+    def _enrich(self, span, inputs, output, trace_type):
+        enricher = self._type2enricher.get(trace_type, self._base_enricher)
+        enricher.enrich(span, inputs, output)
+
+
+SpanEnricherManager.register(TraceType.FUNCTION, SpanEnricher())
diff --git a/src/promptflow-tracing/promptflow/tracing/_trace.py b/src/promptflow-tracing/promptflow/tracing/_trace.py
@@ -21,6 +21,7 @@
 
 from ._openai_utils import OpenAIMetricsCalculator, OpenAIResponseParser
 from ._operation_context import OperationContext
+from ._span_enricher import SpanEnricher, SpanEnricherManager
 from ._tracer import Tracer, _create_trace_from_function_call, get_node_name_from_context
 from ._utils import get_input_names_for_prompt_template, get_prompt_param_name_from_func, serialize
 from .contracts.generator_proxy import AsyncGeneratorProxy, GeneratorProxy
@@ -148,17 +149,10 @@ def enrich_span_with_input(span, input):
 
 
 def enrich_span_with_trace_type(span, inputs, output, trace_type):
-    if trace_type == TraceType.LLM:
-        # Handle the non-streaming output of LLM, the streaming output will be handled in traced_generator.
-        token_collector.collect_openai_tokens(span, output)
-        enrich_span_with_llm_output(span, output)
-    elif trace_type == TraceType.EMBEDDING:
-        token_collector.collect_openai_tokens(span, output)
-        enrich_span_with_embedding(span, inputs, output)
+    SpanEnricherManager.enrich(span, inputs, output, trace_type)
+    # TODO: Move the following logic to SpanEnricher
     enrich_span_with_openai_tokens(span, trace_type)
-    enrich_span_with_output(span, output)
-    output = trace_iterator_if_needed(span, inputs, output)
-    return output
+    return trace_iterator_if_needed(span, inputs, output)
 
 
 def trace_iterator_if_needed(span, inputs, output):
@@ -519,3 +513,21 @@ async def greetings_async(user_id):
     """
 
     return _traced(func, trace_type=TraceType.FUNCTION)
+
+
+class LLMSpanEnricher(SpanEnricher):
+    def enrich(self, span, inputs, output):
+        token_collector.collect_openai_tokens(span, output)
+        enrich_span_with_llm_output(span, output)
+        super().enrich(span, inputs, output)
+
+
+class EmbeddingSpanEnricher(SpanEnricher):
+    def enrich(self, span, inputs, output):
+        token_collector.collect_openai_tokens(span, output)
+        enrich_span_with_embedding(span, inputs, output)
+        super().enrich(span, inputs, output)
+
+
+SpanEnricherManager.register(TraceType.LLM, LLMSpanEnricher())
+SpanEnricherManager.register(TraceType.EMBEDDING, EmbeddingSpanEnricher())
diff --git a/src/promptflow-tracing/promptflow/tracing/contracts/trace.py b/src/promptflow-tracing/promptflow/tracing/contracts/trace.py
@@ -15,6 +15,7 @@ class TraceType(str, Enum):
     LANGCHAIN = "LangChain"
     FLOW = "Flow"
     EMBEDDING = "Embedding"
+    RETRIEVAL = "Retrieval"
 
 
 @dataclass

diff --git a/src/promptflow/CHANGELOG.md b/src/promptflow/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Improvements
 - [promptflow-devkit]: Interactive browser credential is excluded by default when using Azure AI connections, user could set `PF_NO_INTERACTIVE_LOGIN=False` to enable it.
+- [promptflow-devkit]: Add new `--engine` parameter for `pf flow serve`. This parameter can be used to switch python serving engine between `flask` and `fastapi`, currently it defaults to `flask`.
 - [promptflow-azure]: Refine trace Cosmos DB setup process to print setup status during the process, and display error message from service when setup failed.
 - [promptflow-devkit][promptflow-azure] - Return the secrets in the connection object by default to improve flex flow experience.
   - Reach the sub package docs for more details about this. [promptflow-devkit](https://microsoft.github.io/promptflow/reference/changelog/promptflow-devkit.html) [promptflow-azure](https://microsoft.github.io/promptflow/reference/changelog/promptflow-azure.html)