Adds more docs to the pytest example code #487

Merged
merged 1 commit into from Dec 27, 2024
8 changes: 6 additions & 2 deletions examples/pytest/README.md
@@ -128,7 +128,9 @@ E.g. we only pass tests if all the outputs are as expected, or we pass if 80% of
also log this to a file, or a database, etc. for further inspection and record keeping, or combining it with
open source frameworks such as [mlflow](https://mlflow.org) and using their [evaluate functionality](https://mlflow.org/docs/latest/llms/llm-evaluate/index.html).
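
For instance, an aggregate pass-rate check over the harvested results might look like the following sketch. It assumes your tests logged a boolean `correct` field via `results_bag` (pytest-harvest surfaces it as a column in `module_results_df`); adjust to whatever fields you actually record:

```python
# Minimal sketch, assuming each test did results_bag.correct = <bool>.
def test_aggregate_pass_rate(module_results_df):
    """Fail the suite-level check if fewer than 80% of cases were correct."""
    pass_rate = module_results_df["correct"].mean()
    print(f"Pass rate: {pass_rate:.0%}")
    assert pass_rate >= 0.8, f"only {pass_rate:.0%} of cases were correct; expected >= 80%"
```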

Note: we can also combine `results_bag` with ``pytest.mark.parametrize`` to run the same test with different inputs and expected outputs:
Note (1): if you want to build your own way to aggregate, you can see another way using a very simple fixture in `conftest.py`.
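
As a rough sketch of the shape such a fixture might take (the real implementation lives in `conftest.py`; the names here are illustrative):

```python
# conftest.py -- hypothetical sketch of a session-scoped aggregation fixture
import pytest


@pytest.fixture(scope="session")
def results_collector():
    """Tests append result dicts here; we summarize once the session ends."""
    collected = []
    yield collected
    # Runs after all tests complete: print, or write to a file/database.
    for row in collected:
        print(row)
```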

Note (2): we can also combine `results_bag` with `pytest.mark.parametrize` to run the same test with different inputs and expected outputs:

```python
import pytest
...
```

@@ -298,9 +300,11 @@ def test_an_agent_e2e_with_tracker(input_state, expected_state, results_bag, tra
# An example
Here in this directory we have:

- `some_actions.py` - a file that defines an augmented LLM application (it's not a full agent) with some actions
- `some_actions.py` - a file that defines an augmented LLM application (it's not a full agent) with some actions. See the image below - note the hypotheses action runs multiple hypotheses in parallel.
- `test_some_actions.py` - a file that defines some tests for the actions in `some_actions.py`.

![toy example](diagnosis.png)

You'll see that we use the `results_bag` fixture to log the results of our tests, and then we can access these results
via the `module_results_df` fixture that provides a pandas dataframe of the results. This dataframe is then
saved as a CSV for uploading to Google Sheets, etc. for further analysis. You will also see uses of `pytest.mark.parametrize`
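The CSV step itself is a one-liner; a hedged sketch (assuming `module_results_df` from pytest-harvest and a hypothetical output path):

```python
def test_save_results(module_results_df):
    # Persist the harvested results for later analysis / upload.
    module_results_df.to_csv("pytest_results.csv", index=False)
```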
40 changes: 36 additions & 4 deletions examples/pytest/some_actions.py
@@ -1,3 +1,8 @@
"""
This is an example module that defines a Burr application.

It hypothetically transcribes audio and then runs multiple hypotheses over the transcription to determine a medical diagnosis.
"""
from typing import Any, Callable, Dict, Generator, List, Tuple

import openai
@@ -10,12 +15,14 @@

@action(reads=["audio"], writes=["transcription"])
def transcribe_audio(state: State) -> State:
"""Action to transcribe audio."""
# here we fake transcription. For this example the audio is text already...
return state.update(transcription=state["audio"])


@action(reads=["hypothesis", "transcription"], writes=["diagnosis"])
def run_hypothesis(state: State) -> State:
"""Action to run a hypothesis on a transcription."""
client = openai.Client() # here for simplicity because clients and SERDE don't mix well.
hypothesis = state["hypothesis"]
transcription = state["transcription"]
@@ -42,15 +49,20 @@ def run_hypothesis(state: State) -> State:


class TestMultipleHypotheses(MapStates):
"""Parallel action to test multiple hypotheses."""

def action(self, state: State, inputs: Dict[str, Any]) -> Action | Callable | RunnableGraph:
"""which action to run for each state."""
return run_hypothesis

def states(
self, state: State, context: ApplicationContext, inputs: Dict[str, Any]
) -> Generator[State, None, None]:
# You could easily have a list_hypotheses upstream action that writes to "hypothesis" in state
# And loop through those
# This hardcodes for simplicity
"""Generate the states to run the action on.
You could easily have a list_hypotheses upstream action that writes to "hypothesis" in state
And loop through those
This hardcodes for simplicity
"""
for hypothesis in [
"Common cold",
"Sprained ankle",
@@ -59,6 +71,7 @@ def states(
yield state.update(hypothesis=hypothesis)

def reduce(self, state: State, states: Generator[State, None, None]) -> State:
"""Combine the outputs of the parallel action."""
all_diagnoses_outputs = []
for _sub_state in states:
all_diagnoses_outputs.append(
@@ -77,6 +90,7 @@ def writes(self) -> List[str]:

@action(reads=["diagnosis_outputs"], writes=["final_diagnosis"])
def determine_diagnosis(state: State) -> State:
"""Action to determine the final diagnosis."""
# could also get an LLM to decide here, or have a human decide, etc.
possible_hypotheses = [d for d in state["diagnosis_outputs"] if d["diagnosis"].lower() == "yes"]
if len(possible_hypotheses) == 1:
@@ -90,6 +104,7 @@ def determine_diagnosis(state: State) -> State:


def build_graph() -> core.Graph:
"""Builds the graph for the application"""
graph = (
GraphBuilder()
.with_actions(
@@ -115,7 +130,17 @@ def build_application(
tracker,
use_otel_tracing: bool = False,
) -> core.Application:
"""Builds an application with the given parameters."""
"""Builds an application with the given parameters.

:param app_id:
:param graph:
:param initial_state:
:param initial_entrypoint:
:param partition_key:
:param tracker:
:param use_otel_tracing:
:return:
"""
app_builder = (
core.ApplicationBuilder()
.with_graph(graph)
@@ -132,6 +157,13 @@
def run_my_agent(
input_audio: str, partition_key: str = None, app_id: str = None, tracking_project: str = None
) -> Tuple[str, str]:
"""Runs the agent with the given input audio (in this case a string transcription...).
:param input_audio: we fake it here, and ask for a string...
:param partition_key:
:param app_id:
:param tracking_project:
:return:
"""
graph = build_graph()
tracker = None
if tracking_project:
3 changes: 3 additions & 0 deletions examples/pytest/test_some_actions.py
@@ -158,6 +158,9 @@ def test_run_hypothesis_burr_fixture_e2e_with_tracker(input_state, expected_stat


def test_print_results(module_results_df):
"""This is an example using pytest-harvest to return results to a central location.
You could use other plugins, or create your own fixtures (e.g. see conftest.py for a simpler custom fixture).
"""
print(module_results_df.columns)
print(module_results_df.head())
# compute statistics