Adds more docs to the pytest example code
skrawcz committed Dec 27, 2024
1 parent e149616 commit a74e101
Showing 3 changed files with 45 additions and 6 deletions.
8 changes: 6 additions & 2 deletions examples/pytest/README.md
@@ -128,7 +128,9 @@ E.g. we only pass tests if all the outputs are as expected, or we pass if 80% of
also log this to a file, or a database, etc., for further inspection and record keeping, or combine it with
open source frameworks like [mlflow](https://mlflow.org), using their [evaluate functionality](https://mlflow.org/docs/latest/llms/llm-evaluate/index.html).
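
For instance, here is a minimal sketch of such an aggregate gate, assuming earlier tests logged a boolean `correct` field into `results_bag` (that field name is illustrative, not one used in this repo):

```python
def test_aggregate_results(module_results_df):
    """Gate the suite on an aggregate: pass only if >= 80% of outputs matched.

    Assumes earlier tests did `results_bag.correct = <bool>`; pytest-harvest
    then exposes that as a `correct` column in `module_results_df`.
    """
    pass_rate = module_results_df["correct"].mean()
    assert pass_rate >= 0.8, f"only {pass_rate:.0%} of outputs matched expectations"
```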

Note (1): if you want to build your own way to aggregate results, you can see another approach using a very simple fixture in `conftest.py`.
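
A minimal sketch of that idea, using a plain session-scoped fixture (hypothetical; the actual `conftest.py` in this repo may differ):

```python
# conftest.py (hypothetical sketch)
import pytest


@pytest.fixture(scope="session")
def results_collector():
    """Tests append dicts of results; this gives one place to inspect them
    (print, save to disk, etc.) after the whole session finishes."""
    results = []
    yield results
    print(f"collected {len(results)} result(s)")
```

A test would then accept `results_collector` as an argument and call `results_collector.append({...})`.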

Note (2): we can also combine `results_bag` with `pytest.mark.parametrize` to run the same test with different inputs and expected outputs:

```python
import pytest
@@ -298,9 +300,11 @@ def test_an_agent_e2e_with_tracker(input_state, expected_state, results_bag, tra
# An example
Here in this directory we have:

- `some_actions.py` - a file that defines an augmented LLM application (it's not a full agent) with some actions. See the image below - note that the hypotheses action runs multiple hypotheses in parallel.
- `test_some_actions.py` - a file that defines some tests for the actions in `some_actions.py`.

![toy example](diagnosis.png)

You'll see that we use the `results_bag` fixture to log the results of our tests, and then we can access these results
via the `module_results_df` fixture that provides a pandas dataframe of the results. This dataframe is then
saved as a CSV for uploading to Google Sheets, etc., for further analysis. You will also see uses of `pytest.mark.parametrize` (the pattern is sketched below).
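
The pattern looks roughly like this (a sketch; the field names are illustrative rather than the exact ones used in `test_some_actions.py`):

```python
def test_one_case(results_bag):
    # anything assigned onto results_bag (a pytest-harvest fixture) becomes
    # a column in module_results_df
    results_bag.input = "patient reports a sore throat"
    results_bag.output = "Common cold"
    results_bag.correct = True


def test_save_results(module_results_df):
    # one row per test above; save for upload to Google Sheets, etc.
    module_results_df.to_csv("results.csv", index=False)
```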
40 changes: 36 additions & 4 deletions examples/pytest/some_actions.py
@@ -1,3 +1,8 @@
"""
This is an example module that defines a Burr application.
It hypothetically transcribes audio and then runs several hypotheses over the transcription in parallel to determine a medical diagnosis.
"""
from typing import Any, Callable, Dict, Generator, List, Tuple

import openai
@@ -10,12 +15,14 @@

@action(reads=["audio"], writes=["transcription"])
def transcribe_audio(state: State) -> State:
"""Action to transcribe audio."""
# here we fake transcription. For this example the audio is text already...
return state.update(transcription=state["audio"])


@action(reads=["hypothesis", "transcription"], writes=["diagnosis"])
def run_hypothesis(state: State) -> State:
"""Action to run a hypothesis on a transcription."""
client = openai.Client() # here for simplicity because clients and SERDE don't mix well.
hypothesis = state["hypothesis"]
transcription = state["transcription"]
@@ -42,15 +49,20 @@ def run_hypothesis(state: State) -> State:


class TestMultipleHypotheses(MapStates):
"""Parallel action to test multiple hypotheses."""

def action(self, state: State, inputs: Dict[str, Any]) -> Action | Callable | RunnableGraph:
"""which action to run for each state."""
return run_hypothesis

def states(
self, state: State, context: ApplicationContext, inputs: Dict[str, Any]
) -> Generator[State, None, None]:
# You could easily have a list_hypotheses upstream action that writes to "hypothesis" in state
# And loop through those
# This hardcodes for simplicity
"""Generate the states to run the action on.
You could easily have a list_hypotheses upstream action that writes to "hypothesis" in state
And loop through those
This hardcodes for simplicity
"""
for hypothesis in [
"Common cold",
"Sprained ankle",
@@ -59,6 +71,7 @@ def states(
yield state.update(hypothesis=hypothesis)

def reduce(self, state: State, states: Generator[State, None, None]) -> State:
"""Combine the outputs of the parallel action."""
all_diagnoses_outputs = []
for _sub_state in states:
all_diagnoses_outputs.append(
@@ -77,6 +90,7 @@ def writes(self) -> List[str]:

@action(reads=["diagnosis_outputs"], writes=["final_diagnosis"])
def determine_diagnosis(state: State) -> State:
"""Action to determine the final diagnosis."""
# could also get an LLM to decide here, or have a human decide, etc.
possible_hypotheses = [d for d in state["diagnosis_outputs"] if d["diagnosis"].lower() == "yes"]
if len(possible_hypotheses) == 1:
@@ -90,6 +104,7 @@ def determine_diagnosis(state: State) -> State:


def build_graph() -> core.Graph:
"""Builds the graph for the application"""
graph = (
GraphBuilder()
.with_actions(
@@ -115,7 +130,17 @@ def build_application(
tracker,
use_otel_tracing: bool = False,
) -> core.Application:
"""Builds an application with the given parameters."""
"""Builds an application with the given parameters.
:param app_id:
:param graph:
:param initial_state:
:param initial_entrypoint:
:param partition_key:
:param tracker:
:param use_otel_tracing:
:return:
"""
app_builder = (
core.ApplicationBuilder()
.with_graph(graph)
@@ -132,6 +157,13 @@
def run_my_agent(
input_audio: str, partition_key: str = None, app_id: str = None, tracking_project: str = None
) -> Tuple[str, str]:
"""Runs the agent with the given input audio (in this case a string transcription...).
:param input_audio: we fake it here, and ask for a string...
:param partition_key:
:param app_id:
:param tracking_project:
:return:
"""
graph = build_graph()
tracker = None
if tracking_project:
3 changes: 3 additions & 0 deletions examples/pytest/test_some_actions.py
@@ -158,6 +158,9 @@ def test_run_hypothesis_burr_fixture_e2e_with_tracker(input_state, expected_stat


def test_print_results(module_results_df):
"""This is an example using pytest-harvest to return results to a central location.
You could use other plugins, or create your own fixtures (e.g. see conftest.py for a simpler custom fixture).
"""
print(module_results_df.columns)
print(module_results_df.head())
# compute statistics
