From 3b2500f1c51b40dfe284e81e81871c3f59766bbb Mon Sep 17 00:00:00 2001
From: Stefan Krawczyk <stefan@dagworks.io>
Date: Fri, 13 Dec 2024 10:36:05 -0800
Subject: [PATCH] Add file for parameterization for pytest example

---
 examples/pytest/hypotheses_test_cases.json | 35 ++++++++++++++++++
 examples/pytest/test_some_actions.py       | 41 ++++++++++++++++++++--
 2 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 examples/pytest/hypotheses_test_cases.json

diff --git a/examples/pytest/hypotheses_test_cases.json b/examples/pytest/hypotheses_test_cases.json
new file mode 100644
index 00000000..cfedf631
--- /dev/null
+++ b/examples/pytest/hypotheses_test_cases.json
@@ -0,0 +1,35 @@
+[
+  {
+    "action": "run_hypothesis",
+    "name": "run_hypothesis_0",
+    "input_state": {
+      "transcription": "Patient exhibits mucus dripping from nostrils and coughing.",
+      "hypothesis": "Common cold"
+    },
+    "expected_state": {
+      "diagnosis": "yes"
+    }
+  },
+  {
+    "action": "run_hypothesis",
+    "name": "run_hypothesis_1",
+    "input_state": {
+      "transcription": "Patient has a limp and is unable to flex right ankle. Ankle is swollen.",
+      "hypothesis": "Sprained ankle"
+    },
+    "expected_state": {
+      "diagnosis": "yes"
+    }
+  },
+  {
+    "action": "run_hypothesis",
+    "name": "run_hypothesis_2",
+    "input_state": {
+      "transcription": "Patient fell off and landed on their right arm. Their right wrist is swollen, they can still move their fingers, and there is only minor pain or discomfort when the wrist is moved or touched.",
+      "hypothesis": "Broken arm"
+    },
+    "expected_state": {
+      "diagnosis": "no"
+    }
+  }
+]
diff --git a/examples/pytest/test_some_actions.py b/examples/pytest/test_some_actions.py
index f32b881f..903724b1 100644
--- a/examples/pytest/test_some_actions.py
+++ b/examples/pytest/test_some_actions.py
@@ -72,6 +72,7 @@ def test_run_hypothesis(results_bag):
 def test_run_hypothesis_parameterized(input, hypothesis, expected, results_bag):
     """Example showing how to parameterize this."""
     results_bag.input = input
+    results_bag.hypothesis = hypothesis
     results_bag.expected = expected
     results_bag.test_function = "test_run_hypothesis_parameterized"
     input_state = state.State({"hypothesis": hypothesis, "transcription": input})
@@ -85,8 +86,33 @@ def test_run_hypothesis_parameterized(input, hypothesis, expected, results_bag):
     assert end_state["diagnosis"] != ""
 
 
-def test_run_hypothesis_burr_fixture(input, hypothesis, expected, results_bag):
-    """This example shows how to scale parameterized with a file of inputs and expected outputs."""
+# the following import is required to run file-based parameterized tests
+from burr.testing import pytest_generate_tests  # noqa: F401
+
+
+@pytest.mark.file_name(
+    "hypotheses_test_cases.json"
+)  # fixture file holding the input state and expected state of each test case
+def test_run_hypothesis_burr_fixture(input_state, expected_state, results_bag):
+    """Run file-driven parameterized cases supplied by Burr's pytest_generate_tests hook."""
+    input_state = state.State.deserialize(input_state)
+    expected_state = state.State.deserialize(expected_state)
+    results_bag.input = input_state["transcription"]
+    results_bag.hypothesis = input_state["hypothesis"]
+    results_bag.expected = expected_state["diagnosis"]
+    results_bag.test_function = "test_run_hypothesis_burr_fixture"  # record this test's own name
+    input_state = state.State(
+        {"hypothesis": input_state["hypothesis"], "transcription": input_state["transcription"]}
+    )
+    end_state = some_actions.run_hypothesis(input_state)
+    results_bag.actual = end_state["diagnosis"]
+    results_bag.exact_match = end_state["diagnosis"].lower() == expected_state["diagnosis"]
+    print(results_bag)
+    # results_bag.jaccard = ... # other measures here
+    # e.g. LLM as judge if applicable
+    # place asserts at end so every result above is recorded before a failure
+    assert end_state["diagnosis"] is not None
+    assert end_state["diagnosis"] != ""
 
 
 def test_print_results(module_results_df):
@@ -100,7 +126,16 @@ def test_print_results(module_results_df):
     accuracy = sum(tests_of_interest["exact_match"]) / len(tests_of_interest)
     # save to CSV
     tests_of_interest[
-        ["test_function", "duration_ms", "status", "input", "expected", "actual", "exact_match"]
+        [
+            "test_function",
+            "duration_ms",
+            "status",
+            "input",
+            "hypothesis",
+            "expected",
+            "actual",
+            "exact_match",
+        ]
     ].to_csv("results.csv", index=True, quoting=1)
     # upload to google sheets or other storage, etc.