GoogleCloudPlatform · kmaphoenix · Dec 10, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 9, 2024
diff --git a/src/dfcx_scrapi/tools/evaluations.py b/src/dfcx_scrapi/tools/evaluations.py
@@ -108,6 +108,7 @@ def __init__(
             generation_model=self.generation_model,
             embedding_model=self.embedding_model
             )
+        self.unexpected_rows = []
 
         if debug:
             logging.basicConfig(level=logging.DEBUG, force=True)
@@ -171,33 +172,47 @@ def process_flow_invocations(
 
     @staticmethod
     def process_tool_invocations(
-        tool_responses: List[str],
+        tool_responses: List[Dict],
         index: int,
         row: pd.Series,
-        df: pd.DataFrame) -> pd.DataFrame:
-        # Check if our golden contained a tool_idx or wasn't
-        # expecting tools
+        df: pd.DataFrame,
+    ) -> pd.DataFrame:
+        """Process tool invocations and map them
+        to the rows listed in tool_pair, or to the row itself if unset."""
+        # Get the list of indices where tool responses should be mapped
         if row["tool_pair"] in [None, "", "NaN", "nan"]:
             tool_index_list = [index]
         else:
             tool_index_list = literal_eval(row["tool_pair"])
 
-        for idx in tool_index_list:
-            tool = tool_responses.pop(0)
-            df.loc[
-                int(idx),
-                [
+        # Process each tool response and map it to the corresponding index
+        for i, idx in enumerate(tool_index_list):
+            if i < len(tool_responses):
+                tool = tool_responses[i]
+                df.loc[int(idx), "res_tool_name"] = (
+                    tool.get("tool_name", "")
+                )
+                df.loc[int(idx), "res_tool_action"] = (
+                    tool.get("tool_action", "")
+                )
+                df.loc[int(idx), "res_input_params"] = (
+                    str(tool.get("input_params", {}))
+                )
+                df.loc[int(idx), "res_output_params"] = (
+                    str(tool.get("output_params", {}))
+                )
+            else:
+                df.loc[int(idx), [
                     "res_tool_name",
                     "res_tool_action",
                     "res_input_params",
-                    "res_output_params",
-                ],
-            ] = [
-                tool["tool_name"],
-                tool["tool_action"],
-                tool["input_params"],
-                tool["output_params"],
-            ]
+                    "res_output_params"
+                    ]] = [
+                    "NO_TOOL_RESPONSE",
+                    "NO_TOOL_RESPONSE",
+                    "NO_TOOL_RESPONSE",
+                    "NO_TOOL_RESPONSE"
+                ]
 
         return df
 
@@ -403,15 +418,30 @@ def run_detect_intent_queries(self, df: pd.DataFrame) -> pd.DataFrame:
                 text=row["action_input"],
                 parameters=session_parameters
             )
-
             # Add data to the existing row
             df.loc[index, ["session_id", "agent_id"]] = [
                 data["session_id"],
                 data["agent_id"],
             ]
             text_res = self.ar._extract_text(res)
-            utterance_idx = int(row["utterance_pair"])
-            df.loc[utterance_idx, ["agent_response"]] = [text_res]
+
+            # Handle Agent Responses
+            if row["utterance_pair"] != "":
+                utterance_idx = int(row["utterance_pair"])
+                df.loc[utterance_idx, ["agent_response"]] = [text_res]
+
+            else:
+                # No paired row; buffer it for insert_unexpected_rows()
+                self.unexpected_rows.append(
+                    {
+                        "session_id": data["session_id"],
+                        "agent_id": data["agent_id"],
+                        "action_type": "UNEXPECTED Agent Response",
+                        "index": index,
+                        "column": "agent_response",
+                        "data": text_res
+                    }
+                    )
 
             # Handle Playbook Invocations
             playbook_responses = (
@@ -434,13 +464,37 @@ def run_detect_intent_queries(self, df: pd.DataFrame) -> pd.DataFrame:
                 tool_responses = (
                     self.sessions_client.collect_tool_responses(res)
                 )
-                if len(tool_responses) > 0:
+                if tool_responses:  # Only call if not empty
                     df = self.process_tool_invocations(
-                        tool_responses, index, row, df
+                        tool_responses,
+                        index,
+                        row,
+                        df
                     )
 
         return df
 
+    def insert_unexpected_rows(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Insert the rows buffered in self.unexpected_rows into df.
+
+        Each entry becomes a new row at position entry["index"], reusing
+        that value as its index label. The buffer is emptied afterwards so
+        a later run on this instance does not re-insert stale rows.
+        """
+        # Last entry first: assuming entries were buffered in ascending index
+        # order, an insert never shifts a position still waiting to be used.
+        for row in reversed(self.unexpected_rows):
+            index = row["index"]
+            new_row = pd.DataFrame(columns=df.columns, index=[index])
+            new_row["session_id"] = row["session_id"]
+            new_row["agent_id"] = row["agent_id"]
+            new_row["action_type"] = row["action_type"]
+            new_row[row["column"]] = row["data"]
+            df = pd.concat([df.iloc[:index], new_row, df.iloc[index:]])
+        self.unexpected_rows = []
+
+        return df.sort_index(kind="mergesort")  # stable: keeps tie order
+
     def run_evals(self, df: pd.DataFrame) -> pd.DataFrame:
         print("Starting Evals...")
 
@@ -449,9 +503,15 @@ def run_evals(self, df: pd.DataFrame) -> pd.DataFrame:
 
         return df
 
-    def run_query_and_eval(self, df: pd.DataFrame) -> pd.DataFrame:
+    def scrape_results(self, df: pd.DataFrame) -> pd.DataFrame:
         df = self.add_response_columns(df)
         df = self.run_detect_intent_queries(df)
+        df = self.insert_unexpected_rows(df)
+
+        return df
+
+    def run_query_and_eval(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = self.scrape_results(df)
         df = self.run_evals(df)
         df = self.clean_outputs(df)
 
@@ -602,27 +662,37 @@ def get_model_name(settings: types.GenerativeSettings) -> str:
 
         return model_map.get(model_name, "")
 
-
     def pair_tool_calls(self, df: pd.DataFrame) -> pd.DataFrame:
-        "Identifies pairings of agent_utterance/tool_invocation by eval_id."
+        """Pairs user utterances with indices of relevant tool invocations."""
+
         df["tool_pair"] = pd.Series(dtype="string")
         grouped = df.groupby("eval_id")
 
         for _, group in grouped:
-            user = group[
-                group["action_type"] == "User Utterance"
-            ].index.tolist()
-            tool_list = group[
-                group["action_type"] == "Tool Invocation"
-            ].index.tolist()
-
-            pairs = self.get_matching_list_idx(
-                user, tool_list
-            )
-
-            # Create pairs of user/tool_list row indices
-            for pair in pairs:
-                df.loc[pair[0], "tool_pair"] = str(pair[1])
+            tool_indices = []
+            last_user_utterance_index = None
+
+            for index, row in group.iterrows():
+                if row["action_type"] == "User Utterance":
+                    # Assign accumulated tool indices to
+                    # the *previous* user utterance (if any)
+                    if last_user_utterance_index is not None:
+                        df.loc[last_user_utterance_index, "tool_pair"] = (
+                            str(tool_indices)
+                        )
+                    # Reset for the current user utterance:
+                    tool_indices = []
+                    last_user_utterance_index = index
+
+                elif row["action_type"] == "Tool Invocation":
+                    tool_indices.append(index)
+
+            # After processing the group, assign any remaining
+            # tool indices to the last user utterance
+            if last_user_utterance_index is not None and tool_indices:
+                df.loc[last_user_utterance_index, "tool_pair"] = (
+                    str(tool_indices)
+                )
 
         return df