Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix/evaluations multiple tool pairing and empty utterance pairing #267

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 109 additions & 39 deletions src/dfcx_scrapi/tools/evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def __init__(
generation_model=self.generation_model,
embedding_model=self.embedding_model
)
self.unexpected_rows = []

if debug:
logging.basicConfig(level=logging.DEBUG, force=True)
Expand Down Expand Up @@ -171,33 +172,47 @@ def process_flow_invocations(

@staticmethod
def process_tool_invocations(
    tool_responses: List[Dict],
    index: int,
    row: pd.Series,
    df: pd.DataFrame,
) -> pd.DataFrame:
    """Map collected tool responses onto their paired rows of ``df``.

    Args:
        tool_responses: Tool response dicts for the current turn. The keys
            read are ``tool_name``, ``tool_action``, ``input_params`` and
            ``output_params``; a missing key falls back to an empty value.
            The list is only read, never consumed.
        index: Label of the row currently being processed. It is used as the
            target row when ``row["tool_pair"]`` holds no pairing.
        row: The row being processed; only ``row["tool_pair"]`` is read.
        df: Dataframe that receives the ``res_*`` result columns.

    Returns:
        The same dataframe, updated in place.

    Raises:
        ValueError, SyntaxError: If ``tool_pair`` is a non-empty string that
            is not a valid Python literal.
    """
    result_columns = [
        "res_tool_name",
        "res_tool_action",
        "res_input_params",
        "res_output_params",
    ]

    # Resolve the row labels the tool responses should be written to.
    raw_pair = row["tool_pair"]
    if isinstance(raw_pair, (list, tuple)):
        tool_index_list = list(raw_pair)
    elif isinstance(raw_pair, str):
        pair_text = raw_pair.strip()
        if pair_text in ("", "NaN", "nan"):
            tool_index_list = [index]
        else:
            parsed = literal_eval(pair_text)
            # A bare scalar such as "3" is treated as a one-element pairing.
            if isinstance(parsed, (list, tuple)):
                tool_index_list = list(parsed)
            else:
                tool_index_list = [parsed]
    elif raw_pair is None or pd.isna(raw_pair):
        # Covers float NaN and pd.NA, which cannot be compared with `in`
        # (pd.NA raises TypeError) or parsed by literal_eval.
        tool_index_list = [index]
    else:
        tool_index_list = [raw_pair]

    # Pair the i-th expected row with the i-th response; expected rows that
    # have no response left are flagged explicitly.
    for position, idx in enumerate(tool_index_list):
        if position < len(tool_responses):
            tool = tool_responses[position]
            values = [
                tool.get("tool_name", ""),
                tool.get("tool_action", ""),
                str(tool.get("input_params", {})),
                str(tool.get("output_params", {})),
            ]
        else:
            values = ["NO_TOOL_RESPONSE"] * len(result_columns)

        for column, value in zip(result_columns, values):
            df.loc[int(idx), column] = value

    return df

Expand Down Expand Up @@ -403,15 +418,30 @@ def run_detect_intent_queries(self, df: pd.DataFrame) -> pd.DataFrame:
text=row["action_input"],
parameters=session_parameters
)

# Add data to the existing row
df.loc[index, ["session_id", "agent_id"]] = [
data["session_id"],
data["agent_id"],
]
text_res = self.ar._extract_text(res)
utterance_idx = int(row["utterance_pair"])
df.loc[utterance_idx, ["agent_response"]] = [text_res]

# Handle Agent Responses
if row["utterance_pair"] != "":
utterance_idx = int(row["utterance_pair"])
df.loc[utterance_idx, ["agent_response"]] = [text_res]

else:
# collect the data for inserting later
self.unexpected_rows.append(
{
"session_id": data["session_id"],
"agent_id": data["agent_id"],
"action_type": "UNEXPECTED Agent Response",
"index": index,
"column": "agent_response",
"data": text_res
}
)

# Handle Playbook Invocations
playbook_responses = (
Expand All @@ -434,13 +464,37 @@ def run_detect_intent_queries(self, df: pd.DataFrame) -> pd.DataFrame:
tool_responses = (
self.sessions_client.collect_tool_responses(res)
)
if len(tool_responses) > 0:
if tool_responses: # Only call if not empty
df = self.process_tool_invocations(
tool_responses, index, row, df
tool_responses,
index,
row,
df
)

return df

def insert_unexpected_rows(self, df: pd.DataFrame) -> pd.DataFrame:
    """Insert any unexpected rows collected during runtime.

    Each entry of ``self.unexpected_rows`` is a dict with the keys
    ``session_id``, ``agent_id``, ``action_type``, ``index``, ``column`` and
    ``data``. A new row is placed directly before position ``index`` and
    carries the same index label, with ``data`` stored under ``column``.

    Args:
        df: Dataframe to extend. Positions and labels are assumed to
            coincide (default integer index) -- TODO confirm with callers.

    Returns:
        The dataframe with the collected rows inserted, or ``df`` itself
        when nothing was collected.
    """
    if not self.unexpected_rows:
        return df

    # Walk backwards so an insertion never shifts the position of a row
    # that still has to be inserted.
    for row in reversed(self.unexpected_rows):
        index = row["index"]
        new_row = pd.DataFrame(columns=df.columns, index=[index])
        new_row["session_id"] = row["session_id"]
        new_row["agent_id"] = row["agent_id"]
        new_row["action_type"] = row["action_type"]
        new_row[row["column"]] = row["data"]
        df = pd.concat([df.iloc[:index], new_row, df.iloc[index:]])

    # Inserted rows duplicate an existing label; a stable sort keeps each
    # one where it was placed instead of leaving the tie order undefined.
    return df.sort_index(kind="mergesort")

def run_evals(self, df: pd.DataFrame) -> pd.DataFrame:
print("Starting Evals...")

Expand All @@ -449,9 +503,15 @@ def run_evals(self, df: pd.DataFrame) -> pd.DataFrame:

return df

def run_query_and_eval(self, df: pd.DataFrame) -> pd.DataFrame:
def scrape_results(self, df: pd.DataFrame) -> pd.DataFrame:
    """Run the scraping stages on ``df`` and return the result.

    The stages run in order: ``add_response_columns``,
    ``run_detect_intent_queries`` and ``insert_unexpected_rows``; each one
    receives the previous stage's output.
    """
    prepared = self.add_response_columns(df)
    queried = self.run_detect_intent_queries(prepared)
    return self.insert_unexpected_rows(queried)

def run_query_and_eval(self, df: pd.DataFrame) -> pd.DataFrame:
df = self.scrape_results(df)
df = self.run_evals(df)
df = self.clean_outputs(df)

Expand Down Expand Up @@ -602,27 +662,37 @@ def get_model_name(settings: types.GenerativeSettings) -> str:

return model_map.get(model_name, "")


def pair_tool_calls(self, df: pd.DataFrame) -> pd.DataFrame:
    """Pair user utterances with indices of relevant tool invocations.

    Within each ``eval_id`` group, every "Tool Invocation" row is attributed
    to the closest preceding "User Utterance" row. The list of tool row
    labels is stored as a string (e.g. ``"[3, 4]"``) in the ``tool_pair``
    column of that utterance row. Utterances with no tool invocations keep a
    missing ``tool_pair`` value, wherever they sit in the group.

    Args:
        df: Dataframe with ``eval_id`` and ``action_type`` columns.

    Returns:
        The same dataframe with a (re)created ``tool_pair`` string column.
    """
    df["tool_pair"] = pd.Series(dtype="string")

    for _, group in df.groupby("eval_id"):
        tool_indices = []
        last_user_utterance_index = None

        for index, action_type in group["action_type"].items():
            if action_type == "User Utterance":
                # Flush the tools gathered for the *previous* utterance.
                # An empty list is not written, so "no tools" is always a
                # missing value and never the string "[]".
                if last_user_utterance_index is not None and tool_indices:
                    df.loc[last_user_utterance_index, "tool_pair"] = (
                        str(tool_indices)
                    )
                tool_indices = []
                last_user_utterance_index = index

            elif action_type == "Tool Invocation":
                tool_indices.append(index)

        # Flush whatever belongs to the final utterance of the group.
        if last_user_utterance_index is not None and tool_indices:
            df.loc[last_user_utterance_index, "tool_pair"] = (
                str(tool_indices)
            )

    return df

Expand Down
Loading