Commit

Merge pull request #116 from Azure-Samples/evaluations
adding evaluate folder.
marlenezw authored Aug 13, 2024
2 parents 65572c9 + 7d4ebe3 commit 9ca0288
Showing 4 changed files with 219 additions and 0 deletions.
Empty file.
3 changes: 3 additions & 0 deletions src/api/evaluate/eval_inputs.jsonl
@@ -0,0 +1,3 @@
{"research_context": "Can you find the latest camping trends and what folks are doing in the winter?", "product_context": "Can you use a selection of tents and sleeping bags as context?", "assignment_context": "Write a fun and engaging article that includes the research and product information. The article should be between 800 and 1000 words."}
{"research_context": "Can you find the latest trends in hiking shoes?" , "product_context":"Can you use a selection of hiking shoes as context?", "assignment_context": "Write an article about the best kind of hiking shoes"}
{"research_context": "Find information about the best snow camping spots in the world","product_context":"Can you use a selection of tents that are good for snow as context?", "assignment_context": "Write an article about the best kind of tents for snow camping"}
150 changes: 150 additions & 0 deletions src/api/evaluate/evaluate.py
@@ -0,0 +1,150 @@
# https://github.com/Azure-Samples/contoso-chat/blob/may-2024-updates/evaluations/evaluate-chat-flow-sdk.ipynb
import os
import sys
import json
import concurrent.futures
from pathlib import Path
from datetime import datetime

import jsonlines
import pandas as pd
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluate import evaluate

from evaluate.evaluators import ArticleEvaluator
from orchestrator import create
from dotenv import load_dotenv

load_dotenv()
folder = Path(__file__).parent.absolute().as_posix()

# # Add the api directory to the sys.path
# sys.path.append(os.path.abspath('../src/api'))

def evaluate_aistudio(model_config, data_path):
    # create a unique id for each run with date and time
    run_prefix = datetime.now().strftime("%Y%m%d%H%M%S")
    run_id = f"{run_prefix}_chat_evaluation_sdk"
    print(run_id)

    result = evaluate(
        evaluation_name=run_id,
        data=data_path,
        evaluators={
            "article": ArticleEvaluator(model_config),
        },
        evaluator_config={
            "defaults": {
                "query": "${data.query}",
                "response": "${data.response}",
                "context": "${data.context}",
            },
        },
    )
    return result
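
Note that the ${data.*} column mappings assume the input JSONL already contains query, context, and response columns. That matches the eval_data.jsonl file written out by evaluate_orchestrator below, not eval_inputs.jsonl. A hypothetical usage sketch, assuming model_config is constructed as in the __main__ block:

# re-run the SDK evaluation over previously generated orchestrator output
result = evaluate_aistudio(model_config, data_path=folder + "/eval_data.jsonl")
print(result)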

def evaluate_data(model_config, data_path):
    writer_evaluator = ArticleEvaluator(model_config)

    data = []
    with open(data_path) as f:
        for line in f:
            data.append(json.loads(line))

    results = []
    for row in data:
        result = writer_evaluator(query=row["query"], context=row["context"], response=row["response"])
        print("Evaluation results: ", result)
        results.append(result)

    return results

def run_orchestrator(research_context, product_context, assignment_context):
    query = {"research_context": research_context, "product_context": product_context, "assignment_context": assignment_context}
    context = {}
    response = None

    # collect each agent's output as the orchestrator streams it back
    for result in create(research_context, product_context, assignment_context):
        if result[0] == "researcher":
            context['research'] = result[1]
        if result[0] == "products":
            context['products'] = result[1]
        if result[0] == "writer":
            response = result[1]

    return {
        "query": json.dumps(query),
        "context": json.dumps(context),
        "response": json.dumps(response),
    }
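
For reference, each call returns one row of evaluation data as JSON-encoded strings. A sketch with hypothetical inputs:

row = run_orchestrator(
    "latest camping trends",
    "a selection of tents",
    "write a short article",
)
# row looks like:
# {
#     "query":    '{"research_context": "...", "product_context": "...", "assignment_context": "..."}',
#     "context":  '{"research": ..., "products": ...}',
#     "response": '"...article text..."'
# }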

def evaluate_orchestrator(model_config, data_path):
    writer_evaluator = ArticleEvaluator(model_config)

    data = []
    with open(data_path) as f:
        for line in f:
            data.append(json.loads(line))

    eval_data = []
    eval_results = []

    futures = []
    def evaluate_row(research_context, product_context, assignment_context):
        result = {"research_context": research_context}
        print("Running orchestrator...")
        row_data = run_orchestrator(research_context, product_context, assignment_context)
        # keep the orchestrator output so the evaluation can be re-run on it later
        eval_data.append(row_data)
        print("Evaluating results...")
        eval_result = writer_evaluator(query=row_data["query"], context=row_data["context"], response=row_data["response"])
        result.update(eval_result)
        print("Evaluation results: ", eval_result)
        eval_results.append(result)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for row in data:
            futures.append(executor.submit(evaluate_row, row["research_context"], row["product_context"], row["assignment_context"]))
        # wait for every row to finish and surface any exceptions
        for future in futures:
            future.result()

    # write out eval data to a file so we can re-run evaluation on it
    with jsonlines.open(folder + '/eval_data.jsonl', 'w') as writer:
        for row in eval_data:
            writer.write(row)

    print("Evaluation summary:\n")
    results_df = pd.DataFrame.from_dict(eval_results)
    print(results_df)

    mean_df = results_df.drop("research_context", axis=1).mean()
    print("\nAverage scores:")
    print(mean_df)

    results_df.to_markdown(folder + '/eval_results.md')
    with open(folder + '/eval_results.md', 'a') as file:
        file.write("\n\nAverage scores:\n\n")
    mean_df.to_markdown(folder + '/eval_results.md', mode='a')

    with jsonlines.open(folder + '/eval_results.jsonl', 'w') as writer:
        writer.write_all(eval_results)

    return eval_results

if __name__ == "__main__":
    import time

    # Initialize Azure OpenAI connection
    model_config = AzureOpenAIModelConfiguration(
        azure_deployment=os.environ["AZURE_OPENAI_4_EVAL_DEPLOYMENT_NAME"],
        api_version=os.environ["AZURE_OPENAI_API_VERSION"],
        azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_NAME')}.cognitiveservices.azure.com/"
    )

    start = time.time()
    print("Starting evaluate...")
    print(os.environ["BING_SEARCH_ENDPOINT"])
    # sanity-check the key is present without printing the secret itself
    print("BING_SEARCH_KEY length: ", len(os.environ["BING_SEARCH_KEY"]))
    eval_result = evaluate_orchestrator(model_config, data_path=folder + "/eval_inputs.jsonl")

    end = time.time()
    print(f"Finished evaluate in {end - start:.2f}s")
66 changes: 66 additions & 0 deletions src/api/evaluate/evaluators.py
@@ -0,0 +1,66 @@
import os
import json
from threading import Thread
from opentelemetry import trace
from opentelemetry.trace import set_span_in_context
from promptflow.core import AzureOpenAIModelConfiguration
from promptflow.evals.evaluators import RelevanceEvaluator, GroundednessEvaluator, FluencyEvaluator, CoherenceEvaluator


class ArticleEvaluator:
    def __init__(self, model_config):
        self.evaluators = [
            RelevanceEvaluator(model_config),
            FluencyEvaluator(model_config),
            CoherenceEvaluator(model_config),
            GroundednessEvaluator(model_config),
        ]

    def __call__(self, *, query: str, context: str, response: str, **kwargs):
        output = {}
        for evaluator in self.evaluators:
            result = evaluator(
                question=query,
                context=context,
                answer=response,
            )
            output.update(result)
        return output
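
A standalone usage sketch (hypothetical values; the gpt_* keys below are the names the promptflow quality evaluators typically return, merged into one dict):

config = AzureOpenAIModelConfiguration(
    azure_deployment=os.environ["AZURE_OPENAI_4_EVAL_DEPLOYMENT_NAME"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_NAME')}.cognitiveservices.azure.com/"
)
evaluator = ArticleEvaluator(config)
scores = evaluator(query="...", context="...", response="...")
# e.g. {"gpt_relevance": 4.0, "gpt_fluency": 5.0, "gpt_coherence": 4.0, "gpt_groundedness": 3.0}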

def evaluate_article(data, trace_context):
    print("starting offline evals")

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("run_evaluators", context=trace_context) as span:
        span.set_attribute("inputs", json.dumps(data))
        configuration = AzureOpenAIModelConfiguration(
            azure_deployment=os.environ["AZURE_OPENAI_4_EVAL_DEPLOYMENT_NAME"],
            api_version=os.environ["AZURE_OPENAI_API_VERSION"],
            azure_endpoint=f"https://{os.getenv('AZURE_OPENAI_NAME')}.cognitiveservices.azure.com/"
        )
        evaluator = ArticleEvaluator(configuration)
        results = evaluator(query=data['query'], context=data['context'], response=data['response'])
        resultsJson = json.dumps(results)
        span.set_attribute("output", resultsJson)

        print("results: ", resultsJson)

def evaluate_article_in_background(research_context, product_context, assignment_context, research, products, article):
    eval_data = {
        "query": json.dumps({
            "research_context": research_context,
            "product_context": product_context,
            "assignment_context": assignment_context
        }),
        "context": json.dumps({
            "research": research,
            "products": products,
        }),
        "response": json.dumps(article)
    }

    # propagate trace context to the new thread
    span = trace.get_current_span()
    trace_context = set_span_in_context(span)
    thread = Thread(target=evaluate_article, args=(eval_data, trace_context,))
    thread.start()
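
A hypothetical call site, e.g. at the end of an API request handler once all three agents have produced output; the function returns immediately and the evaluators run on the background thread:

evaluate_article_in_background(
    research_context=research_context,
    product_context=product_context,
    assignment_context=assignment_context,
    research=research_result,    # researcher agent output (hypothetical name)
    products=product_result,     # products agent output (hypothetical name)
    article=article_text,        # writer agent output (hypothetical name)
)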
