Update GSQ component to the azure-ai-evaluation SDK

Azure · Dec 4, 2024 · 0938c08
1 parent a3874d5
commit 0938c08
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 28 deletions.
diff --git a/...el_monitoring/components/generation_safety_quality/annotation_compute_histogram/spec.yaml b/...el_monitoring/components/generation_safety_quality/annotation_compute_histogram/spec.yaml
@@ -113,16 +113,16 @@ conf:
     dependencies:
       - python=3.10
       - pip:
-          - azure-cli-core~=2.62.0
-          - promptflow-evals==0.3.1
-          - openai~=1.11.1
+          - azure-cli-core~=2.66.0
+          - azure-ai-evaluation~=1.0.1
+          - azure-ai-ml~=1.22.4
+          - openai~=1.56.2
           - json5==0.9.11
           - mltable~=1.6.1
-          - promptflow[azure]~=1.13.0
-          - promptflow-tools~=1.4.0
           - keyrings.alt~=5.0.0
           - azureml-mlflow~=1.56.0
-          - mlflow~=2.14.3
+          - protobuf<5.29.0
+          - mlflow~=2.17.2
           - azureml-fsspec~=1.3.1
           - fsspec~=2023.4.0
           - pyopenssl<23.0.0

diff --git a/...odel_monitoring/components/generation_safety_quality/annotation_compute_metrics/spec.yaml b/...odel_monitoring/components/generation_safety_quality/annotation_compute_metrics/spec.yaml
@@ -69,15 +69,15 @@ conf:
     dependencies:
       - python=3.10
       - pip:
-        - azure-cli-core~=2.62.0
-        - promptflow-evals==0.3.1
-        - openai~=1.11.1
+        - azure-cli-core~=2.66.0
+        - azure-ai-evaluation~=1.0.1
+        - azure-ai-ml~=1.22.4
+        - openai~=1.56.2
         - json5==0.9.11
         - mltable~=1.6.1
-        - promptflow[azure]~=1.13.0
-        - promptflow-tools~=1.4.0
         - keyrings.alt~=5.0.0
-        - mlflow~=2.14.3
+        - mlflow~=2.17.2
+        - protobuf<5.29.0
         - azureml-fsspec~=1.3.1
         - fsspec~=2023.4.0
     name: momo-gsq-spark

diff --git a/...l_monitoring/components/src/generation_safety_quality/annotation_compute_histogram/run.py b/...l_monitoring/components/src/generation_safety_quality/annotation_compute_histogram/run.py
@@ -21,9 +21,9 @@
 import uuid
 
 import pandas as pd
-from promptflow.core import AzureOpenAIModelConfiguration
-from promptflow.evals.evaluate import evaluate
-from promptflow.evals.evaluators import (
+from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration
+from azure.ai.evaluation import (
+    evaluate,
     CoherenceEvaluator,
     FluencyEvaluator,
     GroundednessEvaluator,
@@ -49,6 +49,9 @@
 COMPLETION = "completion"
 CONTEXT = "context"
 GROUND_TRUTH = "ground_truth"
+QUERY = "query"
+RESPONSE = "response"
+COLUMN_MAPPING = "column_mapping"
 CORRELATION_ID = "correlationid"
 TRACE_ID = "trace_id"
 ROOT_SPAN = "root_span"
@@ -514,7 +517,7 @@ def annotate_batch(iterator):
                 input_columns.append(GROUND_TRUTH)
             passthrough_cols = get_passthrough_cols(batch)
             for index, row in batch.iterrows():
-                qca = {PROMPT: row[PROMPT], COMPLETION: row[COMPLETION]}
+                qca = {QUERY: row[PROMPT], RESPONSE: row[COMPLETION]}
                 if has_context:
                     qca[CONTEXT] = row[CONTEXT]
                 if has_ground_truth:
@@ -543,14 +546,16 @@ def annotate_batch(iterator):
                 evaluators[metric_name_compact] = evaluator(
                     model_config=model_config)
                 evaluator_config[metric_name_compact] = {
-                    "answer": format_data_column(COMPLETION),
-                    "question": format_data_column(PROMPT)
+                    COLUMN_MAPPING: {
+                        RESPONSE: format_data_column(RESPONSE),
+                        QUERY: format_data_column(QUERY)
+                    }
                 }
                 config = evaluator_config[metric_name_compact]
                 if has_context:
-                    config["context"] = format_data_column(CONTEXT)
+                    config[COLUMN_MAPPING][CONTEXT] = format_data_column(CONTEXT)
                 if has_ground_truth:
-                    config["ground_truth"] = format_data_column(GROUND_TRUTH)
+                    config[COLUMN_MAPPING][GROUND_TRUTH] = format_data_column(GROUND_TRUTH)
             # write rows to jsonl file
             input_file_name = "eval_input_" + str(uuid.uuid4()) + ".jsonl"
             input_file_path = os.path.join(input_dir.name, input_file_name)
@@ -590,7 +595,12 @@ def annotate_batch(iterator):
                     tabular_result.rename(columns={result_name: metric_name_compact}, inplace=True)
                 for column_name in input_columns:
                     # input column names follow schema like "inputs.context"
-                    result_name = "inputs." + column_name
+                    if column_name == PROMPT:
+                        result_name = "inputs." + QUERY
+                    elif column_name == COMPLETION:
+                        result_name = "inputs." + RESPONSE
+                    else:
+                        result_name = "inputs." + column_name
                     tabular_result.rename(columns={result_name: column_name}, inplace=True)
             except KeyError as e:
                 # raise new user error with more context

diff --git a/assets/model_monitoring/components/tests/gsq-requirements.txt b/assets/model_monitoring/components/tests/gsq-requirements.txt
@@ -1,15 +1,16 @@
-azure-ai-ml==1.13.0
-promptflow-evals==0.3.1
-azure-cli-core~=2.62.0
-azure-identity~=1.12.0
+azure-ai-ml~=1.22.4
+azure-ai-evaluation~=1.0.1
+azure-cli-core~=2.66.0
+azure-identity~=1.19.0
 # apparently this is still needed for some azureml paths, see:
 # assets/model_monitoring/components/src/model_data_collector_preprocessor/store_url.py
-azureml-core~=1.56.0
-mlflow~=2.14.3
+azureml-core~=1.58.0
+mlflow~=2.17.2
+protobuf<5.29.0
 fsspec~=2023.4.0
 azureml-fsspec==1.3.1
 mltable~=1.6.1
-openai~=1.12.0
+openai~=1.56.2
 pandas~=2.0.3
 ruamel.yaml==0.17.21
 scipy~=1.10.0