
Commit b0d6159
Merge branch 'main' into users/ninhu/prompty_evaluators_to_async
ninghu committed Jul 18, 2024
2 parents 3afd662 + 7362dc1 commit b0d6159
Showing 37 changed files with 779 additions and 750 deletions.
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -5,7 +5,7 @@ Please add an informative description that covers that changes made by the pull
# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.**
- [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **I have read the [contribution guidelines](https://github.com/microsoft/promptflow/blob/main/CONTRIBUTING.md).**
- [ ] **I confirm that all new dependencies are compatible with the MIT license.**
- [ ] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).**

3 changes: 3 additions & 0 deletions .github/workflows/promptflow-evals-e2e-test-local.yml
@@ -38,6 +38,9 @@ jobs:
- name: install test dependency group
run: poetry install --only test
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install recording
run: poetry run pip install -e ../promptflow-recording
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: install promptflow packages in editable mode
run: |
poetry run pip install -e ../promptflow
2 changes: 1 addition & 1 deletion .github/workflows/promptflow-evals-unit-test.yml
@@ -50,7 +50,7 @@ jobs:
- name: run unit tests
id: run_unit_tests
run: |
poetry run pytest -m unittest --cov=promptflow --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report=xml --cov-fail-under=63
poetry run pytest -m unittest --cov=promptflow --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report=xml --cov-fail-under=58
working-directory: ${{ env.WORKING_DIRECTORY }}
- name: upload coverage report
uses: actions/upload-artifact@v4
1 change: 0 additions & 1 deletion scripts/code_qa/assert_local_install.py
@@ -9,7 +9,6 @@ class TestPackagesNotInstalles():
@pytest.mark.parametrize('package', [
'promptflow.azure',
'azure.ai.ml',
'azure.identity',
'azure.storage.blob'
])
def test_promptflow_azure(self, package):
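The body of test_promptflow_azure is collapsed in this view. A minimal, self-contained sketch of what such a negative import check might look like; using importlib.import_module inside pytest.raises(ImportError) is an assumption for illustration, not necessarily the repository's actual implementation:

```python
import importlib

import pytest


class TestPackagesNotInstalled:
    @pytest.mark.parametrize('package', [
        'promptflow.azure',
        'azure.ai.ml',
        'azure.storage.blob',
    ])
    def test_promptflow_azure(self, package):
        # In a local-only install these Azure packages should be absent,
        # so importing any of them is expected to fail.
        with pytest.raises(ImportError):
            importlib.import_module(package)
```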
@@ -1,9 +1,18 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from concurrent.futures import ThreadPoolExecutor, as_completed

from ._hate_unfairness import HateUnfairnessEvaluator
from ._self_harm import SelfHarmEvaluator
from ._sexual import SexualEvaluator
from ._violence import ViolenceEvaluator
try:
from ._hate_unfairness import HateUnfairnessEvaluator
from ._self_harm import SelfHarmEvaluator
from ._sexual import SexualEvaluator
from ._violence import ViolenceEvaluator
except ImportError:
from _hate_unfairness import HateUnfairnessEvaluator
from _self_harm import SelfHarmEvaluator
from _sexual import SexualEvaluator
from _violence import ViolenceEvaluator


class ContentSafetyEvaluator:
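Only the new import of ThreadPoolExecutor and as_completed is visible here; the rest of ContentSafetyEvaluator is collapsed. A minimal sketch of how the four sub-evaluators might be fanned out in parallel and their per-metric results merged; the helper name and the merge-by-dict-update step are assumptions for illustration:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed


def evaluate_concurrently(evaluators, question: str, answer: str) -> dict:
    # Submit each sub-evaluator (violence, sexual, self-harm, hate/unfairness)
    # as its own task and merge the per-metric results as they complete.
    results = {}
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(evaluator, question=question, answer=answer)
            for evaluator in evaluators
        ]
        for future in as_completed(futures):
            results.update(future.result())
    return results
```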
@@ -1,13 +1,23 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, List

import numpy as np

from ._hate_unfairness import HateUnfairnessEvaluator
from ._self_harm import SelfHarmEvaluator
from ._sexual import SexualEvaluator
from ._violence import ViolenceEvaluator
try:
from ._hate_unfairness import HateUnfairnessEvaluator
from ._self_harm import SelfHarmEvaluator
from ._sexual import SexualEvaluator
from ._violence import ViolenceEvaluator
except ImportError:
from _hate_unfairness import HateUnfairnessEvaluator
from _self_harm import SelfHarmEvaluator
from _sexual import SexualEvaluator
from _violence import ViolenceEvaluator


logger = logging.getLogger(__name__)

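This second evaluator adds numpy, logging, and typing imports alongside the same concurrency and import-fallback changes; its body is collapsed here. A sketch of how numpy might be used to aggregate per-turn scores into one number per metric; the aggregation rule (nan-aware mean) is an assumption for illustration:

```python
from typing import Dict, List

import numpy as np


def aggregate_scores(per_turn_results: List[Dict[str, float]]) -> Dict[str, float]:
    # Collect each metric's scores across turns, then average them,
    # ignoring turns where a sub-evaluator failed and produced NaN.
    collected: Dict[str, List[float]] = {}
    for result in per_turn_results:
        for metric, score in result.items():
            collected.setdefault(metric, []).append(score)
    return {metric: float(np.nanmean(scores)) for metric, scores in collected.items()}
```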
@@ -0,0 +1,60 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from abc import ABC

try:
from .common.constants import EvaluationMetrics
from .common.evaluate_with_rai_service import evaluate_with_rai_service
from .common.validate_inputs import validate_inputs
except ImportError:
from common.constants import EvaluationMetrics
from common.evaluate_with_rai_service import evaluate_with_rai_service
from common.validate_inputs import validate_inputs


class ContentSafetySubEvaluatorBase(ABC):
"""
Initialize an evaluator for a specified Evaluation Metric. Base class that is not
meant to be instantiated by users.
:param metric: The metric to be evaluated.
:type metric: ~promptflow.evals.evaluators._content_safety.flow.constants.EvaluationMetrics
:param project_scope: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type project_scope: dict
:param credential: The credential for connecting to Azure AI project.
:type credential: TokenCredential
"""

def __init__(self, metric: EvaluationMetrics, project_scope: dict, credential=None):
self._metric = metric
self._project_scope = project_scope
self._credential = credential

def __call__(self, *, question: str, answer: str, **kwargs):
"""
Evaluates content according to this evaluator's metric.
:param question: The question to be evaluated.
:type question: str
:param answer: The answer to be evaluated.
:type answer: str
:return: The evaluation score.
:rtype: dict
"""
# Validate inputs
# Raises value error if failed, so execution alone signifies success.
_ = validate_inputs(question=question, answer=answer)

# question: str, answer: str, metric_name: str, project_scope: dict, credential: TokenCredential
# Run score computation based on supplied metric.
result = evaluate_with_rai_service(
metric_name=self._metric,
question=question,
answer=answer,
project_scope=self._project_scope,
credential=self._credential,
)
return result
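The base class keys scoring off an EvaluationMetrics constant from the new common package. A sketch of what that constants class presumably looks like; only the attribute names appear in these diffs, so the string values shown are assumptions:

```python
class EvaluationMetrics:
    # Attribute names are taken from the diffs; the backend string values
    # are assumed for illustration only.
    HATE_FAIRNESS = "hate_fairness"
    VIOLENCE = "violence"
    SEXUAL = "sexual"
    SELF_HARM = "self_harm"
```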
@@ -1,11 +1,15 @@
from pathlib import Path

from promptflow.client import load_flow

from .flow.constants import EvaluationMetrics


class HateUnfairnessEvaluator:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
try:
from .common.constants import EvaluationMetrics
from ._content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase
except ImportError:
from common.constants import EvaluationMetrics
from _content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase


class HateUnfairnessEvaluator(ContentSafetySubEvaluatorBase):
"""
Initialize a hate-unfairness evaluator for hate unfairness score.
@@ -39,13 +43,13 @@ class HateUnfairnessEvaluator:
"""

def __init__(self, project_scope: dict, credential=None):
self._project_scope = project_scope
self._credential = credential

# Load the flow as function
current_dir = Path(__file__).resolve().parent
flow_dir = current_dir / "flow"
self._flow = load_flow(source=flow_dir)
# Hate_fairness is the actual backend metric name. Which, uh, doesn't sound great.
# so invert the name.
super().__init__(
metric=EvaluationMetrics.HATE_FAIRNESS,
project_scope=project_scope,
credential=credential,
)

def __call__(self, *, question: str, answer: str, **kwargs):
"""
@@ -58,14 +62,4 @@ def __call__(self, *, question: str, answer: str, **kwargs):
:return: The hate unfairness score.
:rtype: dict
"""

# Run the evaluation flow
output = self._flow(
metric_name=EvaluationMetrics.HATE_FAIRNESS,
question=question,
answer=answer,
project_scope=self._project_scope,
credential=self._credential,
)

return output["result"]
return super().__call__(question=question, answer=answer, **kwargs)
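With the refactor, each evaluator is constructed with a project scope and credential and then called with a question/answer pair. A hypothetical usage sketch; the project_scope key names are assumptions based on the docstring's description (subscription id, resource group, project name), not values taken from the diff:

```python
from azure.identity import DefaultAzureCredential

# Assumed key names; the docstring only says the scope holds the
# subscription id, resource group, and project name.
project_scope = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<ai-project-name>",
}

evaluator = HateUnfairnessEvaluator(project_scope, credential=DefaultAzureCredential())
result = evaluator(question="What is the capital of France?", answer="Paris.")
print(result)  # a dict containing the hate-unfairness score for this pair
```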
@@ -1,11 +1,15 @@
from pathlib import Path

from promptflow.client import load_flow

from .flow.constants import EvaluationMetrics


class SelfHarmEvaluator:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
try:
from .common.constants import EvaluationMetrics
from ._content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase
except ImportError:
from common.constants import EvaluationMetrics
from _content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase


class SelfHarmEvaluator(ContentSafetySubEvaluatorBase):
"""
Initialize a self harm evaluator for self harm score.
@@ -39,13 +43,11 @@ class SelfHarmEvaluator:
"""

def __init__(self, project_scope: dict, credential=None):
self._project_scope = project_scope
self._credential = credential

# Load the flow as function
current_dir = Path(__file__).resolve().parent
flow_dir = current_dir / "flow"
self._flow = load_flow(source=flow_dir)
super().__init__(
metric=EvaluationMetrics.SELF_HARM,
project_scope=project_scope,
credential=credential,
)

def __call__(self, *, question: str, answer: str, **kwargs):
"""
@@ -59,13 +61,4 @@ def __call__(self, *, question: str, answer: str, **kwargs):
:rtype: dict
"""

# Run the evaluation flow
output = self._flow(
metric_name=EvaluationMetrics.SELF_HARM,
question=question,
answer=answer,
project_scope=self._project_scope,
credential=self._credential,
)

return output["result"]
return super().__call__(question=question, answer=answer, **kwargs)
@@ -1,11 +1,15 @@
from pathlib import Path

from promptflow.client import load_flow

from .flow.constants import EvaluationMetrics


class SexualEvaluator:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
try:
from .common.constants import EvaluationMetrics
from ._content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase
except ImportError:
from common.constants import EvaluationMetrics
from _content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase


class SexualEvaluator(ContentSafetySubEvaluatorBase):
"""
Initialize a sexual evaluator for sexual score.
@@ -39,13 +43,11 @@ class SexualEvaluator:
"""

def __init__(self, project_scope: dict, credential=None):
self._project_scope = project_scope
self._credential = credential

# Load the flow as function
current_dir = Path(__file__).resolve().parent
flow_dir = current_dir / "flow"
self._flow = load_flow(source=flow_dir)
super().__init__(
metric=EvaluationMetrics.SEXUAL,
project_scope=project_scope,
credential=credential,
)

def __call__(self, *, question: str, answer: str, **kwargs):
"""
@@ -58,14 +60,4 @@ def __call__(self, *, question: str, answer: str, **kwargs):
:return: The sexual score.
:rtype: dict
"""

# Run the evaluation flow
output = self._flow(
metric_name=EvaluationMetrics.SEXUAL,
question=question,
answer=answer,
project_scope=self._project_scope,
credential=self._credential,
)

return output["result"]
return super().__call__(question=question, answer=answer, **kwargs)
@@ -1,11 +1,15 @@
from pathlib import Path

from promptflow.client import load_flow

from .flow.constants import EvaluationMetrics


class ViolenceEvaluator:
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
try:
from .common.constants import EvaluationMetrics
from ._content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase
except ImportError:
from common.constants import EvaluationMetrics
from _content_safety_sub_evaluator_base import ContentSafetySubEvaluatorBase


class ViolenceEvaluator(ContentSafetySubEvaluatorBase):
"""
Initialize a violence evaluator for violence score.
@@ -39,13 +43,11 @@ class ViolenceEvaluator:
"""

def __init__(self, project_scope: dict, credential=None):
self._project_scope = project_scope
self._credential = credential

# Load the flow as function
current_dir = Path(__file__).resolve().parent
flow_dir = current_dir / "flow"
self._flow = load_flow(source=flow_dir)
super().__init__(
metric=EvaluationMetrics.VIOLENCE,
project_scope=project_scope,
credential=credential,
)

def __call__(self, *, question: str, answer: str, **kwargs):
"""
@@ -58,14 +60,4 @@ def __call__(self, *, question: str, answer: str, **kwargs):
:return: The violence score.
:rtype: dict
"""

# Run the evaluation flow
output = self._flow(
metric_name=EvaluationMetrics.VIOLENCE,
question=question,
answer=answer,
project_scope=self._project_scope,
credential=self._credential,
)

return output["result"]
return super().__call__(question=question, answer=answer, **kwargs)
@@ -0,0 +1,12 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

from . import constants, evaluate_with_rai_service, validate_inputs, utils

__all__ = [
"constants",
"evaluate_with_rai_service",
"validate_inputs",
"utils",
]