Make evaluation run a context manager instead of a singleton. #41

name: promptflow-evals-regression-test
on:
  schedule:
    - cron: "40 10 * * *"  # 2:40 PST every day
  pull_request:
    paths:
      - src/promptflow-evals/**
      - .github/workflows/promptflow-evals-regression-test.yml
  workflow_dispatch:
env:
  IS_IN_CI_PIPELINE: "true"
  WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
  PROMPT_FLOW_TEST_MODE: "live"
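  # Assumed addition: the install step below compares the measured install time
  # against $TIME_LIMIT, which is not defined anywhere else in this workflow and
  # would otherwise be empty at runtime. The value here (in seconds) is an
  # assumption for illustration, not taken from the source.
  TIME_LIMIT: "300"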
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: snok/install-poetry@v1
      - name: build
        run: poetry build
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - uses: actions/upload-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
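  # The wheel built here is handed to the test job below via the
  # "promptflow-evals" artifact, so every matrix leg tests the same build.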
  test:
    needs: build
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13]
        # TODO: Hash mismatch encountered for the ubuntu-latest + Python 3.9 combination while installing the promptflow-evals package:
        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
        # Add 3.9 back once the issue is figured out.
        python-version: ['3.8', '3.10', '3.11']
      fail-fast: false
    # snok/install-poetry needs this to support Windows
    defaults:
      run:
        shell: bash
    runs-on: ${{ matrix.os }}
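    # 3 operating systems x 3 Python versions = 9 matrix jobs; with fail-fast
    # disabled, one failing combination does not cancel the remaining legs.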
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - uses: snok/install-poetry@v1
      - uses: actions/download-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}
      - name: install test dependency group
        run: poetry install --only test
        working-directory: ${{ env.WORKING_DIRECTORY }}
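      # `poetry install --only test` installs just the "test" dependency group
      # from pyproject.toml; promptflow-evals itself comes from the downloaded
      # wheel in the next step rather than from a source install.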
      - name: install promptflow-evals from wheel
        id: install_promptflow
        run: |
          # Measure the installation time.
          export start_tm=`date +%s`
          poetry run pip install -e ../promptflow
          poetry run pip install -e ../promptflow-core
          poetry run pip install -e ../promptflow-devkit
          poetry run pip install -e ../promptflow-tracing
          poetry run pip install -e ../promptflow-tools
          poetry run pip install -e ../promptflow-azure
          poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
          export install_time=$((`date +%s` - ${start_tm}))
          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity install_time_s --value $install_time --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} seconds, the limit is ${TIME_LIMIT} seconds."
        working-directory: ${{ env.WORKING_DIRECTORY }}
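      # `::warning file=...,line=...::` is a GitHub Actions workflow command:
      # when the install time exceeds the limit, it surfaces an inline warning
      # annotation (pinned to pyproject.toml) in the run summary instead of
      # failing the job.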
      - name: install recording
        run: poetry run pip install -e ../promptflow-recording
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: generate end-to-end test config from secret
        run: echo '${{ secrets.PF_EVALS_E2E_TEST_CONFIG }}' >> connections.json
        working-directory: ${{ env.WORKING_DIRECTORY }}
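      # connections.json presumably carries the endpoint/connection settings the
      # live tests read; writing it from a repository secret keeps those values
      # out of the checked-in sources.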
      - uses: azure/login@v2
        with:
          creds: ${{ secrets.PF_EVALS_SP_CREDENTIALS }}
          enable-AzPSSession: true
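      # azure/login authenticates with the service-principal credentials so the
      # live tests can reach Azure resources; enable-AzPSSession additionally
      # signs in an Azure PowerShell session.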
      - name: run performance tests
        id: performance_tests
        run: |
          # Measure the evaluators' run time.
          poetry run pytest -m performance_test --junit-xml=test-results.xml
          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity evaluator_live_tests_run_time_s --junit-xml test-results.xml --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
        working-directory: ${{ env.WORKING_DIRECTORY }}
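      # `-m performance_test` selects only tests tagged with the performance_test
      # pytest marker; the JUnit XML they emit is what report_to_app_insights.py
      # parses to report the run time to Application Insights.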