# Make evaluation run a context manager instead of a singleton. #735
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# End-to-end test workflow for the promptflow-evals package: build the wheel
# once, test it on an OS x Python matrix, then publish the test results and a
# coverage summary.
name: promptflow-evals-e2e-test

on:
  schedule:
    - cron: "40 10 * * *"  # 2:40 PST every day
  pull_request:
    paths:
      - src/promptflow-evals/**
      - .github/workflows/promptflow-evals-e2e-test.yml
  workflow_dispatch:

env:
  IS_IN_CI_PIPELINE: "true"
  # Every build/install/test step below runs from the package directory.
  WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals

jobs:
  # Build the promptflow-evals wheel once and hand it to the test job as the
  # `promptflow-evals` artifact.
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: snok/install-poetry@v1
      - name: build
        run: poetry build
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - uses: actions/upload-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl

  # Install the built wheel next to the sibling promptflow packages (editable)
  # and run the tests marked `e2etest` on every OS / Python combination.
  test:
    needs: build
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13]
        # TODO: Encounter hash mismatch for ubuntu-latest and 3.9 combination during installing promptflow-evals package
        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
        # Add 3.9 back after we figure out the issue
        python-version: ['3.8', '3.10', '3.11']
      # Let the remaining matrix legs finish even if one of them fails.
      fail-fast: false
    # snok/install-poetry need this to support Windows
    defaults:
      run:
        shell: bash
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: set test mode
        # Always run in replay mode for now until we figure out the test resource to run live mode
        run: echo "PROMPT_FLOW_TEST_MODE=replay" >> $GITHUB_ENV
        # run: echo "PROMPT_FLOW_TEST_MODE=$(if [[ "${{ github.event_name }}" == "pull_request" ]]; then echo replay; else echo live; fi)" >> $GITHUB_ENV
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - uses: snok/install-poetry@v1
      # Fetch the wheel produced by the build job into the package directory.
      - uses: actions/download-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}
      - name: install test dependency group
        run: poetry install --only test
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: install promptflow packages in editable mode
        run: |
          poetry run pip install -e ../promptflow
          poetry run pip install -e ../promptflow-core
          poetry run pip install -e ../promptflow-devkit
          poetry run pip install -e ../promptflow-tracing
          poetry run pip install -e ../promptflow-tools
          poetry run pip install -e ../promptflow-azure
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: install promptflow-evals from wheel
        # wildcard expansion (*) does not work in Windows, so leverage python to find and install
        run: poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: install recording
        run: poetry run pip install -e ../promptflow-recording
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: generate end-to-end test config from secret
        run: echo '${{ secrets.PF_EVALS_E2E_TEST_CONFIG }}' >> connections.json
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - uses: azure/login@v2
        with:
          creds: ${{ secrets.PF_EVALS_SP_CREDENTIALS }}
          enable-AzPSSession: true
      - name: run e2e tests
        id: run_all_e2e_tests
        run: |
          poetry run pytest -m e2etest --cov=promptflow --cov-config=pyproject.toml --cov-report=term --cov-report=html --cov-report=xml
          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity all_e2e_tests_run_times --junit-xml test-results.xml --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: upload coverage report
        uses: actions/upload-artifact@v4
        with:
          # One artifact per matrix leg; the report job reads the
          # ubuntu-latest / py3.11 one by this name.
          name: report-${{ matrix.os }}-py${{ matrix.python-version }}
          path: |
            ${{ env.WORKING_DIRECTORY }}/*.xml
            ${{ env.WORKING_DIRECTORY }}/htmlcov/

  # Collect the artifacts uploaded by the test job and publish the test
  # results plus a coverage summary.
  report:
    needs: test
    runs-on: ubuntu-latest
    permissions:
      checks: write
      pull-requests: write
      contents: read
      issues: read
    steps:
      # No `name` given: artifacts are downloaded under `artifacts/`, which is
      # where the paths in the two steps below look for them.
      - uses: actions/download-artifact@v4
        with:
          path: artifacts
      - uses: EnricoMi/publish-unit-test-result-action@v2
        with:
          check_name: promptflow-evals test result
          comment_title: promptflow-evals test result
          files: "artifacts/**/test-results.xml"  # align with `--junit-xml` in pyproject.toml
      # Coverage summary is taken from the ubuntu-latest / Python 3.11 leg only.
      - uses: irongut/CodeCoverageSummary@v1.3.0
        with:
          filename: "artifacts/report-ubuntu-latest-py3.11/coverage.xml"
          badge: true
          fail_below_min: false
          format: markdown
          hide_complexity: true
          output: both
          thresholds: "40 80"