Make evaluation run a context manager instead of a singleton. #41

name: promptflow-evals-regression-test
on:
  schedule:
    - cron: "40 10 * * *"  # 2:40 PST every day
  pull_request:
    paths:
      - src/promptflow-evals/**
      - .github/workflows/promptflow-evals-regression-test.yml
  workflow_dispatch:
env:
  IS_IN_CI_PIPELINE: "true"
  WORKING_DIRECTORY: ${{ github.workspace }}/src/promptflow-evals
  PROMPT_FLOW_TEST_MODE: "live"
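  # Assumed addition: the install step below compares the measured install time
  # against $TIME_LIMIT, which is not defined anywhere else in this workflow and
  # would otherwise be empty at runtime. The value here (in seconds) is an
  # assumption for illustration, not taken from the source.
  TIME_LIMIT: "300"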
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: snok/install-poetry@v1
      - name: build
        run: poetry build
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - uses: actions/upload-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}/dist/promptflow_evals-*.whl
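  # The wheel built here is handed to the test job below via the
  # "promptflow-evals" artifact, so every matrix leg tests the same build.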
  test:
    needs: build
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13]
        # TODO: Hash mismatch encountered for the ubuntu-latest + Python 3.9 combination while installing the promptflow-evals package:
        # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
        # Add 3.9 back once the issue is figured out.
        python-version: ['3.8', '3.10', '3.11']
      fail-fast: false
    # snok/install-poetry needs this to support Windows
    defaults:
      run:
        shell: bash
    runs-on: ${{ matrix.os }}
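    # 3 operating systems x 3 Python versions = 9 matrix jobs; with fail-fast
    # disabled, one failing combination does not cancel the remaining legs.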
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - uses: snok/install-poetry@v1
      - uses: actions/download-artifact@v4
        with:
          name: promptflow-evals
          path: ${{ env.WORKING_DIRECTORY }}
      - name: install test dependency group
        run: poetry install --only test
        working-directory: ${{ env.WORKING_DIRECTORY }}
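      # `poetry install --only test` installs just the "test" dependency group
      # from pyproject.toml; promptflow-evals itself comes from the downloaded
      # wheel in the next step rather than from a source install.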
      - name: install promptflow-evals from wheel
        id: install_promptflow
        run: |
          # Measure the installation time.
          export start_tm=`date +%s`
          poetry run pip install -e ../promptflow
          poetry run pip install -e ../promptflow-core
          poetry run pip install -e ../promptflow-devkit
          poetry run pip install -e ../promptflow-tracing
          poetry run pip install -e ../promptflow-tools
          poetry run pip install -e ../promptflow-azure
          poetry run pip install --pre $(python -c "import glob; print(glob.glob('promptflow_evals-*.whl')[0])")
          export install_time=$((`date +%s` - ${start_tm}))
          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity install_time_s --value $install_time --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
          test ${install_time} -le $TIME_LIMIT || echo "::warning file=pyproject.toml,line=40,col=0::The installation took ${install_time} seconds, the limit is ${TIME_LIMIT} seconds."
        working-directory: ${{ env.WORKING_DIRECTORY }}
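      # `::warning file=...,line=...::` is a GitHub Actions workflow command:
      # when the install time exceeds the limit, it surfaces an inline warning
      # annotation (pinned to pyproject.toml) in the run summary instead of
      # failing the job.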
      - name: install recording
        run: poetry run pip install -e ../promptflow-recording
        working-directory: ${{ env.WORKING_DIRECTORY }}
      - name: generate end-to-end test config from secret
        run: echo '${{ secrets.PF_EVALS_E2E_TEST_CONFIG }}' >> connections.json
        working-directory: ${{ env.WORKING_DIRECTORY }}
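      # connections.json presumably carries the endpoint/connection settings the
      # live tests read; writing it from a repository secret keeps those values
      # out of the checked-in sources.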
      - uses: azure/login@v2
        with:
          creds: ${{ secrets.PF_EVALS_SP_CREDENTIALS }}
          enable-AzPSSession: true
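      # azure/login authenticates with the service-principal credentials so the
      # live tests can reach Azure resources; enable-AzPSSession additionally
      # signs in an Azure PowerShell session.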
      - name: run performance tests
        id: performance_tests
        run: |
          # Measure the evaluators' run time.
          poetry run pytest -m performance_test --junit-xml=test-results.xml
          poetry run python ../../scripts/code_qa/report_to_app_insights.py --activity evaluator_live_tests_run_time_s --junit-xml test-results.xml --git-hub-action-run-id ${{ github.run_id }} --git-hub-workflow ${{ github.workflow }} --git-hub-action ${{ github.action }} --git-branch ${{ github.ref }}
        working-directory: ${{ env.WORKING_DIRECTORY }}
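      # `-m performance_test` selects only tests tagged with the performance_test
      # pytest marker; the JUnit XML they emit is what report_to_app_insights.py
      # parses to report the run time to Application Insights.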