Skip to content

Run evaluation from microsoft/genai-evals and merge evaluation output #280

Run evaluation from microsoft/genai-evals and merge evaluation output

Run evaluation from microsoft/genai-evals and merge evaluation output #280

Workflow file for this run

name: Evaluate

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Run on every push to a mainline branch. Adjust this list to match the
  # mainline branch(es) this repository actually uses.
  push:
    branches:
      - main
      - ignite2024
  # Run whenever a pull request against these branches is opened or updated.
  pull_request:
    branches:
      - main
      - ignite2024
# Token permissions required for secretless Azure deployment using federated
# (OpenID Connect) credentials. See:
# https://learn.microsoft.com/en-us/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
permissions:
  contents: read
  id-token: write
jobs:
  # Converts the stored evaluation results into the microsoft/genai-evals data
  # format, writes an evaluation configuration file, and runs the
  # microsoft/genai-evals action against it.
  evaluate:
    runs-on: ubuntu-latest
    env:
      AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
      AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
      AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
      AZURE_AI_PROJECT_NAME: ${{ vars.AZURE_AI_PROJECT_NAME }}
      # NOTE(review): no step in this job references AZURE_CREDENTIALS (the
      # login step below uses client/tenant/subscription IDs). Job-level env is
      # visible to every step, including third-party actions - consider
      # removing this if the Python code does not read it.
      AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
      AZURE_LOCATION: ${{ vars.AZURE_LOCATION }}
      AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
      AZURE_OPENAI_API_VERSION: ${{ vars.AZURE_OPENAI_API_VERSION }}
      AZURE_OPENAI_4_EVAL_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_4_EVAL_DEPLOYMENT_NAME }}
      AZURE_OPENAI_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_DEPLOYMENT_NAME }}
      AZURE_OPENAI_35_TURBO_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_35_TURBO_DEPLOYMENT_NAME }}
      AZURE_RESOURCE_GROUP: ${{ vars.AZURE_RESOURCE_GROUP }}
      AZURE_SEARCH_ENDPOINT: ${{ vars.AZURE_SEARCH_ENDPOINT }}
      AZURE_OPENAI_NAME: ${{ vars.AZURE_OPENAI_NAME }}
      BING_SEARCH_ENDPOINT: ${{ vars.BING_SEARCH_ENDPOINT }}
      BING_SEARCH_KEY: ${{ secrets.BING_SEARCH_KEY }}
      # Paths shared by the conversion, configuration and evaluation steps.
      EVAL_CONFIG_FILE_PATH: ${{ github.workspace }}/evaluate-config.json
      GENAI_EVALS_INPUT_FILE_PATH: ${{ github.workspace }}/src/api/evaluate/eval_results.jsonl
      GENAI_EVALS_OUTPUT_FILE_PATH: ${{ github.workspace }}/src/api/evaluate/genai_evals_data.jsonl
    steps:
      - name: checkout repo content
        uses: actions/checkout@v4 # checkout the repository content

      - name: Install azd
        # NOTE(review): the version tag was restored from a garbled action
        # reference - confirm it matches the tag the repository intends to use.
        uses: Azure/setup-azd@v1.0.0

      - name: setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10' # install the python version needed

      - name: install python packages
        run: |
          python -m pip install --upgrade pip
          pip install -r src/api/requirements.txt

      - name: Azure login
        uses: azure/login@v2
        with:
          client-id: ${{ env.AZURE_CLIENT_ID }}
          tenant-id: ${{ env.AZURE_TENANT_ID }}
          subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

      - name: Set az account
        uses: azure/CLI@v2
        with:
          # The interpolated value is quoted so it is always passed to az as
          # a single argument.
          inlineScript: |
            az account set --subscription "${{ env.AZURE_SUBSCRIPTION_ID }}"

      # The steps below are currently disabled. While they are, the conversion
      # step reads whatever eval_results.jsonl already exists in the checkout.
      # - name: evaluate orchestrator
      #   working-directory: ./src/api
      #   run: |
      #     python -m evaluate.evaluate
      # - name: Upload eval results as build artifact
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: eval_result
      #     path: ./src/api/evaluate/eval_results.jsonl
      # - name: Upload image eval results as build artifact
      #   uses: actions/upload-artifact@v4
      #   with:
      #     name: eval_image_result
      #     path: ./src/api/evaluate/image_eval_results.jsonl
      # - name: GitHub Summary Step
      #   if: ${{ success() }}
      #   working-directory: ./src/api
      #   run: |
      #     echo "" >> $GITHUB_STEP_SUMMARY
      #     echo "📊 Evaluation Results" >> $GITHUB_STEP_SUMMARY
      #     cat evaluate/eval_results.md >> $GITHUB_STEP_SUMMARY
      #     cat evaluate/image_eval_results.md >> $GITHUB_STEP_SUMMARY

      - name: Convert to microsoft/genai-evals Data Format
        # Paths are read from the job-level env as quoted shell variables
        # rather than being interpolated into the script text, so a path
        # containing spaces or shell metacharacters cannot alter the command.
        run: |
          python -m src.api.evaluate.genai_evals_convert "$GENAI_EVALS_INPUT_FILE_PATH" "$GENAI_EVALS_OUTPUT_FILE_PATH"

      - name: Prepare AI evaluation configuration file
        # Writes the JSON configuration passed to the evaluation action below.
        # The ${{ ... }} expressions are substituted by GitHub before the
        # shell runs; the target path is taken from the job-level env.
        run: |
          cat > "$EVAL_CONFIG_FILE_PATH" <<EOF
          {
            "data": "${{ env.GENAI_EVALS_OUTPUT_FILE_PATH }}",
            "evaluators": {
              "coherence": "CoherenceEvaluator",
              "fluency": "FluencyEvaluator"
            },
            "ai_model_configuration": {
              "type": "azure_openai",
              "azure_endpoint": "${{ vars.AZURE_OPENAI_ENDPOINT }}",
              "azure_deployment": "${{ vars.AZURE_OPENAI_DEPLOYMENT_NAME }}",
              "api_version": "${{ vars.AZURE_OPENAI_API_VERSION }}"
            }
          }
          EOF

      - name: Run AI Evaluation
        id: run-ai-evaluation
        # NOTE(review): this references a branch, which can change underneath
        # the workflow. Consider pinning to a release tag or full commit SHA.
        uses: microsoft/genai-evals@users/daviwu/ignite2024
        with:
          evaluate-configuration: ${{ env.EVAL_CONFIG_FILE_PATH }}
          show-summary: true
          show-raw-output: true