Haystack 1.x Benchmarks #15

Workflow file for this run
name: Haystack 1.x Benchmarks
on:
  workflow_dispatch:
  schedule:
    # At 00:01 on Sunday
    - cron: "1 0 * * 0"
permissions:
  id-token: write
  contents: read
env:
  AWS_REGION: eu-central-1
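# Pipeline: deploy a self-hosted GPU runner on EC2, run the benchmark jobs on
# it one after another, then terminate the instance.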
jobs:
  deploy-runner:
    runs-on: ubuntu-latest
    outputs:
      cml_runner_id: ${{ steps.deploy.outputs.cml_runner_id }}
    steps:
      - uses: actions/checkout@v4
      - uses: iterative/setup-cml@v2
      - name: AWS authentication
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
        with:
          aws-region: ${{ env.AWS_REGION }}
          role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
      - name: Launch EC2 instance and deploy runner
        id: deploy
        env:
          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
        run: |
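          # Launch a GPU instance (p3.2xlarge, 64 GB disk) and register it as a
          # self-hosted runner labelled "cml"; the jobs below target it via
          # runs-on: [self-hosted, cml]. tee echoes the log while capturing it.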
          OUTPUT=$(cml runner launch \
            --cloud aws \
            --cloud-region ${{ env.AWS_REGION }} \
            --cloud-type=p3.2xlarge \
            --cloud-hdd-size=64 \
            --labels=cml 2>&1 | tee /dev/fd/2)
          # Extract the runner 'id' from the JSON log lines and expose it as a
          # step output so terminate-runner can locate the instance later
          ID_VALUE=$(echo "$OUTPUT" | jq -r '.message? | fromjson? | select(.id != null) | .id // empty')
          echo "cml_runner_id=$ID_VALUE" >> "$GITHUB_OUTPUT"
  run-reader-benchmarks:
    needs: deploy-runner
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
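    # Each benchmark job may run for up to 48 hours (2880 minutes)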
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x
      - name: Install Haystack + Datadog requirements
        run: |
          pip install .[metrics,benchmarks,inference]
          pip install -r test/benchmarks/datadog/requirements.txt
      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
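          # Run every reader config; "${f%.*}" strips the .yml extension so
          # each JSON result is named after its config file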
          for f in ./configs/reader/*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ==="
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f"
            echo "=== Benchmarks done for $name (or failed) ==="
          done
      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        run: |
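          # send_metrics.py is invoked with the results directory, the Datadog
          # API key, and the Datadog EU endpoint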
          python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-reader
          path: test/benchmarks/out/
  run-elasticsearch-benchmarks:
    needs:
      - deploy-runner
      - run-reader-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      elasticsearch:
        image: elasticsearch:7.17.6
        env:
          discovery.type: "single-node"
        ports:
          - 9201:9200
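          # Maps host port 9201 to container port 9200; the benchmark configs
          # are assumed to connect on 9201 (they are not shown in this file)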
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x
      - name: Install Haystack + Datadog requirements
        run: |
          pip install .[metrics,elasticsearch,benchmarks,inference]
          pip install -r test/benchmarks/datadog/requirements.txt
      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
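          # Pick up the Elasticsearch-specific configs; note that bash expands
          # ** like a single * here unless globstar is enabled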
          for f in ./configs/**/*-elasticsearch-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ==="
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f"
            echo "=== Benchmarks done for $name (or failed) ==="
          done
      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        run: |
          python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-elasticsearch
          path: test/benchmarks/out/
  run-weaviate-benchmarks:
    needs:
      - deploy-runner
      - run-elasticsearch-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      weaviate:
        image: semitechnologies/weaviate:1.17.2
        env:
          AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "true"
          PERSISTENCE_DATA_PATH: "/var/lib/weaviate"
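          # Anonymous access keeps setup simple; the service only lives for
          # the duration of this benchmark run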
        ports:
          - 8080:8080
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x
      - name: Install Haystack + Datadog requirements
        run: |
          pip install .[metrics,weaviate,benchmarks,inference]
          pip install -r test/benchmarks/datadog/requirements.txt
      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          for f in ./configs/**/*-weaviate-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ==="
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f"
            echo "=== Benchmarks done for $name (or failed) ==="
          done
      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        run: |
          python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-weaviate
          path: test/benchmarks/out/
  run-opensearch-benchmarks:
    needs:
      - deploy-runner
      - run-weaviate-benchmarks
    runs-on: [self-hosted, cml]
    container:
      image: docker://iterativeai/cml:0-dvc2-base1-gpu
      options: --gpus all
    services:
      opensearch:
        image: opensearchproject/opensearch:1.3.5
        env:
          discovery.type: "single-node"
          OPENSEARCH_JAVA_OPTS: "-Xms4096m -Xmx4096m"
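          # Equal -Xms/-Xmx pin the JVM heap at 4 GB for the single-node service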
        ports:
          - 9200:9200
    timeout-minutes: 2880
    steps:
      - uses: actions/checkout@v4
        with:
          ref: v1.x
      - name: Install Haystack + Datadog requirements
        run: |
          pip install .[metrics,opensearch,benchmarks,inference]
          pip install -r test/benchmarks/datadog/requirements.txt
      - name: Run benchmarks
        working-directory: test/benchmarks
        run: |
          mkdir -p out
          for f in ./configs/**/*-opensearch-*.yml; do
            name="${f%.*}"
            echo "=== Running benchmarks for $name ==="
            config_name="$(basename "$name")"
            python run.py --output "out/$config_name.json" "$f"
            echo "=== Benchmarks done for $name (or failed) ==="
          done
      - name: Send Benchmark results to Datadog
        working-directory: test/benchmarks
        run: |
          python datadog/send_metrics.py out/ ${{ secrets.CORE_DATADOG_API_KEY }} https://api.datadoghq.eu
      - name: Archive benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-opensearch
          path: test/benchmarks/out/
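  # `if: always()` guarantees the EC2 instance is terminated even when a
  # benchmark job fails or the run is cancelled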
  terminate-runner:
    if: always()
    needs:
      - deploy-runner
      - run-opensearch-benchmarks
    runs-on: ubuntu-latest
    steps:
      - name: AWS authentication
        uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a
        with:
          aws-region: ${{ env.AWS_REGION }}
          role-to-assume: ${{ secrets.AWS_CI_ROLE_ARN }}
      - name: Terminate EC2 instance
        env:
          CML_RUNNER_ID: ${{ needs.deploy-runner.outputs.cml_runner_id }}
        run: |
          # Get the instance ID using its Name tag and terminate the instance
          INSTANCE_ID=$(aws ec2 describe-instances --filters "Name=tag:Name,Values=${{ env.CML_RUNNER_ID }}" --query "Reservations[*].Instances[*].[InstanceId]" --output text)
          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"