# Whl skip nccl #283
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, trigger, concurrency policy and the
# environment shared by every job.
name: Benchmark

# Triggered by pull-request activity of the listed types only.
on:
  pull_request:
    types: [opened, review_requested, ready_for_review, synchronize, unlocked]

# Runs are grouped per ref; starting a new run in a group cancels the run
# that is still in progress.
concurrency:
  group: benchmark-${{ github.ref }}
  cancel-in-progress: true

# Workflow-level environment. A job may override individual entries with its
# own `env` mapping.
env:
  # NOTE(review): the OSS credentials are not referenced by any step in this
  # file; presumably they are read by the local actions - confirm.
  OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
  OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }}
  # Quoted so the value is unambiguously a string.
  ONEFLOW_TIMEOUT_SECONDS: "90"
  # Checkout path of the Oneflow-Inc/vision repository.
  FLOW_VISION_SRC: flow_vision
  # Image used for the test container.
  TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0-cuda10.2-cudnn7-runtime:70729b0680b5a32daba6f50b56e0c169cd1636fa
  # Checkout path and git ref of the Oneflow-Inc/oneflow repository.
  ONEFLOW_SRC: oneflow-src
  ONEFLOW_REF: master
# Three jobs run in sequence:
#   find-benchmark-cache -> benchmark -> update-history
jobs:
  # Produces the JSON matrix consumed by the `benchmark` job. That job reads
  # the entries `runs-on`, `cache-hit`, `is-distributed`, `test-type` and
  # `device` from it.
  find-benchmark-cache:
    name: "Find benchmark cache"
    # Skip draft pull requests and pull requests not targeting `main`.
    if: github.event.pull_request.draft == false && github.base_ref == 'main'
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.find-cache.outputs.matrix }}
    steps:
      # This repository, so the local action under ./cache-complete resolves.
      - uses: actions/checkout@v2
      - name: Checkout Oneflow-Inc/oneflow
        uses: actions/checkout@v2
        with:
          repository: Oneflow-Inc/oneflow
          ref: ${{ env.ONEFLOW_REF }}
          path: ${{ env.ONEFLOW_SRC }}
      - uses: ./cache-complete/matrix/test
        name: find cache
        id: find-cache
        timeout-minutes: 5
        with:
          # Multi-line inputs: one value per line.
          runner-labels: |
            self-hosted
            linux
            provision
          oneflow-src: ${{ env.ONEFLOW_SRC }}
          devices: |
            cuda
          tests: |
            benchmark

  # Runs the benchmark inside a Docker container, one matrix entry at a time.
  benchmark:
    name: Benchmark suite
    runs-on: ${{ matrix.runs-on }}
    if: github.event.pull_request.draft == false && github.base_ref == 'main'
    needs: [find-benchmark-cache]
    strategy:
      fail-fast: true
      max-parallel: 1
      matrix: ${{ fromJson(needs.find-benchmark-cache.outputs.matrix) }}
    env:
      # Overrides the workflow-level ONEFLOW_SRC: in this job oneflow is
      # checked out into the workspace root.
      ONEFLOW_SRC: '.'
      TEST_CONTAINER_NAME: "ci-benchmark"
      # NOTE(review): SSH_TANK_* are not referenced by any step in this file;
      # presumably consumed by the local actions - confirm.
      SSH_TANK_HOST: 192.168.1.13
      SSH_TANK_PATH: /tank
    steps:
      # Empties the workspace from inside a container. `*` is expanded by the
      # host shell and does not match dotfiles, hence the explicit second
      # command for .pytest_cache.
      # NOTE(review): presumably needed because earlier containers left files
      # the runner user cannot delete - confirm.
      - name: Fix permissions
        if: ${{ contains(matrix.runs-on, 'self-hosted') }}
        run: |
          set -x
          docker run --rm -v $PWD:$PWD -w $PWD busybox rm -rf *
          docker run --rm -v $PWD:$PWD -w $PWD busybox rm -rf .pytest_cache
      # oneflow goes into the workspace root (no `path` given).
      - name: Checkout Oneflow-Inc/oneflow
        uses: actions/checkout@v2
        with:
          ref: ${{ env.ONEFLOW_REF }}
          repository: Oneflow-Inc/oneflow
      - name: Checkout Oneflow-Inc/vision
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        uses: actions/checkout@v2
        with:
          repository: Oneflow-Inc/vision
          # please use a commit here
          ref: ca8ebc663b58667cf8cd1b6ef0c861522780b7bb
          path: ${{ env.FLOW_VISION_SRC }}
      # This repository, checked out under get-oneflow so that
      # ./get-oneflow/pytest-benchmark can be used further down.
      - uses: actions/checkout@v2
        with:
          path: get-oneflow
      # Drop any container with the same name; `|| true` keeps the step
      # successful when the removal command fails.
      - name: Remove container
        timeout-minutes: 45
        if: ${{ contains(matrix.runs-on, 'self-hosted') }}
        run: |
          docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
      # Exports ONEFLOW_TEST_CACHE_DIR to the following steps via GITHUB_ENV.
      - name: Set environment variables
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        run: |
          set -x
          echo "ONEFLOW_TEST_CACHE_DIR=$HOME/ci-cache/test_cache" >> $GITHUB_ENV
      # The next two steps append to EXTRA_DOCKER_ARGS, which is spliced into
      # the `docker run` command of the "Start container" step.
      - name: Set environment variables (distributed)
        if: ${{ fromJson(matrix.is-distributed) }}
        run: |
          set -x
          EXTRA_DOCKER_ARGS+=" --network host "
          echo "EXTRA_DOCKER_ARGS=${EXTRA_DOCKER_ARGS}" >> $GITHUB_ENV
      # Opt-in through the `need-test-verbose` pull-request label.
      - name: Enable ONEFLOW_TEST_VERBOSE
        if: ${{ contains(github.event.pull_request.labels.*.name, 'need-test-verbose') }}
        run: |
          EXTRA_DOCKER_ARGS+=" --env ONEFLOW_TEST_VERBOSE=1"
          echo "EXTRA_DOCKER_ARGS=${EXTRA_DOCKER_ARGS}" >> $GITHUB_ENV
      # Starts a detached container that only sleeps; later steps run their
      # commands in it through `docker exec`.
      # NOTE(review): ONEFLOW_WHEEL_PATH is not set anywhere in this file;
      # presumably it comes from the runner environment - confirm.
      # NOTE(review): `sleep 5400` ends the container after 90 minutes, while
      # the "Benchmark Test" step allows 100 minutes - confirm this is
      # intended.
      - name: Start container
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        working-directory: ${{ env.ONEFLOW_SRC }}
        run: |
          docker run -d --rm --privileged --shm-size=8g \
            --pids-limit -1 \
            --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
            --runtime=nvidia \
            -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \
            -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
            -v $HOME/test-container-cache/dot-local:/root/.local \
            -v $HOME/test-container-cache/dot-cache:/root/.cache \
            -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \
            -e ONEFLOW_CI=1 \
            -v $PWD:$PWD \
            -w $PWD \
            -v ${ONEFLOW_TEST_CACHE_DIR}:${ONEFLOW_TEST_CACHE_DIR} \
            -e CUDA_VISIBLE_DEVICES=1 \
            -e ONEFLOW_TEST_CACHE_DIR=${ONEFLOW_TEST_CACHE_DIR} \
            -e ONEFLOW_TIMEOUT_SECONDS=${{ env.ONEFLOW_TIMEOUT_SECONDS }} \
            -e ONEFLOW_MLIR_ENABLE_ROUND_TRIP=1 \
            --name ${TEST_CONTAINER_NAME} \
            ${{ env.EXTRA_DOCKER_ARGS }} \
            ${{ env.TEST_WITH_TORCH_IMG_TAG }} \
            sleep 5400
      # Smoke check: the container accepts `docker exec`.
      - name: Test container
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        run: |
          docker exec ${{ env.TEST_CONTAINER_NAME }} ls
      # Points pip at the configured index, then installs a pre-release
      # oneflow build using the given find-links page.
      - name: Install OneFlow
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        run: |
          ls ${ONEFLOW_WHEEL_PATH}
          docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
          docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu102
      # Editable install of the vision checkout.
      - name: Install Flow Vision
        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
        run: |
          docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.FLOW_VISION_SRC }}
      - name: Benchmark Test
        timeout-minutes: 100
        if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }}
        uses: ./get-oneflow/pytest-benchmark
        with:
          collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark
          container-name: ${{ env.TEST_CONTAINER_NAME }}
          unknown-threshold: 10
          error-threshold: 40
      # `always()` makes the cleanup run even when an earlier step failed.
      - name: Remove container
        timeout-minutes: 45
        if: ${{ always() && contains(matrix.runs-on, 'self-hosted') }}
        run: |
          docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
          docker run --rm -v $PWD:$PWD -w $PWD busybox rm -rf *

  # Runs after `benchmark`, using the local action in this repository.
  update-history:
    runs-on: ubuntu-latest
    needs: [benchmark]
    steps:
      - uses: actions/checkout@v2
      - uses: ./update-benchmark-history
        name: Update benchmark history
        timeout-minutes: 5