From 8e1c9bd12babfda71be285695c4bf9b7a1db20e7 Mon Sep 17 00:00:00 2001 From: Brian <23239305+b-chu@users.noreply.github.com> Date: Thu, 29 Feb 2024 11:27:29 -0500 Subject: [PATCH] Use ci repo for cpu tests (#3062) --- .github/mcli/mcli_pytest.py | 117 -------------------------- .github/workflows/code-quality.yaml | 2 +- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/coverage.yaml | 2 +- .github/workflows/daily.yaml | 8 +- .github/workflows/pr-cpu.yaml | 9 +- .github/workflows/pr-gpu.yaml | 4 +- .github/workflows/pytest-cpu.yaml | 92 -------------------- .github/workflows/pytest-gpu.yaml | 89 -------------------- .github/workflows/smoketest.yaml | 2 +- 10 files changed, 17 insertions(+), 310 deletions(-) delete mode 100644 .github/mcli/mcli_pytest.py delete mode 100644 .github/workflows/pytest-cpu.yaml delete mode 100644 .github/workflows/pytest-gpu.yaml diff --git a/.github/mcli/mcli_pytest.py b/.github/mcli/mcli_pytest.py deleted file mode 100644 index 9c4174b226..0000000000 --- a/.github/mcli/mcli_pytest.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright 2022 MosaicML Composer authors -# SPDX-License-Identifier: Apache-2.0 - -"""Run pytest using MCLI.""" - -import argparse -import time - -from mcli import RunConfig, RunStatus, create_run, follow_run_logs, wait_for_run_status - -if __name__ == '__main__': - - parser = argparse.ArgumentParser() - parser.add_argument('--name', type=str, default='mcli-pytest', help='Base name of run') - parser.add_argument('--cluster', type=str, default='r1z4', help='Cluster to use') - parser.add_argument('--gpu_type', type=str, default='a100_40gb', help='Type of GPU to use') - parser.add_argument('--gpu_num', type=int, default=2, help='Number of the GPU to use') - parser.add_argument('--image', type=str, default='mosaicml/pytorch:latest', help='Docker image to use') - parser.add_argument('--git_branch', type=str, help='Git branch to check out') - parser.add_argument('--git_commit', type=str, help='Git commit to check out. Overrides git_branch if specified') - parser.add_argument('--pip_package_name', type=str, help='Name of pip package to install before running tests') - parser.add_argument('--pr_number', - type=int, - help='PR number to check out. Overrides git_branch/git_commit if specified') - parser.add_argument('--pytest_markers', type=str, help='Markers to pass to pytest') - parser.add_argument('--pytest_command', type=str, help='Command to run pytest') - parser.add_argument('--timeout', type=int, default=2700, help='Timeout for run (in seconds)') - args = parser.parse_args() - - name = args.name - git_integration = { - 'integration_type': 'git_repo', - 'git_repo': 'mosaicml/composer', - 'ssh_clone': 'False', - } - if args.git_branch is not None and args.git_commit is None: - name += f'-branch-{args.git_branch}' - git_integration['git_branch'] = args.git_branch - if args.git_commit is not None: - name += f'-commit-{args.git_commit}' - git_integration['git_commit'] = args.git_commit - - command = 'cd composer' - - # Checkout a specific PR if specified - if args.pr_number is not None: - name += f'-pr-{args.pr_number}' - command += f''' - - git fetch origin pull/{args.pr_number}/head:pr_branch - - git checkout pr_branch - - ''' - - # Shorten name if too long - if len(name) > 56: - name = name[:56] - - clear_tmp_path_flag = '-o tmp_path_retention_policy=none' - command += f''' - - export COMPOSER_PACKAGE_NAME='{args.pip_package_name}' - - pip install --upgrade --user .[all] - - export COMMON_ARGS="-v --durations=20 -m '{args.pytest_markers}' {clear_tmp_path_flag}" - - make test PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS --codeblocks" - - make test-dist PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS" WORLD_SIZE=2 - - python -m coverage combine - - python -m coverage report - ''' - config = RunConfig( - name=name, - compute={ - 'cluster': args.cluster, - 'gpu_type': args.gpu_type, - 'gpus': args.gpu_num - }, - image=args.image, - integrations=[git_integration], - command=command, - scheduling={'max_duration': args.timeout / 60 / 60}, - env_variables=[ - { - 'key': 'MOSAICML_PLATFORM', - 'value': 'False', - }, - { - 'key': 'PYTHONUNBUFFERED', - 'value': '1', - }, - ], - ) - - # Create run - run = create_run(config) - print(f'[GHA] Run created: {run.name}') - - # Wait until run starts before fetching logs - run = wait_for_run_status(run, status='running') - start_time = time.time() - print('[GHA] Run started. Following logs...') - - # Print logs - for line in follow_run_logs(run): - print(line, end='') - - print('[GHA] Run completed. Waiting for run to finish...') - run = wait_for_run_status(run, status=RunStatus.COMPLETED) - - # Fail if command exited with non-zero exit code or timed out (didn't reach COMPLETED) - assert run.status == RunStatus.COMPLETED, f'Run {run.name} did not complete: {run.status} ({run.reason})' diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 36a54ba9a3..665420657c 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.1 + ref: v0.0.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/code-quality with: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 3f9a079918..90ef8b81f4 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -45,7 +45,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.1 + ref: v0.0.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/codeql-analysis with: diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 7e8a05782c..0cb96ca03d 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -16,7 +16,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.1 + ref: v0.0.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/coverage with: diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml index 39d1dc9d42..c0ab4b816d 100644 --- a/.github/workflows/daily.yaml +++ b/.github/workflows/daily.yaml @@ -14,7 +14,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} jobs: daily-pytest-cpu: - uses: ./.github/workflows/pytest-cpu.yaml + uses: mosaicml/ci-testing/.github/workflows/pytest-cpu.yaml@v0.0.2 strategy: matrix: include: @@ -73,11 +73,13 @@ jobs: with: container: ${{ matrix.container }} name: ${{ matrix.name }} + pip_deps: "[all]" pytest-command: ${{ matrix.pytest_command }} pytest-markers: ${{ matrix.markers }} composer_package_name: ${{ matrix.composer_package_name }} pytest-wandb-entity: "mosaicml-public-integration-tests" pytest-wandb-project: "integration-tests-${{ github.sha }}" + safe_directory: composer secrets: aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} @@ -98,7 +100,7 @@ jobs: download-path: artifacts daily-pytest-gpu: - uses: ./.github/workflows/pytest-gpu.yaml + uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.2 strategy: matrix: # Unlike CPU tests, we run daily tests together with GPU tests to minimize launch time @@ -124,8 +126,10 @@ jobs: with: composer_package_name: ${{ matrix.composer_package_name }} container: ${{ matrix.container }} + git_repo: mosaicml/composer mcloud-timeout: 2700 name: ${{ matrix.name }} + pip_deps: "[all]" pytest-command: ${{ matrix.pytest_command }} pytest-markers: ${{ matrix.markers }} python-version: 3.9 diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml index 6eee54cb0b..24438ec213 100644 --- a/.github/workflows/pr-cpu.yaml +++ b/.github/workflows/pr-cpu.yaml @@ -9,7 +9,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} jobs: pytest-cpu: - uses: ./.github/workflows/pytest-cpu.yaml + uses: mosaicml/ci-testing/.github/workflows/pytest-cpu.yaml@v0.0.2 strategy: matrix: include: @@ -17,25 +17,24 @@ jobs: container: mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04 markers: not daily and not remote and not gpu and not doctest pytest_command: coverage run -m pytest - composer_package_name: mosaicml - name: cpu-3.10-2.1 container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04 markers: not daily and not remote and not gpu and not doctest pytest_command: coverage run -m pytest - composer_package_name: mosaicml - name: cpu-doctest container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04 markers: not daily and not remote and not gpu and doctest pytest_command: coverage run -m pytest tests/test_docs.py - composer_package_name: mosaicml name: ${{ matrix.name }} if: github.repository_owner == 'mosaicml' with: - composer_package_name: ${{ matrix.composer_package_name }} + composer_package_name: mosaicml container: ${{ matrix.container }} name: ${{ matrix.name }} + pip_deps: "[all]" pytest-command: ${{ matrix.pytest_command }} pytest-markers: ${{ matrix.markers }} + safe_directory: composer coverage: uses: ./.github/workflows/coverage.yaml name: Coverage Results diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml index 1b02fc9c51..cece24386d 100644 --- a/.github/workflows/pr-gpu.yaml +++ b/.github/workflows/pr-gpu.yaml @@ -9,7 +9,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }} jobs: pytest-gpu: - uses: ./.github/workflows/pytest-gpu.yaml + uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.2 strategy: matrix: include: @@ -23,8 +23,10 @@ jobs: with: composer_package_name: ${{ matrix.composer_package_name }} container: ${{ matrix.container }} + git_repo: mosaicml/composer mcloud-timeout: 1500 name: ${{ matrix.name }} + pip_deps: "[all]" pytest-command: ${{ matrix.pytest_command }} pytest-markers: ${{ matrix.markers }} python-version: 3.9 diff --git a/.github/workflows/pytest-cpu.yaml b/.github/workflows/pytest-cpu.yaml deleted file mode 100644 index 68d040f175..0000000000 --- a/.github/workflows/pytest-cpu.yaml +++ /dev/null @@ -1,92 +0,0 @@ -name: Pytest CPU -on: - workflow_call: - inputs: - composer_package_name: - required: true - type: string - container: - required: true - type: string - name: - required: true - type: string - pytest-command: - required: true - type: string - pytest-markers: - required: true - type: string - pytest-wandb-entity: - required: false - type: string - pytest-wandb-project: - required: false - type: string - secrets: - wandb-api-key: - required: false - aws-access-key-id: - required: false - aws-secret-access-key: - required: false - slack-notifications-bot-token: - required: false - code-eval-device: - required: false - code-eval-url: - required: false - code-eval-apikey: - required: false - gcs-key: - required: false - gcs-secret: - required: false - azure-account-name: - required: false - azure-account-access-key: - required: false -jobs: - pytest-cpu: - timeout-minutes: 30 - runs-on: ubuntu-latest - container: ${{ inputs.container }} - steps: - - name: Checkout Repo - uses: actions/checkout@v3 - - name: Setup - run: | - set -ex - export PATH=/composer-python:$PATH - export COMPOSER_PACKAGE_NAME='${{ inputs.composer_package_name }}' - python -m pip install --upgrade 'pip<23' wheel - python -m pip install --upgrade .[all] - - name: Run Tests - id: tests - run: | - set -ex - export PATH=/composer-python:$PATH - export WANDB_API_KEY='${{ secrets.wandb-api-key }}' - export WANDB_ENTITY='${{ inputs.pytest-wandb-entity }}' - export WANDB_PROJECT='${{ inputs.pytest-wandb-project }}' - export AWS_ACCESS_KEY_ID='${{ secrets.aws-access-key-id }}' - export AWS_SECRET_ACCESS_KEY='${{ secrets.aws-secret-access-key }}' - export CODE_EVAL_DEVICE='${{ secrets.code-eval-device }}' - export CODE_EVAL_URL='${{ secrets.code-eval-url }}' - export CODE_EVAL_APIKEY='${{ secrets.code-eval-apikey }}' - export GCS_KEY='${{ secrets.gcs-key }}' - export GCS_SECRET='${{ secrets.gcs-secret }}' - export AZURE_ACCOUNT_NAME='${{ secrets.azure-account-name }}' - export AZURE_ACCOUNT_ACCESS_KEY='${{ secrets.azure-account-access-key }}' - export COMMON_ARGS="-v --durations=20 -m '${{ inputs.pytest-markers }}' -o tmp_path_retention_policy=none" - - # Necessary to run git diff for doctests - git config --global --add safe.directory /__w/composer/composer - make test PYTEST='${{ inputs.pytest-command }}' EXTRA_ARGS="$COMMON_ARGS --codeblocks" - make test-dist PYTEST='${{ inputs.pytest-command }}' EXTRA_ARGS="$COMMON_ARGS" WORLD_SIZE=2 - - python -m coverage combine - - uses: actions/upload-artifact@v3 - with: - name: coverage-${{ github.sha }}-${{ inputs.name }} - path: .coverage diff --git a/.github/workflows/pytest-gpu.yaml b/.github/workflows/pytest-gpu.yaml deleted file mode 100644 index 550a306746..0000000000 --- a/.github/workflows/pytest-gpu.yaml +++ /dev/null @@ -1,89 +0,0 @@ -name: Pytest GPU -on: - workflow_call: - inputs: - composer_package_name: - required: true - type: string - container: - required: true - type: string - mcloud-timeout: - required: false - type: number - default: 2700 - name: - required: true - type: string - pytest-command: - required: true - type: string - pytest-markers: - required: true - type: string - python-version: - required: false - type: string - default: 3.9 - secrets: - mcloud-api-key: - required: true - slack-notifications-bot-token: - required: false - code-eval-device: - required: false - code-eval-url: - required: false - code-eval-apikey: - required: false -jobs: - pytest-gpu: - timeout-minutes: 60 # ${{ inputs.gha-timeout }} for some reason not able to turn this into an input - runs-on: ubuntu-latest - env: - MOSAICML_API_KEY: ${{ secrets.mcloud-api-key }} - steps: - - name: Checkout Repo - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: ${{ inputs.python-version }} - - name: Cache pip - uses: actions/cache@v3 - with: - # This path is specific to Ubuntu - path: ~/.cache/pip - # Look to see if there is a cache hit for the corresponding requirements file - key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- - ${{ runner.os }}- - - name: Setup MCLI - run: | - set -ex - python -m pip install mosaicml-cli - mcli version - - name: Submit Run - id: tests - run: | - set -ex - - PR_NUMBER="$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH")" - REF_ARGS="" - - # Use the PR number if it exists, commit SHA for protected branches and the branch name otherwise - if [ -z "$PR_NUMBER" ] || [ "$PR_NUMBER" = "null" ]; then - if [[ "$GITHUB_REF" =~ "refs/heads/dev" || "$GITHUB_REF" =~ "refs/heads/main" || \ - "$GITHUB_REF" =~ "refs/heads/release" ]]; then - REF_ARGS="--git_commit $GITHUB_SHA" - else - REF_ARGS="--git_branch $GITHUB_REF_NAME" - fi - else - REF_ARGS="--pr_number $PR_NUMBER" - fi - - python .github/mcli/mcli_pytest.py --image '${{ inputs.container }}' --pip_package_name \ - '${{ inputs.composer_package_name }}' --pytest_markers '${{ inputs.pytest-markers }}' --pytest_command \ - '${{ inputs.pytest-command }}' --timeout ${{ inputs.mcloud-timeout }} ${REF_ARGS} diff --git a/.github/workflows/smoketest.yaml b/.github/workflows/smoketest.yaml index 566142511b..5a24f9e636 100644 --- a/.github/workflows/smoketest.yaml +++ b/.github/workflows/smoketest.yaml @@ -33,7 +33,7 @@ jobs: uses: actions/checkout@v3 with: repository: mosaicml/ci-testing - ref: v0.0.1 + ref: v0.0.2 path: ./ci-testing - uses: ./ci-testing/.github/actions/smoketest with: