From 3c0172d5a8ef37f44cf71f7ffb6afc20d548c3d8 Mon Sep 17 00:00:00 2001
From: Will Constable
Date: Tue, 14 May 2024 13:05:45 -0700
Subject: [PATCH] Use torch generic workflow for CI

ghstack-source-id: 3a4a6a3d7e557386bb78e9ad629bd9af429cade2
Pull Request resolved: https://github.com/pytorch/torchtitan/pull/325
---
Review notes (commentary area between "---" and the diffstat; not part of the
commit message or of the change itself):

* Layout: this patch was re-flowed from a copy whose line breaks had been
  collapsed. YAML indentation and the blank context lines are reconstructed
  by convention, not copied -- verify against blob 0088bb3ee before applying.
  The hunk header implies four blank context lines (42 old lines = 33 removed
  + 5 non-blank context + 4 blank). Three are fixed by the add/remove
  ordering; the fourth is placed at the end of the hunk as a best guess.
* NOTE(review): the new job passes gpu-arch-version "11.6" while the script
  installs torch from the nightly/cu121 wheel index -- confirm the version
  mismatch is intended.
* NOTE(review): the previous workflow is kept as 35 commented-out lines at
  the bottom of the file; version control already preserves it, so consider
  dropping that block.
* NOTE(review): the "concurrency" block (cancel-in-progress) and the
  "Upload Coverage to Codecov" step exist only in commented-out form after
  this change -- confirm both removals are intentional.

 .github/workflows/unit_test_4gpu.yaml | 82 ++++++++++++++++-----------
 1 file changed, 49 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/unit_test_4gpu.yaml b/.github/workflows/unit_test_4gpu.yaml
index 0088bb3ee..198e73781 100644
--- a/.github/workflows/unit_test_4gpu.yaml
+++ b/.github/workflows/unit_test_4gpu.yaml
@@ -1,42 +1,58 @@
 name: 4 GPU Unit Test
+
 on:
   push:
     branches: [ main ]
   pull_request:
 
-concurrency:
-  group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
-  cancel-in-progress: true
+jobs:
+  build-test:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.12xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "11.6"
+      repository: "pytorch/torchtitan"
+      script: |
+        pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+        python -m pip install -r requirements.txt
+        python -m pip install -r dev-requirements.txt
+        python ./test_runner.py
 
-defaults:
-  run:
-    shell: bash -l -eo pipefail {0}
 
-jobs:
-  unit_tests_4gpu:
-    runs-on: linux.g5.12xlarge.nvidia.gpu
-    strategy:
-      matrix:
-        python-version: ['3.10']
-    steps:
-      - name: Check out repo
-        uses: actions/checkout@v3
-      - name: Setup conda env
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          miniconda-version: "latest"
-          activate-environment: test
-          python-version: ${{ matrix.python-version }}
-      - name: Update pip
-        run: python -m pip install --upgrade pip
-      - name: Install dependencies
-        run: |
-          pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
-          python -m pip install -r requirements.txt
-          python -m pip install -r dev-requirements.txt
-      - name: Run test_runner.py
-        run: python ./test_runner.py
-      - name: Upload Coverage to Codecov
-        uses: codecov/codecov-action@v3
+# concurrency:
+#   group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+#   cancel-in-progress: true
+
+# defaults:
+#   run:
+#     shell: bash -l -eo pipefail {0}
+
+# jobs:
+#   unit_tests_4gpu:
+#     runs-on: linux.g5.12xlarge.nvidia.gpu
+#     strategy:
+#       matrix:
+#         python-version: ['3.10']
+#     steps:
+#       - name: Check out repo
+#         uses: actions/checkout@v3
+#       - name: Setup conda env
+#         uses: conda-incubator/setup-miniconda@v2
+#         with:
+#           auto-update-conda: true
+#           miniconda-version: "latest"
+#           activate-environment: test
+#           python-version: ${{ matrix.python-version }}
+#       - name: Update pip
+#         run: python -m pip install --upgrade pip
+#       - name: Install dependencies
+#         run: |
+#           pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
+#           python -m pip install -r requirements.txt
+#           python -m pip install -r dev-requirements.txt
+#       - name: Run test_runner.py
+#         run: python ./test_runner.py
+#       - name: Upload Coverage to Codecov
+#         uses: codecov/codecov-action@v3
 