From d0f568f3dd0da30848278eb6897fec3a72c5054e Mon Sep 17 00:00:00 2001
From: Will Constable
Date: Tue, 14 May 2024 21:03:26 -0700
Subject: [PATCH] try nvidia-cuda

ghstack-source-id: 180109d062e87c9c02101a16a282b782edcc1022
Pull Request resolved: https://github.com/pytorch/torchtitan/pull/332
---
Review notes (this commentary area is not part of the commit message):
  * Purpose: switch the 4-GPU unit-test job to the nvidia/cuda runtime image
    and bootstrap Miniconda inside the job script, since the script relies on
    conda to install git/clang.
  * `conda create` now passes -y, matching the existing `conda install -y -q`
    line, so environment creation does not stop at the confirmation prompt in
    the non-interactive CI shell.
  * NOTE(review): leading whitespace inside the hunk was reconstructed from a
    whitespace-collapsed copy of this patch (6 spaces for the job keys, 8 for
    the script body) - confirm against the target file before applying.
  * NOTE(review): the post-image blob id on the `index` line predates the -y
    change and is therefore stale.

 .github/workflows/unit_test_4gpu.yaml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/unit_test_4gpu.yaml b/.github/workflows/unit_test_4gpu.yaml
index cf077b0c6..5f280f77e 100644
--- a/.github/workflows/unit_test_4gpu.yaml
+++ b/.github/workflows/unit_test_4gpu.yaml
@@ -16,14 +16,20 @@ jobs:
       runner: linux.g5.12xlarge.nvidia.gpu
       gpu-arch-type: cuda
       gpu-arch-version: "12.1"
-      # This image is faster to clone than the default, but it lacks CC needed by triton
-      # (1m25s vs 2m37s)
-      docker-image: "pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime"
+      # Trying how much faster the nvidia-cuda image is
+      docker-image: "nvidia/cuda:12.4.1-runtime-ubuntu22.04"
       repository: "pytorch/torchtitan"
       upload-artifact: "outputs"
-      # conda create -n "test" python=3.10
-      # conda activate test
+      # ~/miniconda3/bin/conda init bash
       script: |
+        apt update && apt install -y wget
+        mkdir -p ~/miniconda3
+        wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
+        bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
+        rm -rf ~/miniconda3/miniconda.sh
+        source ~/miniconda3/bin/activate
+        conda create -y -n "test" python=3.10
+        conda activate test
         conda install -y -q git clang clangxx
         export CC=clang
         export CXX=clangxx