From 23e8befdc52d9ef4bba238a58da92b27c2ebe81d Mon Sep 17 00:00:00 2001 From: Matteo Bunino <48362942+matbun@users.noreply.github.com> Date: Mon, 20 May 2024 16:08:42 +0200 Subject: [PATCH] Interactive distrib ml (#140) Update tutorial --- tutorials/distributed-ml/torch-tutorial-0-basics/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tutorials/distributed-ml/torch-tutorial-0-basics/README.md b/tutorials/distributed-ml/torch-tutorial-0-basics/README.md index 366e8245..bee09dcf 100644 --- a/tutorials/distributed-ml/torch-tutorial-0-basics/README.md +++ b/tutorials/distributed-ml/torch-tutorial-0-basics/README.md @@ -21,7 +21,6 @@ If you want to use SLURM in interactive mode, do the following: ```bash # Allocate resources -$ salloc --account=intertwin --partition=batch --nodes=1 --ntasks-per-node=1 --cpus-per-task=4 --gpus-per-node=4 #--time=00:30:00 $ salloc --partition=batch --nodes=1 --account=intertwin --gres=gpu:4 --time=1:59:00 job ID is XXXX # Get a shell in the compute node (if using SLURM) @@ -29,14 +28,13 @@ $ srun --jobid XXXX --overlap --pty /bin/bash # Now you are inside the compute node # On JSC, you may need to load some modules... +ml --force purge ml Stages/2024 GCC OpenMPI CUDA/12 MPI-settings/CUDA Python HDF5 PnetCDF libaio mpi4py # ...before activating the Python environment (adapt this to your env name/path) source ../../../envAI_hdfml/bin/activate ``` - - To launch the training with torch DDP use: ```bash