From ccb5e5647e658c16c1910b4d5694596b03cfb428 Mon Sep 17 00:00:00 2001
From: Charles Tang <j316chuck@users.noreply.github.com>
Date: Wed, 21 Feb 2024 18:37:15 -0800
Subject: [PATCH 1/3] Bump composer version (#3048)

---
 composer/_version.py            |  2 +-
 docker/README.md                |  4 ++--
 docker/build_matrix.yaml        | 12 ++++++------
 docker/generate_build_matrix.py |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/composer/_version.py b/composer/_version.py
index 6a46c95e08..e638cdc5a7 100644
--- a/composer/_version.py
+++ b/composer/_version.py
@@ -3,4 +3,4 @@
 
 """The Composer Version."""
 
-__version__ = '0.19.1'
+__version__ = '0.19.2'
diff --git a/docker/README.md b/docker/README.md
index e66ab3049f..b7f7832c7b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 <!-- BEGIN_COMPOSER_BUILD_MATRIX -->
 | Composer Version   | CUDA Support   | Docker Tag                                                     |
 |--------------------|----------------|----------------------------------------------------------------|
-| 0.19.1             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.19.1`         |
-| 0.19.1             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.19.1_cpu` |
+| 0.19.2             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.19.2`         |
+| 0.19.2             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.19.2_cpu` |
 <!-- END_COMPOSER_BUILD_MATRIX -->
 
 **Note**: For a lightweight installation, we recommended using a [MosaicML PyTorch Image](#pytorch-images) and manually
diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml
index 13803e45db..765d6cf180 100644
--- a/docker/build_matrix.yaml
+++ b/docker/build_matrix.yaml
@@ -246,9 +246,9 @@
   TORCHVISION_VERSION: 0.18.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.1
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.2
   CUDA_VERSION: 12.1.0
-  IMAGE_NAME: composer-0-19-1
+  IMAGE_NAME: composer-0-19-2
   MOFED_VERSION: 5.5-1.0.3.2
   NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
     brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
@@ -269,15 +269,15 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.1.2
   TAGS:
-  - mosaicml/composer:0.19.1
+  - mosaicml/composer:0.19.2
   - mosaicml/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.16.2
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.1
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.2
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-19-1-cpu
+  IMAGE_NAME: composer-0-19-2-cpu
   MOFED_VERSION: 5.5-1.0.3.2
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.10'
@@ -285,7 +285,7 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.1.2
   TAGS:
-  - mosaicml/composer:0.19.1_cpu
+  - mosaicml/composer:0.19.2_cpu
   - mosaicml/composer:latest_cpu
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.16.2
diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py
index d59219f811..cd4b0e0f27 100644
--- a/docker/generate_build_matrix.py
+++ b/docker/generate_build_matrix.py
@@ -261,7 +261,7 @@ def _main():
     composer_entries = []
 
     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.19.1']  # Only build images for the latest composer version
+    composer_versions = ['0.19.2']  # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION]  # just build composer against the latest
 
     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):

From c0a9697940de5ea3e144210d26d533c3a651ac60 Mon Sep 17 00:00:00 2001
From: Bruce Fontaine <bruce@2.7182.net>
Date: Thu, 22 Feb 2024 11:17:18 -0800
Subject: [PATCH 2/3] Update XLA support (#2964)

* Fix initialization and microbatching for TPUs

* Add version check for PyTorch XLA >= 2.1
---
 composer/devices/device_tpu.py |  1 +
 composer/trainer/trainer.py    |  5 +++++
 composer/utils/dist.py         | 17 +++++++++++++++--
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/composer/devices/device_tpu.py b/composer/devices/device_tpu.py
index b91d1bc478..813fc49924 100644
--- a/composer/devices/device_tpu.py
+++ b/composer/devices/device_tpu.py
@@ -26,6 +26,7 @@ class DeviceTPU(Device):
     More details.
     """
 
+    dist_backend = 'xla'
     name = 'tpu'
 
     def __init__(self):
diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py
index 7411dc4393..0d2349bf93 100644
--- a/composer/trainer/trainer.py
+++ b/composer/trainer/trainer.py
@@ -2567,6 +2567,11 @@ def _train_microbatch(self, use_grad_scaling: bool, current_batch_size: int,
                 microbatch_loss.mul_(microbatch_num_samples / current_batch_size)
                 microbatch_loss.backward(create_graph=self._backwards_create_graph)
 
+            if self.state.device.dist_backend == 'xla':
+                # For xla devices, the program between any pair of mark_step() calls is compiled. Without this, the
+                # microbatching loop is unrolled, drastically increasing compile time.
+                xm.mark_step()
+
             self.engine.run_event(Event.AFTER_BACKWARD)
 
             # Use microbatch outputs to update training metrics
diff --git a/composer/utils/dist.py b/composer/utils/dist.py
index 65edb5e80c..5b8dd5df68 100644
--- a/composer/utils/dist.py
+++ b/composer/utils/dist.py
@@ -37,6 +37,7 @@
 import logging
 import os
 import pickle
+import sys
 import time
 from contextlib import contextmanager
 from typing import TYPE_CHECKING, Any, List, Optional, Sequence, TypeVar, Union, cast
@@ -44,8 +45,12 @@
 import torch
 import torch.distributed as dist
 import torch.utils.data
+from packaging import version
 
-from composer.utils.device import get_device, is_hpu_installed
+from composer.utils.device import get_device, is_hpu_installed, is_tpu_installed
+
+if is_tpu_installed():
+    import torch_xla
 
 if TYPE_CHECKING:
     from composer.devices import Device
@@ -534,7 +539,15 @@ def initialize_dist(device: Union[str, Device], timeout: float = 300.0):
 
     dist_env_vars_match_defaults = all(os.environ.get(k, v) == v for (k, v) in dist_env_var_defaults.items())
 
-    if dist_env_vars_match_defaults:
+    if device_obj.dist_backend == 'xla':
+        if not 'torch_xla' in sys.modules:
+            raise RuntimeError('PyTorch XLA package not found. In order to use XLA based devices '
+                               'PyTorch XLA must be installed.')
+        if version.parse(torch_xla.__version__) < version.parse('2.1.0'):
+            raise RuntimeError(f'PyTorch XLA version must be at least 2.1.0, found {torch_xla.__version__}.')
+        # XLA initialization requires the init_method to be set
+        dist.init_process_group(device_obj.dist_backend, init_method='xla://')
+    elif dist_env_vars_match_defaults:
         # Fill in the remaining single-rank variables
         os.environ.update(dist_env_var_defaults)
         dist.init_process_group(device_obj.dist_backend, store=dist.HashStore(), world_size=1, rank=0)

From d3987a0542b36ad8b07cab23ddc51679ac2cf61d Mon Sep 17 00:00:00 2001
From: Charles Tang <j316chuck@users.noreply.github.com>
Date: Thu, 22 Feb 2024 14:27:38 -0800
Subject: [PATCH 3/3] Bump composer version 0.20.0 (#3051)

---
 composer/_version.py            |  2 +-
 docker/README.md                |  4 ++--
 docker/build_matrix.yaml        | 12 ++++++------
 docker/generate_build_matrix.py |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/composer/_version.py b/composer/_version.py
index e638cdc5a7..cb43d310d0 100644
--- a/composer/_version.py
+++ b/composer/_version.py
@@ -3,4 +3,4 @@
 
 """The Composer Version."""
 
-__version__ = '0.19.2'
+__version__ = '0.20.0'
diff --git a/docker/README.md b/docker/README.md
index b7f7832c7b..d0624e2665 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -15,8 +15,8 @@ all dependencies for both NLP and Vision models. They are built on top of the
 <!-- BEGIN_COMPOSER_BUILD_MATRIX -->
 | Composer Version   | CUDA Support   | Docker Tag                                                     |
 |--------------------|----------------|----------------------------------------------------------------|
-| 0.19.2             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.19.2`         |
-| 0.19.2             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.19.2_cpu` |
+| 0.20.0             | Yes            | `mosaicml/composer:latest`, `mosaicml/composer:0.20.0`         |
+| 0.20.0             | No             | `mosaicml/composer:latest_cpu`, `mosaicml/composer:0.20.0_cpu` |
 <!-- END_COMPOSER_BUILD_MATRIX -->
 
 **Note**: For a lightweight installation, we recommended using a [MosaicML PyTorch Image](#pytorch-images) and manually
diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml
index 765d6cf180..21c36347e9 100644
--- a/docker/build_matrix.yaml
+++ b/docker/build_matrix.yaml
@@ -246,9 +246,9 @@
   TORCHVISION_VERSION: 0.18.0
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.2
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.0
   CUDA_VERSION: 12.1.0
-  IMAGE_NAME: composer-0-19-2
+  IMAGE_NAME: composer-0-20-0
   MOFED_VERSION: 5.5-1.0.3.2
   NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
     brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
@@ -269,15 +269,15 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.1.2
   TAGS:
-  - mosaicml/composer:0.19.2
+  - mosaicml/composer:0.20.0
   - mosaicml/composer:latest
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.16.2
 - AWS_OFI_NCCL_VERSION: ''
   BASE_IMAGE: ubuntu:20.04
-  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.19.2
+  COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.20.0
   CUDA_VERSION: ''
-  IMAGE_NAME: composer-0-19-2-cpu
+  IMAGE_NAME: composer-0-20-0-cpu
   MOFED_VERSION: 5.5-1.0.3.2
   NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
   PYTHON_VERSION: '3.10'
@@ -285,7 +285,7 @@
   PYTORCH_NIGHTLY_VERSION: ''
   PYTORCH_VERSION: 2.1.2
   TAGS:
-  - mosaicml/composer:0.19.2_cpu
+  - mosaicml/composer:0.20.0_cpu
   - mosaicml/composer:latest_cpu
   TARGET: composer_stage
   TORCHVISION_VERSION: 0.16.2
diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py
index cd4b0e0f27..ca378388c6 100644
--- a/docker/generate_build_matrix.py
+++ b/docker/generate_build_matrix.py
@@ -261,7 +261,7 @@ def _main():
     composer_entries = []
 
     # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images
-    composer_versions = ['0.19.2']  # Only build images for the latest composer version
+    composer_versions = ['0.20.0']  # Only build images for the latest composer version
     composer_python_versions = [PRODUCTION_PYTHON_VERSION]  # just build composer against the latest
 
     for product in itertools.product(composer_python_versions, composer_versions, cuda_options):