rename ac -> no_precompute_for_backward to avoid confusion with torch native ac
danielvegamyhre committed Jan 13, 2025
1 parent 9e19e7f commit 73715c6
Showing 2 changed files with 5 additions and 3 deletions.
2 changes: 1 addition & 1 deletion torchtitan/config_manager.py
@@ -573,7 +573,7 @@ def __init__(self):
             help="use the float8nocompile prototype implementation",
         )
         self.parser.add_argument(
-            "--float8.float8nocompile_ac",
+            "--float8.float8nocompile_no_precompute_for_backward",
             action="store_true",
             help="use activation checkpointing with float8nocompile linear layers",
         )
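A side note on the renamed flag: argparse keeps the dot in the option name when it builds the namespace attribute, so the parsed value has to be read via `vars()` or `getattr` rather than plain attribute access. A minimal, self-contained sketch (not torchtitan's actual parser wiring):

```python
import argparse

parser = argparse.ArgumentParser()
# Same store_true flag as in config_manager.py: present -> True, absent -> False.
parser.add_argument(
    "--float8.float8nocompile_no_precompute_for_backward",
    action="store_true",
)

args = parser.parse_args(["--float8.float8nocompile_no_precompute_for_backward"])
# The dot survives in the attribute name, so args.float8... is not valid
# Python; read the value out of the namespace dict instead.
print(vars(args)["float8.float8nocompile_no_precompute_for_backward"])  # True
```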
6 changes: 4 additions & 2 deletions torchtitan/float8.py
@@ -48,7 +48,9 @@ def __init__(self, job_config: JobConfig, parallel_dims: ParallelDims):
             ) from e
 
         self.use_float8nocompile = float8_config.float8nocompile
-        self.use_float8nocompile_ac = float8_config.float8nocompile_ac
+        self.use_float8nocompile_no_precompute_for_backward = (
+            float8_config.float8nocompile_no_precompute_for_backward
+        )
 
         # Mutates the model inplace replacing instances of torch.nn.Linear with Float8Linear
         enable_fsdp_float8_all_gather = (
@@ -105,7 +107,7 @@ def convert_to_float8_training(self, model: nn.Module):
                 model,
                 config=self.config,
                 module_filter_fn=lambda mod, fqn: fqn != "output",
-                use_activation_checkpointing=self.use_float8nocompile_ac,
+                no_precompute_for_backward=self.use_float8nocompile_no_precompute_for_backward,
             )
         else:
             logger.info("Using float8 training")
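For context on why the old `ac` name was confusing: torch ships its own activation checkpointing in `torch.utils.checkpoint`, which is a separate mechanism from the float8nocompile `no_precompute_for_backward` behavior the flag actually controls. A minimal sketch of the torch-native API the old name could be mistaken for (layer shapes are illustrative):

```python
import torch
from torch.utils.checkpoint import checkpoint

# torch-native activation checkpointing: the wrapped forward is recomputed
# during backward instead of keeping its activations alive in memory.
layer = torch.nn.Linear(16, 16)
x = torch.randn(4, 16, requires_grad=True)

y = checkpoint(layer, x, use_reentrant=False)
y.sum().backward()
print(x.grad.shape)  # torch.Size([4, 16])
```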
