diff --git a/nemo/collections/llm/recipes/log/default.py b/nemo/collections/llm/recipes/log/default.py
index 4d5e9223b535..94e595bdb811 100644
--- a/nemo/collections/llm/recipes/log/default.py
+++ b/nemo/collections/llm/recipes/log/default.py
@@ -32,7 +32,6 @@ def default_log(
 ) -> Config[nl.NeMoLogger]:
     ckpt = Config(
         nl.ModelCheckpoint,
-        save_best_model=False,
         save_last=True,
         save_top_k=10,
         every_n_train_steps=200,
diff --git a/nemo/lightning/pytorch/strategies/fsdp_strategy.py b/nemo/lightning/pytorch/strategies/fsdp_strategy.py
index 2a210c9bd7f0..d34d1716e6b4 100644
--- a/nemo/lightning/pytorch/strategies/fsdp_strategy.py
+++ b/nemo/lightning/pytorch/strategies/fsdp_strategy.py
@@ -216,7 +216,7 @@ def save_checkpoint(
             and self.trainer.state.fn == TrainerFn.FITTING
             and self.ckpt_save_optimizer
         ):
-            del checkpoint["optimizer_states"]
+            checkpoint["optimizer_states"] = {}
             checkpoint['optimizer'] = get_optimizer_state_dict(self.model, self.optimizers)
             pyt_to_mcore_state_dict(checkpoint['optimizer']['state'], prefix="optimizer.state.")
 
diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py
index 4bf8c42ece02..3a0a0368bcef 100644
--- a/nemo/lightning/pytorch/strategies/megatron_strategy.py
+++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py
@@ -634,7 +634,7 @@ def save_checkpoint(
             and self.trainer.state.fn == TrainerFn.FITTING
             and self.ckpt_save_optimizer
         ):
-            del checkpoint["optimizer_states"]
+            checkpoint["optimizer_states"] = {}
             checkpoint["optimizer"] = [self.optimizer_sharded_state_dict()]
 
         self.checkpoint_io.save_checkpoint(checkpoint, filepath, storage_options=storage_options)
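
Note (not part of the patch): the del -> empty-dict change in both strategies keeps the "optimizer_states" key that Lightning wrote into the checkpoint, so any downstream code that indexes it unconditionally does not hit a KeyError, while the duplicate payload is still dropped, since the distributed optimizer state is saved separately under checkpoint["optimizer"]. A minimal sketch of the difference; the consume helper is hypothetical and stands in for any such consumer:

    # Hypothetical consumer that indexes the key unconditionally,
    # as a checkpoint load/inspection path might.
    def consume(checkpoint: dict) -> int:
        return len(checkpoint["optimizer_states"])

    ckpt = {
        "optimizer_states": [{"state": {"step": 1}}],  # written by Lightning before save_checkpoint
        "optimizer": ["sharded optimizer state"],      # saved separately by the strategy
    }

    # Old behavior: del ckpt["optimizer_states"]  ->  consume(ckpt) raises KeyError.
    # New behavior: empty the entry but keep the key.
    ckpt["optimizer_states"] = {}
    assert consume(ckpt) == 0  # key still present, duplicate payload gone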