Skip to content

Commit

Permalink
Only broadcast distcp files (mosaicml#3130)
Browse files: browse the repository at this point in the history
* filter

* remove metadata skip
  • Loading branch information
mvpatel2000 authored and Ghelfi committed Mar 25, 2024
1 parent ccaeec5 commit 2fdbf45
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions composer/utils/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,15 @@ def read_data(self, plan: LoadPlan, planner: LoadPlanner):
receiver = dist.get_global_rank() != rank_in_first_replica

# Send list of files to all ranks
file_list = [sorted(os.listdir(self.destination_path))]
file_list = [[
file_name for file_name in sorted(os.listdir(self.destination_path)) if file_name.endswith('.distcp')
]]
dist.broadcast_object_list(file_list, src=rank_in_first_replica, group=replicate_process_group)
file_list = file_list[0]
log.debug(f'List of files to broadcast: {file_list}')

# Send each file to the appropriate rank
for file_name in file_list:
if 'metadata' in file_name: # All ranks already have the metadata file
continue
if dist.get_local_rank() == 0: # Only 1 rank per node needs to transfer file
full_path = os.path.join(self.destination_path, file_name)
log.debug(f'Transferring {full_path=}')
Expand Down

0 comments on commit 2fdbf45

Please sign in to comment.