Commit 357a9d4
Fix distributed issue for timm (#1653)
Co-authored-by: regisss <[email protected]>
ZhengHongming888 and regisss committed Dec 23, 2024
1 parent 1db6864 commit 357a9d4
Showing 2 changed files with 0 additions and 12 deletions.
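Both scripts receive the same change: a module-level setup() helper that called dist.init_process_group(backend="hccl") is removed, together with its invocation under if args.distributed: in main(). The commit message does not state the failure mode, but a plausible reading (an assumption on my part, not taken from the diff) is that the default process group is already initialized elsewhere in these scripts, and torch.distributed rejects a second call to init_process_group. A minimal, self-contained sketch of that hazard follows; it uses the CPU-friendly "gloo" backend purely so it can run anywhere, whereas the scripts below use "hccl" on Gaudi:

# Hypothetical reproduction of the double-initialization error in torch.distributed;
# an illustration of the suspected issue, not code from the repository.
import os
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")

dist.init_process_group(backend="gloo", rank=0, world_size=1)  # first call succeeds
try:
    dist.init_process_group(backend="gloo", rank=0, world_size=1)  # second call is rejected
except (RuntimeError, ValueError) as err:
    print(f"duplicate init_process_group: {err}")
finally:
    dist.destroy_process_group()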
6 changes: 0 additions & 6 deletions in examples/pytorch-image-models/train_hpu_graph.py
@@ -635,10 +635,6 @@ def _parse_args():
     return args, args_text
 
 
-def setup():
-    dist.init_process_group(backend="hccl")
-
-
 def cleanup():
     dist.destroy_process_group()
 
@@ -663,8 +659,6 @@ def main():
     device = torch.device("hpu")
 
     if args.distributed:
-        setup()
-
         _logger.info(
             "Training in distributed mode with multiple processes, 1 device per process."
             f"Process {args.rank}, total {args.world_size}, device {args.device}."
6 changes: 0 additions & 6 deletions in examples/pytorch-image-models/train_hpu_lazy.py
@@ -637,10 +637,6 @@ def _parse_args():
     return args, args_text
 
 
-def setup():
-    dist.init_process_group(backend="hccl")
-
-
 def cleanup():
     dist.destroy_process_group()
 
@@ -665,8 +661,6 @@ def main():
     device = torch.device("hpu")
 
     if args.distributed:
-        setup()
-
         _logger.info(
             "Training in distributed mode with multiple processes, 1 device per process."
             f"Process {args.rank}, total {args.world_size}, device {args.device}."
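For comparison, a defensive way to keep a helper like setup() without risking double initialization is to guard on dist.is_initialized(). This sketch only illustrates the hazard the commit avoids; it is not what the commit does, which is simply to delete the helper and rely on the scripts' existing initialization path:

# Idempotent setup/cleanup sketch (not part of this commit).
import torch.distributed as dist

def setup(backend: str = "hccl") -> None:
    # Assumption: on Gaudi, the hccl backend is registered by importing
    # habana_frameworks.torch.distributed.hccl earlier in the script.
    if dist.is_available() and not dist.is_initialized():
        dist.init_process_group(backend=backend)

def cleanup() -> None:
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()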
