Support open_clip with NPU backend #813

Closed
wants to merge 3 commits
6 changes: 6 additions & 0 deletions src/open_clip_train/distributed.py
@@ -107,6 +107,12 @@ def init_distributed_device(args):
else:
device = 'cuda:0'
torch.cuda.set_device(device)
elif args.device == "npu" and torch.npu.is_available():
if args.distributed and not args.no_set_device_rank:
device = 'npu:%d' % args.local_rank
else:
device = "npu:0"
torch.npu.set_device(device)
else:
device = 'cpu'
args.device = device
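For readers trying this hunk out: the `torch.npu` namespace only exists after Huawei's Ascend adapter for PyTorch has been imported. A minimal availability-check sketch, assuming the adapter package is named `torch_npu` (that package is an assumption, not part of this diff):

```python
# Minimal sketch of NPU availability detection; assumes Huawei's Ascend
# PyTorch adapter (package name torch_npu) is installed and registers
# the torch.npu namespace on import.
import torch

try:
    import torch_npu  # noqa: F401  # assumption: Ascend adapter package
    npu_available = torch.npu.is_available()
except ImportError:
    npu_available = False

device = "npu:0" if npu_available else "cpu"
print(f"selected device: {device}")
```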
8 changes: 6 additions & 2 deletions src/open_clip_train/main.py
@@ -11,7 +11,6 @@
import numpy as np
import torch
from torch import optim
from torch.cuda.amp import GradScaler

try:
import wandb
@@ -329,7 +328,12 @@ def main(args):
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

scaler = GradScaler() if args.precision == "amp" else None
scaler = None
if args.precision == "amp":
try:
scaler = torch.amp.GradScaler(device=device)
except (AttributeError, TypeError) as e:
scaler = torch.cuda.amp.GradScaler()

# optionally resume from a checkpoint
start_epoch = 0
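A standalone sketch of the scaler fallback introduced above: newer PyTorch releases expose a device-agnostic `torch.amp.GradScaler`, while older ones only ship `torch.cuda.amp.GradScaler`. The helper name below is illustrative, not part of the patch:

```python
# Sketch of the version-tolerant GradScaler construction used in main.py
# (the helper name make_grad_scaler is illustrative, not from the patch).
import torch

def make_grad_scaler(device: str):
    try:
        # PyTorch >= 2.3: device-agnostic scaler, accepts e.g. "cuda" or "npu"
        return torch.amp.GradScaler(device=device)
    except (AttributeError, TypeError):
        # Older PyTorch: fall back to the CUDA-only scaler
        return torch.cuda.amp.GradScaler()

scaler = make_grad_scaler("cuda")
```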
5 changes: 4 additions & 1 deletion src/open_clip_train/params.py
@@ -306,6 +306,9 @@ def parse_args(args):
parser.add_argument(
"--accum-freq", type=int, default=1, help="Update the model every --acum-freq steps."
)
parser.add_argument(
"--device", default="cuda", type=str, choices=["cpu", "cuda", "npu"], help="Accelerator to use."
)
# arguments for distributed training
parser.add_argument(
"--dist-url",
@@ -314,7 +317,7 @@
help="url used to set up distributed training",
)
parser.add_argument(
"--dist-backend", default="nccl", type=str, help="distributed backend"
"--dist-backend", default="nccl", type=str, help="distributed backend. \"nccl\" for GPU, \"hccl\" for Ascend NPU"
)
parser.add_argument(
"--report-to",
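The two new arguments compose like this; a hypothetical invocation assuming an Ascend environment where HCCL is available (the flag names come from this diff, everything else is illustrative):

```python
# Hypothetical parse_args call exercising the new flags; "hccl" is the
# collective-communication backend used for Ascend NPUs.
from open_clip_train.params import parse_args

args = parse_args([
    "--device", "npu",
    "--dist-backend", "hccl",
    "--precision", "amp",
])
print(args.device, args.dist_backend, args.precision)
```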
6 changes: 4 additions & 2 deletions src/open_clip_train/profiler.py
@@ -125,14 +125,16 @@ def profile_torch(model, text_input_size, image_input_size, batch_size=1, force_
def count_params(model):
return sum(m.numel() for m in model.parameters())

def profile_model(model_name, batch_size=1, profiler='torch'):
def profile_model(model_name, batch_size=1, profiler='torch', device="cuda"):
assert profiler in ['torch', 'fvcore'], 'Only torch and fvcore profilers are supported'
if profiler == 'fvcore':
assert fvcore is not None, 'Please install fvcore.'
model = open_clip.create_model(model_name, force_custom_text=True, pretrained_hf=False)
model.eval()
if torch.cuda.is_available():
model = model.cuda()
elif device == "npu" and torch.npu.is_available():
model = model.npu()

if isinstance(model.visual.image_size, (tuple, list)):
image_input_size = (3,) + tuple(model.visual.image_size[-2:])
@@ -217,7 +219,7 @@ def main():
print('='*100)
print(f'Profiling {m}')
try:
row = profile_model(m, batch_size=args.batch_size, profiler=args.profiler)
row = profile_model(m, batch_size=args.batch_size, profiler=args.profiler, device=args.device)
results.append(row)
except Exception as e:
print(f'Error profiling {m}: {e}')
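For completeness, a hedged usage sketch of the extended profiler entry point; it assumes an Ascend machine where `torch_npu` is importable, and the model name is only an example:

```python
# Illustrative call of profile_model with the new device argument;
# "ViT-B-32" is just an example model name.
from open_clip_train.profiler import profile_model

row = profile_model("ViT-B-32", batch_size=1, profiler="torch", device="npu")
print(row)
```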