Skip to content

Commit

Permalink
Update collect_env.py
Browse files Browse the repository at this point in the history
Add HPU information to collect_env.py script
  • Loading branch information
michalkuligowski authored Oct 25, 2024
1 parent a5136ec commit 1e427c2
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions collect_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
'cuda_module_loading',
'nvidia_driver_version',
'nvidia_gpu_models',
'habana_hpu_models',
'habana_driver_version',
'cudnn_version',
'pip_version', # 'pip' or 'pip3'
'pip_packages',
Expand Down Expand Up @@ -251,6 +253,40 @@ def get_nvidia_smi():
return smi


def get_hpu_info():
    """Query ``hl-smi`` for attached Habana HPU devices and the driver version.

    Runs ``hl-smi -q -d PRODUCT`` and scans its standard output for the first
    ``Attached AIPs``, ``Product Name`` and ``Driver Version`` entries.

    Returns:
        A ``(devices, driver_version)`` tuple.

        * ``devices`` is ``'<count>x <model>'`` when both the device count and
          the product name were reported, just ``'<model>'`` when the count
          line is missing, and ``'N/A'`` when no product name was reported.
        * ``driver_version`` is the reported driver version string, or
          ``None`` when the output contained no ``Driver Version`` entry.

        ``('N/A', 'N/A')`` is returned when ``hl-smi`` cannot be executed or
        its output cannot be read.
    """
    command = ["hl-smi", "-q", "-d", "PRODUCT"]
    model_re = re.compile(r'Product Name.+?: (.+)')
    count_re = re.compile(r'Attached AIPs.+?: (\d+)')
    driver_re = re.compile(r'Driver Version.+?: (.+)')

    try:
        # subprocess.run waits for the child and closes the pipe, so no
        # zombie process or open file descriptor is left behind.
        completed = subprocess.run(command,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
    except Exception:
        # Deliberately best-effort: this is an environment report, so a
        # missing or broken hl-smi must never abort the collection.
        return ('N/A', 'N/A')

    hpu_count = None
    hpu_model = None
    hpu_driver = None
    for raw_line in (completed.stdout or '').splitlines():
        # Drop indentation and trailing whitespace so the anchored patterns
        # match and the captured values are clean.
        line = raw_line.strip()

        count_match = count_re.match(line)
        if count_match:
            hpu_count = count_match.group(1)

        model_match = model_re.match(line)
        if model_match:
            hpu_model = model_match.group(1)

        driver_match = driver_re.match(line)
        if driver_match:
            hpu_driver = driver_match.group(1)

        # Only the first device's details are needed; stop once all three
        # fields are known.
        if hpu_model and hpu_count and hpu_driver:
            break

    if hpu_model is None:
        return ('N/A', hpu_driver)
    if hpu_count is None:
        # Avoid emitting the literal "Nonex <model>" when the count is absent.
        return (hpu_model, hpu_driver)
    return (f'{hpu_count}x {hpu_model}', hpu_driver)


def get_rocm_version(run_lambda):
"""Returns the ROCm version if available, otherwise 'N/A'."""
return run_and_parse_first_match(run_lambda, 'hipcc --version',
Expand Down Expand Up @@ -556,6 +592,7 @@ def get_version_or_na(cfg, prefix):
vllm_version = get_vllm_version()
vllm_build_flags = summarize_vllm_build_flags()
gpu_topo = get_gpu_topo(run_lambda)
hpu_info=get_hpu_info()

return SystemEnv(
torch_version=version_str,
Expand All @@ -571,6 +608,8 @@ def get_version_or_na(cfg, prefix):
nvidia_gpu_models=get_gpu_info(run_lambda),
nvidia_driver_version=get_nvidia_driver_version(run_lambda),
cudnn_version=get_cudnn_version(run_lambda),
habana_hpu_models=hpu_info[0],
habana_driver_version=hpu_info[1],
hip_compiled_version=hip_compiled_version,
hip_runtime_version=hip_runtime_version,
miopen_runtime_version=miopen_runtime_version,
Expand Down Expand Up @@ -613,6 +652,8 @@ def get_version_or_na(cfg, prefix):
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HPU devices: {habana_hpu_models}
HPU driver version: {habana_driver_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}
Expand Down

0 comments on commit 1e427c2

Please sign in to comment.