-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathutils.py
131 lines (120 loc) · 3.98 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""Function to get GPU infomations.
"""
import time
from collections import namedtuple
from pprint import pprint
import psutil
import pynvml
# Initialize NVML once at import time so get_infos() can query the driver.
# NOTE(review): no matching pynvml.nvmlShutdown() call is visible in this file.
pynvml.nvmlInit()
# Seconds to wait between the two cpu_percent() samples of each GPU process.
_CPU_SAMPLE_INTERVAL = 0.05

# Record types are created once so that results from different calls share
# the same classes (isinstance checks and comparisons stay meaningful).
Device = namedtuple(
    "Device",
    [
        "id",
        "name",
        "free",
        "used",
        "total",
        "gpu_util",
        "temperature",
        "fan_speed",
        "power_usage",
        "power_state",
        "process",
    ],
)
Process = namedtuple(
    "Process",
    [
        "pid",
        "memory_percent",
        "status",
        "username",
        "num_threads",
        "cpu_num",
        "cpu_percent",
        "name",
        "cmdline",
        "used_gpu_mem",
        "create_time",
    ],
)


def _as_text(value):
    """Return an NVML string as ``str``.

    Depending on the installed pynvml release, string queries come back as
    either ``bytes`` or ``str``; calling ``.decode()`` on a ``str`` would fail.
    """
    return value.decode() if isinstance(value, bytes) else value


def _collect_processes(nvml_processes):
    """Build a ``Process`` record for every NVML process entry.

    Args:
        nvml_processes: Entries returned by
            ``pynvml.nvmlDeviceGetComputeRunningProcesses``; each one exposes
            ``pid`` and ``usedGpuMemory``.

    Returns:
        list: ``Process`` namedtuples. Processes that exit while they are
        being inspected are left out instead of aborting the whole query.
    """
    primed = []
    for entry in nvml_processes:
        try:
            proc = psutil.Process(pid=entry.pid)
            # The first cpu_percent() call only records a baseline; the
            # meaningful value comes from the second call after the pause.
            proc.cpu_percent()
        except psutil.NoSuchProcess:
            continue
        primed.append((proc, entry.usedGpuMemory))

    if not primed:
        return []

    # One pause for all processes of the device rather than one per process.
    time.sleep(_CPU_SAMPLE_INTERVAL)

    records = []
    for proc, used_gpu_mem in primed:
        try:
            # oneshot() lets psutil reuse the data it reads for several fields.
            with proc.oneshot():
                records.append(
                    Process(
                        pid=proc.pid,
                        memory_percent=proc.memory_percent(),
                        status=proc.status(),
                        username=proc.username(),
                        num_threads=proc.num_threads(),
                        cpu_num=proc.cpu_num(),
                        cpu_percent=proc.cpu_percent(),
                        name=proc.name(),
                        cmdline=" ".join(proc.cmdline()),
                        used_gpu_mem=used_gpu_mem,
                        create_time=proc.create_time(),
                    )
                )
        except psutil.NoSuchProcess:
            # The process ended between NVML listing it and psutil reading it.
            continue
    return records


def get_infos():
    """Get information about all graphics cards visible to NVML.

    The call blocks for about ``_CPU_SAMPLE_INTERVAL`` seconds per device
    that has running compute processes, because CPU usage is sampled twice.

    Returns:
        dict: A dict with 3 keys:
            count: Number of GPUs found.
            driver_version: The version of the system's graphics driver.
            devices: A list of ``Device`` namedtuples with 11 fields: id,
                name, free, used, total, gpu_util, temperature, fan_speed
                (``None`` when the device does not support the query),
                power_usage (milliwatts), power_state and process.
                The process field is a list of ``Process`` namedtuples,
                each of which has 11 fields.
    """
    driver_version = _as_text(pynvml.nvmlSystemGetDriverVersion())
    device_count = pynvml.nvmlDeviceGetCount()

    devices = []
    for index in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

        # Which processes are using the GPU.
        process_info = _collect_processes(
            pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
        )

        try:
            fan_speed = pynvml.nvmlDeviceGetFanSpeed(handle)
        except pynvml.NVMLError_NotSupported:
            # Some devices do not report a fan speed.
            fan_speed = None

        devices.append(
            Device(
                id=index,
                name=_as_text(pynvml.nvmlDeviceGetName(handle)),
                free=mem_info.free,
                used=mem_info.used,
                total=mem_info.total,
                # Volatile GPU-Util in the output of nvidia-smi.
                gpu_util=pynvml.nvmlDeviceGetUtilizationRates(handle).gpu,
                temperature=pynvml.nvmlDeviceGetTemperature(
                    handle, pynvml.NVML_TEMPERATURE_GPU
                ),
                fan_speed=fan_speed,
                # Power usage in milliwatts (mW).
                power_usage=pynvml.nvmlDeviceGetPowerUsage(handle),
                power_state=pynvml.nvmlDeviceGetPowerState(handle),
                process=process_info,
            )
        )

    return {
        "count": device_count,
        "driver_version": driver_version,
        "devices": devices,
    }
if __name__ == "__main__":
    # Script entry point: pretty-print one snapshot of every GPU.
    pprint(get_infos(), indent=4)