-
Notifications
You must be signed in to change notification settings - Fork 4
/
app.js
126 lines (114 loc) · 4.03 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
'use strict';
const config = require('./config/config').initialize();
const exec = require('child_process').exec;
const CSV = require('csvtojson').Converter;
const StatsD = require('hot-shots');
const logger = config.logger.child({file:__filename});
// StatsD client shared by every gauge in this file; it is configured with the
// 'gpu.' prefix and no global tags.
const Client = new StatsD({prefix: 'gpu.', globalTags: []});

// Fields requested from nvidia-smi through --query-gpu. The order is kept
// stable so the generated command line never changes.
const params = [
  'count', 'name',
  'pcie.link.width.current', 'pcie.link.gen.current',
  'display_mode', 'display_active',
  'driver_version', 'uuid',
  'fan.speed', 'pstate',
  'memory.total', 'memory.used', 'memory.free',
  'utilization.gpu', 'temperature.gpu', 'power.draw',
  'clocks.gr', 'clocks.sm', 'clocks.mem', 'clocks.video'
].join(',');

// Full shell command executed on every polling tick.
const query = ['nvidia-smi', '--format=csv', `--query-gpu=${params}`].join(' ');
/**
 * Convert a raw nvidia-smi CSV field into a finite number.
 *
 * Accepts values that are already numbers as well as strings carrying a unit
 * suffix (for example '35 %', '1024 MiB', '45.67 W'); the leading numeric part
 * is used.
 *
 * @param {*} raw - Field value as found on the parsed GPU row.
 * @returns {number|null} The numeric value, or null when the field is missing,
 *   empty, or not numeric (e.g. '', '[N/A]', '[Not Supported]').
 */
function toMetric(raw){
  if(typeof raw === 'number'){
    return Number.isFinite(raw) ? raw : null;
  }
  if(typeof raw !== 'string'){
    return null;
  }
  const parsed = parseFloat(raw);
  return Number.isFinite(parsed) ? parsed : null;
}

/**
 * Send one set of gauges to StatsD for a single GPU row.
 *
 * Every gauge is tagged with the GPU count, name, driver version and UUID,
 * plus the display state when `display_mode` is not an empty string. Fields
 * that are missing or not numeric are skipped instead of being reported as
 * NaN, empty strings or placeholder text.
 *
 * @param {Object} gpu - One parsed row of `nvidia-smi --format=csv` output,
 *   keyed by the CSV header names (e.g. 'memory.used [MiB]').
 * @returns {void}
 */
function reportGpuStats(gpu){
  const tags = [
    `gpus:${gpu.count}`,
    `name:${gpu.name}`,
    `driver:${gpu.driver_version}`,
    `uuid:${gpu.uuid}`
  ];
  if(gpu.display_mode !== ''){
    tags.push(
      `screen_connected:${gpu.display_mode}`,
      `screen_active:${gpu.display_active}`
    );
  }

  // Emit a gauge only when the value can be turned into a finite number.
  const gauge = (metric, raw) => {
    const value = toMetric(raw);
    if(value !== null){
      Client.gauge(metric, value, tags);
    }
  };

  const memoryUsed = toMetric(gpu['memory.used [MiB]']);
  const memoryTotal = toMetric(gpu['memory.total [MiB]']);
  const celsius = toMetric(gpu['temperature.gpu']);
  // Performance state is reported as 'P<n>'; strip the prefix to get <n>.
  const pstate = typeof gpu.pstate === 'string' ? gpu.pstate.replace('P','') : gpu.pstate;

  gauge('fan', gpu['fan.speed [%]']);
  gauge('pstate', pstate);
  gauge('pcie.generation', gpu['pcie.link.gen.current']);
  gauge('pcie.speed', gpu['pcie.link.width.current']);
  gauge('memory.used', memoryUsed);
  gauge('memory.free', gpu['memory.free [MiB]']);
  gauge('memory.total', memoryTotal);
  // Avoid dividing by zero or by an unknown total.
  if(memoryUsed !== null && memoryTotal !== null && memoryTotal > 0){
    gauge('memory.pct', Number((memoryUsed / memoryTotal * 100).toFixed(2)));
  }
  gauge('utilization', gpu['utilization.gpu [%]']);
  gauge('temperature.celsius', celsius);
  // Without this guard an unsupported temperature would be reported as 32 F.
  if(celsius !== null){
    gauge('temperature.fahrenheit', Number((celsius * 1.8 + 32).toFixed(0)));
  }
  gauge('watts', gpu['power.draw [W]']);
  gauge('clock.shader', gpu['clocks.current.graphics [MHz]']);
  gauge('clock.streaming', gpu['clocks.current.sm [MHz]']);
  gauge('clock.memory', gpu['clocks.current.memory [MHz]']);
  gauge('clock.encoder', gpu['clocks.current.video [MHz]']);
}
/**
 * Parse the CSV text produced by nvidia-smi and hand each GPU row to `next`.
 *
 * Before a row is passed on, its values are normalised in place:
 *   - '[Not Supported]' and '[N/A]' become '' (empty string),
 *   - 'Enabled' becomes true and 'Disabled' becomes false.
 * All other values are left as produced by the CSV parser.
 *
 * On a parse failure the error and the raw output are logged and `next` is
 * not called.
 *
 * @param {string} data - Raw CSV output to parse.
 * @param {function(Object): void} next - Called once per parsed GPU row.
 * @returns {void}
 */
function parseData(data,next){
  const parser = new CSV({
    flatKeys: true
  });
  parser.fromString(data,(err,result) =>{
    if(err){
      // Keep the raw output with the error so the failure can be diagnosed.
      logger.error({err, output: data},'Failed to parse CSV output');
      return;
    }
    const rows = Array.isArray(result) ? result : [];
    rows.forEach((gpu) =>{
      // Object.keys only yields own enumerable keys and does not depend on
      // the row inheriting (or shadowing) hasOwnProperty.
      Object.keys(gpu).forEach((key) =>{
        const value = gpu[key];
        if(value === '[Not Supported]' || value === '[N/A]'){
          // Set unsupported returns to an empty string for simplicity
          gpu[key] = '';
        } else if(value === 'Enabled'){
          gpu[key] = true;
        } else if(value === 'Disabled'){
          gpu[key] = false;
        }
      });
      next(gpu);
    });
  });
}
/**
 * Run the nvidia-smi query once. On success the command output is passed to
 * parseData with reportGpuStats as the per-row callback; on failure the error
 * is logged and nothing is reported.
 *
 * @returns {void}
 */
function queryGpus(){
  exec(query,(error,stdout) =>{
    if(!error){
      parseData(stdout,reportGpuStats);
      return;
    }
    logger.error({err: error}, 'Failed to get stats from GPUs');
  });
}
// Startup probe: run nvidia-smi once. If it fails, throw from the callback;
// otherwise start polling every 5000 ms and take a first sample right away.
exec('nvidia-smi',(probeError) =>{
  if(probeError){
    throw new Error('nVidia SMI is not available, verify that it is part of your PATH environment variable');
  }
  logger.info('nVidia SMI found, beginning loop and reporting');
  setInterval(queryGpus,5000);
  queryGpus();
});