Skip to content

Commit

Permalink
Revert e7064fd, undoing style fixes
Browse files Browse the repository at this point in the history
To decrease the size of the pull request
  • Loading branch information
Rovanion committed Aug 4, 2021
1 parent 9afe3af commit ccf39b6
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 236 deletions.
132 changes: 66 additions & 66 deletions accounts.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,91 +17,91 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */
package main

import (
"github.com/prometheus/client_golang/prometheus"
"regexp"
"strconv"
"strings"
"strings"
"strconv"
"regexp"
"github.com/prometheus/client_golang/prometheus"
)

// JobMetrics holds the per-account job counters accumulated by
// ParseAccountsMetrics. All values are float64 so they can be handed to
// Prometheus const metrics without conversion.
type JobMetrics struct {
	pending      float64 // number of jobs whose state starts with "pending"
	running      float64 // number of jobs whose state starts with "running"
	running_cpus float64 // sum of the CPU counts of the running jobs
	suspended    float64 // number of jobs whose state starts with "suspended"
}

func ParseAccountsMetrics(squeueOutput []byte) map[string]*JobMetrics {
accounts := make(map[string]*JobMetrics)
lines := strings.Split(string(squeueOutput), "\n")
for _, line := range lines {
if strings.Contains(line, "|") {
account := strings.Split(line, "|")[1]
_, key := accounts[account]
if !key {
accounts[account] = &JobMetrics{0, 0, 0, 0}
}
state := strings.Split(line, "|")[2]
state = strings.ToLower(state)
cpus, _ := strconv.ParseFloat(strings.Split(line, "|")[3], 64)
pending := regexp.MustCompile(`^pending`)
running := regexp.MustCompile(`^running`)
suspended := regexp.MustCompile(`^suspended`)
switch {
case pending.MatchString(state) == true:
accounts[account].pending++
case running.MatchString(state) == true:
accounts[account].running++
accounts[account].running_cpus += cpus
case suspended.MatchString(state) == true:
accounts[account].suspended++
}
}
}
return accounts
func ParseAccountsMetrics(input []byte) map[string]*JobMetrics {
accounts := make(map[string]*JobMetrics)
lines := strings.Split(string(input), "\n")
for _, line := range lines {
if strings.Contains(line,"|") {
account := strings.Split(line,"|")[1]
_,key := accounts[account]
if !key {
accounts[account] = &JobMetrics{0,0,0,0}
}
state := strings.Split(line,"|")[2]
state = strings.ToLower(state)
cpus,_ := strconv.ParseFloat(strings.Split(line,"|")[3],64)
pending := regexp.MustCompile(`^pending`)
running := regexp.MustCompile(`^running`)
suspended := regexp.MustCompile(`^suspended`)
switch {
case pending.MatchString(state) == true:
accounts[account].pending++
case running.MatchString(state) == true:
accounts[account].running++
accounts[account].running_cpus += cpus
case suspended.MatchString(state) == true:
accounts[account].suspended++
}
}
}
return accounts
}

// GetAccountsMetrics passes an squeue command line to Subprocess and feeds
// the result to ParseAccountsMetrics, returning the per-account job metrics.
func GetAccountsMetrics() map[string]*JobMetrics {
	squeueOutput := Subprocess("squeue", "-a", "-r", "-h", "-o %A|%a|%T|%C")
	return ParseAccountsMetrics(squeueOutput)
}

// AccountsCollector holds the Prometheus descriptors for the per-account
// job metrics. Its Describe and Collect methods let it be registered as a
// Prometheus collector.
type AccountsCollector struct {
	pending      *prometheus.Desc
	running      *prometheus.Desc
	running_cpus *prometheus.Desc
	suspended    *prometheus.Desc
}

// NewAccountsCollector builds an AccountsCollector whose descriptors all
// carry a single variable label, "account".
func NewAccountsCollector() *AccountsCollector {
	labels := []string{"account"}
	return &AccountsCollector{
		pending:      prometheus.NewDesc("slurm_account_jobs_pending", "Pending jobs for account", labels, nil),
		running:      prometheus.NewDesc("slurm_account_jobs_running", "Running jobs for account", labels, nil),
		running_cpus: prometheus.NewDesc("slurm_account_cpus_running", "Running cpus for account", labels, nil),
		suspended:    prometheus.NewDesc("slurm_account_jobs_suspended", "Suspended jobs for account", labels, nil),
	}
}

// Describe sends each of the collector's four descriptors to ch exactly once.
func (ac *AccountsCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- ac.pending
	ch <- ac.running
	ch <- ac.running_cpus
	ch <- ac.suspended
}

// Collect fetches the current per-account metrics via GetAccountsMetrics and
// sends them to ch as gauge values labelled with the account name.
//
// A metric is only emitted when its value is greater than zero, so accounts
// without, for example, suspended jobs produce no suspended-jobs sample.
func (ac *AccountsCollector) Collect(ch chan<- prometheus.Metric) {
	for account, m := range GetAccountsMetrics() {
		if m.pending > 0 {
			ch <- prometheus.MustNewConstMetric(ac.pending, prometheus.GaugeValue, m.pending, account)
		}
		if m.running > 0 {
			ch <- prometheus.MustNewConstMetric(ac.running, prometheus.GaugeValue, m.running, account)
		}
		if m.running_cpus > 0 {
			ch <- prometheus.MustNewConstMetric(ac.running_cpus, prometheus.GaugeValue, m.running_cpus, account)
		}
		if m.suspended > 0 {
			ch <- prometheus.MustNewConstMetric(ac.suspended, prometheus.GaugeValue, m.suspended, account)
		}
	}
}
6 changes: 3 additions & 3 deletions gpus.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ func GetGPUsMetrics() *GPUsMetrics {

// NewGPUsCollector builds a GPUsCollector with descriptors for the
// allocated, idle and total GPU counts and the GPU utilization. None of the
// descriptors has variable or constant labels.
func NewGPUsCollector() *GPUsCollector {
	return &GPUsCollector{
		alloc:       prometheus.NewDesc("slurm_gpus_alloc", "Allocated GPUs", nil, nil),
		idle:        prometheus.NewDesc("slurm_gpus_idle", "Idle GPUs", nil, nil),
		total:       prometheus.NewDesc("slurm_gpus_total", "Total GPUs", nil, nil),
		utilization: prometheus.NewDesc("slurm_gpus_utilization", "Total GPU utilization", nil, nil),
	}
}
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ func init() {
// Metrics have to be registered to be exposed
prometheus.MustRegister(NewAccountsCollector()) // from accounts.go
prometheus.MustRegister(NewCPUsCollector()) // from cpus.go
prometheus.MustRegister(NewGPUsCollector()) // from gpus.go
prometheus.MustRegister(NewNodesCollector()) // from nodes.go
prometheus.MustRegister(NewNodeCollector()) // from node.go
prometheus.MustRegister(NewGPUsCollector()) // from gpus.go
prometheus.MustRegister(NewPartitionsCollector()) // from partitions.go
prometheus.MustRegister(NewQueueCollector()) // from queue.go
prometheus.MustRegister(NewSchedulerCollector()) // from scheduler.go
Expand Down
1 change: 0 additions & 1 deletion node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ func TestNodeMetrics(t *testing.T) {
t.Fatalf("Can not open test data: %v", err)
}
metrics := ParseNodeMetrics(data)
// t.Logf("%+v", metrics)

assert.Contains(t, metrics, "b001")
assert.Equal(t, uint64(327680), metrics["b001"].memAlloc)
Expand Down
144 changes: 72 additions & 72 deletions partitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,46 +23,46 @@ import (
)

// PartitionMetrics holds the per-partition values produced by
// ParsePartitionsMetrics: four CPU counts taken from the sinfo output and
// the number of pending jobs counted from the squeue output.
type PartitionMetrics struct {
	allocated float64
	idle      float64
	other     float64
	pending   float64
	total     float64
}

// ParsePartitionsMetrics builds per-partition metrics from two command
// outputs.
//
// sinfoOutput is read line by line; each line is split on "," with field 0
// taken as the partition name and field 1 as a "/"-separated list whose
// first four entries are stored as allocated, idle, other and total, in that
// order. Lines without a comma, or whose second field has fewer than four
// "/"-separated entries, are skipped instead of causing an
// index-out-of-range panic.
//
// squeueOutput is read as one partition name per line; every line that
// exactly matches a partition found in sinfoOutput increments that
// partition's pending counter. Names not present in sinfoOutput are ignored.
func ParsePartitionsMetrics(sinfoOutput []byte, squeueOutput []byte) map[string]*PartitionMetrics {
	partitions := make(map[string]*PartitionMetrics)
	for _, line := range strings.Split(string(sinfoOutput), "\n") {
		fields := strings.Split(line, ",")
		if len(fields) < 2 {
			continue
		}
		counts := strings.Split(fields[1], "/")
		if len(counts) < 4 {
			continue
		}

		name := fields[0]
		pm, ok := partitions[name]
		if !ok {
			pm = &PartitionMetrics{}
			partitions[name] = pm
		}

		// Parse errors are deliberately ignored, as in the original code:
		// a count that is not a number is recorded as the value ParseFloat
		// returns alongside the error (0 for a syntax error).
		pm.allocated, _ = strconv.ParseFloat(counts[0], 64)
		pm.idle, _ = strconv.ParseFloat(counts[1], 64)
		pm.other, _ = strconv.ParseFloat(counts[2], 64)
		pm.total, _ = strconv.ParseFloat(counts[3], 64)
	}

	// Accumulate the number of pending jobs by partition name.
	for _, name := range strings.Split(string(squeueOutput), "\n") {
		if pm, ok := partitions[name]; ok {
			pm.pending++
		}
	}

	return partitions
}

func GetPartitionsMetrics() map[string]*PartitionMetrics {
Expand All @@ -73,49 +73,49 @@ func GetPartitionsMetrics() map[string]*PartitionMetrics {
}

// PartitionsCollector holds the Prometheus descriptors for the
// per-partition metrics. Its Describe and Collect methods let it be
// registered as a Prometheus collector.
type PartitionsCollector struct {
	allocated *prometheus.Desc
	idle      *prometheus.Desc
	other     *prometheus.Desc
	pending   *prometheus.Desc
	total     *prometheus.Desc
}

// NewPartitionsCollector builds a PartitionsCollector whose descriptors all
// carry a single variable label, "partition".
func NewPartitionsCollector() *PartitionsCollector {
	labels := []string{"partition"}
	return &PartitionsCollector{
		allocated: prometheus.NewDesc("slurm_partition_cpus_allocated", "Allocated CPUs for partition", labels, nil),
		idle:      prometheus.NewDesc("slurm_partition_cpus_idle", "Idle CPUs for partition", labels, nil),
		other:     prometheus.NewDesc("slurm_partition_cpus_other", "Other CPUs for partition", labels, nil),
		pending:   prometheus.NewDesc("slurm_partition_jobs_pending", "Pending jobs for partition", labels, nil),
		total:     prometheus.NewDesc("slurm_partition_cpus_total", "Total CPUs for partition", labels, nil),
	}
}

// Describe sends each of the collector's five descriptors to ch exactly once.
func (pc *PartitionsCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- pc.allocated
	ch <- pc.idle
	ch <- pc.other
	ch <- pc.pending
	ch <- pc.total
}

// Collect fetches the current per-partition metrics via GetPartitionsMetrics
// and sends them to ch as gauge values labelled with the partition name.
//
// A metric is only emitted when its value is greater than zero, so a
// partition with, for example, no idle CPUs produces no idle-CPUs sample.
func (pc *PartitionsCollector) Collect(ch chan<- prometheus.Metric) {
	for partition, m := range GetPartitionsMetrics() {
		if m.allocated > 0 {
			ch <- prometheus.MustNewConstMetric(pc.allocated, prometheus.GaugeValue, m.allocated, partition)
		}
		if m.idle > 0 {
			ch <- prometheus.MustNewConstMetric(pc.idle, prometheus.GaugeValue, m.idle, partition)
		}
		if m.other > 0 {
			ch <- prometheus.MustNewConstMetric(pc.other, prometheus.GaugeValue, m.other, partition)
		}
		if m.pending > 0 {
			ch <- prometheus.MustNewConstMetric(pc.pending, prometheus.GaugeValue, m.pending, partition)
		}
		if m.total > 0 {
			ch <- prometheus.MustNewConstMetric(pc.total, prometheus.GaugeValue, m.total, partition)
		}
	}
}
Loading

0 comments on commit ccf39b6

Please sign in to comment.