Skip to content

Commit

Permalink
add a new metrics for pending jobs by partition
Browse files Browse the repository at this point in the history
  • Loading branch information
vpenso committed Oct 14, 2020
1 parent 7f340d6 commit 99d8fde
Showing 1 changed file with 38 additions and 4 deletions.
42 changes: 38 additions & 4 deletions partitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,40 @@ func PartitionsData() []byte {
return out
}

func PartitionsPendingJobsData() []byte {
cmd := exec.Command("squeue","-a","-r","-h","-o%P","--states=PENDING")
stdout, err := cmd.StdoutPipe()
if err != nil {
log.Fatal(err)
}
if err := cmd.Start(); err != nil {
log.Fatal(err)
}
out, _ := ioutil.ReadAll(stdout)
if err := cmd.Wait(); err != nil {
log.Fatal(err)
}
return out
}

type PartitionMetrics struct {
allocated float64
idle float64
other float64
pending float64
total float64
}

func ParsePartitionsMetrics(input []byte) map[string]*PartitionMetrics {
func ParsePartitionsMetrics() map[string]*PartitionMetrics {
partitions := make(map[string]*PartitionMetrics)
lines := strings.Split(string(input), "\n")
lines := strings.Split(string(PartitionsData()), "\n")
for _, line := range lines {
if strings.Contains(line,",") {
// name of a partition
partition := strings.Split(line,",")[0]
_,key := partitions[partition]
if !key {
partitions[partition] = &PartitionMetrics{0,0,0,0}
partitions[partition] = &PartitionMetrics{0,0,0,0,0}
}
states := strings.Split(line,",")[1]
allocated,_ := strconv.ParseFloat(strings.Split(states,"/")[0],64)
Expand All @@ -69,13 +86,25 @@ func ParsePartitionsMetrics(input []byte) map[string]*PartitionMetrics {
partitions[partition].total = total
}
}
// get list of pending jobs by partition name
list := strings.Split(string(PartitionsPendingJobsData()),"\n")
for _,partition := range list {
// accumulate the number of pending jobs
_,key := partitions[partition]
if key {
partitions[partition].pending += 1
}
}


return partitions
}

type PartitionsCollector struct {
allocated *prometheus.Desc
idle *prometheus.Desc
other *prometheus.Desc
pending *prometheus.Desc
total *prometheus.Desc
}

Expand All @@ -85,6 +114,7 @@ func NewPartitionsCollector() *PartitionsCollector {
allocated: prometheus.NewDesc("slurm_partition_cpus_allocated", "Allocated CPUs for partition", labels,nil),
idle: prometheus.NewDesc("slurm_partition_cpus_idle", "Idle CPUs for partition", labels,nil),
other: prometheus.NewDesc("slurm_partition_cpus_other", "Other CPUs for partition", labels,nil),
pending: prometheus.NewDesc("slurm_partition_jobs_pending", "Pending jobs for partition", labels,nil),
total: prometheus.NewDesc("slurm_partition_cpus_total", "Total CPUs for partition", labels,nil),
}
}
Expand All @@ -93,11 +123,12 @@ func (pc *PartitionsCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- pc.allocated
ch <- pc.idle
ch <- pc.other
ch <- pc.pending
ch <- pc.total
}

func (pc *PartitionsCollector) Collect(ch chan<- prometheus.Metric) {
pm := ParsePartitionsMetrics(PartitionsData())
pm := ParsePartitionsMetrics()
for p := range pm {
if pm[p].allocated > 0 {
ch <- prometheus.MustNewConstMetric(pc.allocated, prometheus.GaugeValue, pm[p].allocated, p)
Expand All @@ -108,6 +139,9 @@ func (pc *PartitionsCollector) Collect(ch chan<- prometheus.Metric) {
if pm[p].other > 0 {
ch <- prometheus.MustNewConstMetric(pc.other, prometheus.GaugeValue, pm[p].other, p)
}
if pm[p].pending > 0 {
ch <- prometheus.MustNewConstMetric(pc.pending, prometheus.GaugeValue, pm[p].pending, p)
}
if pm[p].total > 0 {
ch <- prometheus.MustNewConstMetric(pc.total, prometheus.GaugeValue, pm[p].total, p)
}
Expand Down

0 comments on commit 99d8fde

Please sign in to comment.