Skip to content

Commit

Permalink
Tidy internal structure
Browse files Browse the repository at this point in the history
  • Loading branch information
paulgear authored Dec 27, 2023
2 parents 4e9488f + 4fdcdfc commit 8bbd60c
Show file tree
Hide file tree
Showing 6 changed files with 381 additions and 272 deletions.
186 changes: 13 additions & 173 deletions src/alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,9 @@
"""

import pprint
import sys

import line_protocol
import metrics

from io import TextIOWrapper
import outputs

from classifier import MetricClassifier

Expand Down Expand Up @@ -78,76 +75,25 @@
}


"""
Metric types for collectd
"""
# Maps each summary metric name to the identifier suffix that follows
# "ntpmon-" in the collectd PUTVAL line printed for that metric.
_collectdtypes = dict(
    frequency="frequency/frequency_offset",
    offset="offset/time_offset",
    reach="reachability/percent",
    rootdelay="rootdelay/time_offset",
    rootdisp="rootdisp/time_offset",
    runtime="runtime/duration",
    stratum="stratum/count",
    sysjitter="sysjitter/time_offset",
    sysoffset="sysoffset/time_offset",
)


"""
Peer metric types, used by both collectd & telegraf
"""
# Every peer type maps to "peers/count-<type>"; the key doubles as the
# peertype label/tag value in the prometheus and telegraf outputs.
_peer_types = {
    kind: "peers/count-" + kind
    for kind in (
        "backup",
        "excess",
        "false",
        "invalid",
        "outlier",
        "pps",
        "survivor",
        "sync",
    )
}

"""
Metric types and suffixes for prometheus
"""
# Each value is a (metrictype, suffix, description) tuple:
#   metrictype  - "i" formats the value as an integer, "%" divides it by 100
#                 before output, None leaves it as a plain float
#   suffix      - appended to "ntpmon_<metric>" to form the gauge name
#                 (None means no suffix)
#   description - help text for the gauge
_prometheus_types = dict(
    frequency=(None, "_hertz", "Frequency error of the local clock"),
    offset=(None, "_seconds", "Mean clock offset of peers"),
    reach=("%", "_ratio", "Peer reachability over the last 8 polls"),
    rootdelay=(None, "_seconds", "Network delay to stratum 0 sources"),
    rootdisp=(None, "_seconds", "Maximum calculated offset from stratum 0 sources"),
    runtime=(None, "_duration_seconds", "Duration NTP service has been running"),
    stratum=("i", None, "NTP stratum of this server"),
    sysjitter=(None, "_seconds", "RMS average of most recent system peer offset differences"),
    sysoffset=(None, "_seconds", "Current clock offset of selected system peer"),
)

"""
Metric types for telegraf
"""
# The keys select which summary metrics go into the telegraf output.
# NOTE(review): the values ("i" for stratum, None otherwise) presumably mark
# integer-typed fields - confirm against the consumer of this table.
_telegraf_types = {
    name: ("i" if name == "stratum" else None)
    for name in (
        "frequency",
        "offset",
        "reach",
        "rootdelay",
        "rootdisp",
        "runtime",
        "stratum",
        "sysjitter",
        "sysoffset",
    )
}


class NTPAlerter(object):
def __init__(self, checks):
    """
    Create an alerter for the given checks with empty metric state.
    """
    self.checks = checks
    # Three independent, initially empty dicts: collected metric values,
    # registered metric source objects, and prometheus gauges keyed by name.
    self.metrics, self.objs, self.prometheus_objs = {}, {}, {}
    self.mc = MetricClassifier(_metricdefs)

def collectmetrics(self, checkobjs, debug):
def alert(self, checkobjs: dict, output: outputs.Output, debug: bool = False) -> None:
    """
    Collect and classify the metrics, then hand them to the given output.

    The overall result code is stored under the "result" key before the
    summary statistics and the peer counts are sent, in that order.
    """
    # Gather and classify first so the result code reflects current data.
    self.collectmetrics(checkobjs=checkobjs)
    self.mc.classify_metrics(self.metrics)
    # The worst metric and its code are unpacked but not used any further here.
    (_worst_metric, _worst_code) = self.mc.worst_metric(self.checks)
    self.metrics["result"] = self.return_code()

    output.send_summary_stats(self.metrics, debug)
    output.send_peer_counts(self.metrics, debug)

def collectmetrics(self, checkobjs: dict, debug: bool = False) -> None:
"""
Get metrics from each registered metric source and add all relevant aliases.
"""
Expand Down Expand Up @@ -192,112 +138,6 @@ def custom_message_sync(self, result):
return "%s: Time is in sync with %s" % (result, self.objs["peers"].syncpeer())
return None

def alert(self, checkobjs, hostname, interval, format, telegraf_file, debug=False):
    """
    Collect and classify the metrics, then emit them in the requested format.

    Recognised formats are "collectd", "prometheus" and "telegraf"; any other
    value skips the summary metrics. Peer counts are always passed on to
    alert_peers afterwards.
    """
    # Metric collection is always run with debug disabled.
    self.collectmetrics(checkobjs=checkobjs, debug=False)
    self.mc.classify_metrics(self.metrics)
    # The worst metric and its code are unpacked but not used any further here.
    (_worst_metric, _worst_code) = self.mc.worst_metric(self.checks)
    self.metrics["result"] = self.return_code()

    # One summary emitter per supported output format.
    emitters = {
        "collectd": lambda: self.alert_collectd(hostname, interval),
        "prometheus": lambda: self.alert_prometheus(debug=debug),
        "telegraf": lambda: self.alert_telegraf(telegraf_file),
    }
    emit_summary = emitters.get(format)
    if emit_summary is not None:
        emit_summary()

    self.alert_peers(hostname, interval, format, telegraf_file, debug)

def alert_collectd(self, hostname, interval):
    """
    Print one collectd PUTVAL line for each known summary metric that has
    been collected, in sorted metric-name order.
    """
    template = 'PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f'
    for name in sorted(_collectdtypes):
        if name not in self.metrics:
            # Metric was not collected; nothing to report for it.
            continue
        fields = (hostname, _collectdtypes[name], interval, self.metrics[name])
        print(template % fields)

def set_prometheus_metric(self, name, description, value, peertype=None):
    """
    Set the prometheus gauge called `name` to `value`, creating the gauge on
    first use and remembering it in self.prometheus_objs.

    When `peertype` is given, the gauge is created with a "peertype" label
    and the value is set on the child for that label value.
    """
    import prometheus_client

    if name not in self.prometheus_objs:
        # First use of this name: create the gauge, labelled only if needed.
        if peertype is None:
            created = prometheus_client.Gauge(name, description)
        else:
            created = prometheus_client.Gauge(name, description, ["peertype"])
        self.prometheus_objs[name] = created

    gauge = self.prometheus_objs[name]
    target = gauge if peertype is None else gauge.labels(peertype=peertype)
    target.set(value)

def alert_prometheus(self, debug=False):
    """
    Publish every known summary metric that has been collected as a
    prometheus gauge named "ntpmon_<metric><suffix>".

    With debug set, the gauges are printed in text form (HELP, TYPE and
    value lines) instead of being set through set_prometheus_metric.
    """
    for key in sorted(_prometheus_types):
        if key not in self.metrics:
            continue

        kind, suffix, helptext = _prometheus_types[key]
        gauge_name = "ntpmon_" + key
        if suffix is not None:
            gauge_name += suffix

        value = self.metrics[key]
        if kind == "i":
            template = "%d"
        else:
            template = "%.9f"
            if kind == "%":
                # Percentages are reported as a ratio.
                value /= 100

        if not debug:
            self.set_prometheus_metric(gauge_name, helptext, value)
            continue

        # Render the value before printing anything, as formatting may fail.
        rendered = template % (value,)
        print("# HELP %s %s" % (gauge_name, helptext))
        print("# TYPE %s gauge" % (gauge_name,))
        print("%s %s" % (gauge_name, rendered))

def alert_telegraf(self, telegraf_file: TextIOWrapper):
    """
    Write the collected summary metrics to `telegraf_file` as a single
    "ntpmon" line-protocol record.
    """
    selected = {}
    for key in sorted(_telegraf_types):
        if key in self.metrics:
            selected[key] = self.metrics[key]
    record = line_protocol.to_line_protocol(selected, "ntpmon")
    print(record, file=telegraf_file)

def alert_peers(self, hostname, interval, format, telegraf_file, debug=False):
    """
    Emit one peer-count metric per peer type in the requested output format.

    hostname and interval are only used for the "collectd" format, and
    telegraf_file only for "telegraf". For "prometheus", debug selects
    printing text output instead of setting gauges. Any other format value
    produces no output.

    Peer types with no collected value are skipped, matching how the summary
    emitters ignore metrics absent from self.metrics. Previously a missing
    value reached the numeric format strings as None and raised TypeError.
    """
    if debug and format == "prometheus":
        print("# TYPE ntpmon_peers gauge")
    for metric in _peer_types:
        value = self.metrics.get(metric)
        if value is None:
            # Nothing was collected for this peer type; there is no count to report.
            continue
        if format == "collectd":
            print(
                'PUTVAL "%s/ntpmon-%s" interval=%d N:%.9f'
                % (
                    hostname,
                    _peer_types[metric],
                    interval,
                    value,
                )
            )
        elif format == "prometheus":
            if debug:
                print('ntpmon_peers{peertype="%s"} %d' % (metric, value))
            else:
                self.set_prometheus_metric("ntpmon_peers", "NTP peer count", value, metric)
        elif format == "telegraf":
            telegraf_metrics = {
                "count": value,
                "peertype": metric,
            }
            output = line_protocol.to_line_protocol(telegraf_metrics, "ntpmon_peers")
            print(output, file=telegraf_file)

def alert_nagios(self, checkobjs, debug):
"""
Produce nagios output for the metrics
Expand Down
5 changes: 4 additions & 1 deletion src/line_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ def to_line_protocol(metrics: dict, which: str, additional_tags: dict = {}) -> s
timestamp = f" {seconds}{nanoseconds:09}"
else:
timestamp = ""
return f"{which},{format_tags(metrics, additional_tags)} {format_fields(metrics)}{timestamp}"
tags = format_tags(metrics, additional_tags)
if len(tags):
tags = "," + tags
return f"{which}{tags} {format_fields(metrics)}{timestamp}"


def transform_identifier(id: str) -> str:
Expand Down
Loading

0 comments on commit 8bbd60c

Please sign in to comment.