diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,13 @@
 FROM python:3.12-slim-bookworm
 
 COPY requirements.txt /tmp
+RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
 RUN pip3 install --no-cache-dir -r /tmp/requirements.txt
 
 COPY exporter.py /usr/local/bin/homematic_exporter
+COPY healthcheck.sh /usr/local/bin/healthcheck.sh
 
 ENTRYPOINT [ "/usr/local/bin/homematic_exporter" ]
 EXPOSE 8010
+HEALTHCHECK --interval=20s --timeout=3s \
+  CMD bash /usr/local/bin/healthcheck.sh
diff --git a/exporter.py b/exporter.py
index 51ddd43..fe829f1 100755
--- a/exporter.py
+++ b/exporter.py
@@ -152,6 +152,7 @@ def run(self):
       try:
         with generate_metrics_summary.labels(self.ccu_host).time():
           self.generate_metrics()
+        self.refresh_time = time.time()
       except OSError as os_error:
         logging.info("Failed to generate metrics: {0}".format(os_error))
         error_counter.labels(self.ccu_host).inc()
@@ -183,6 +184,11 @@ def __init__(self, ccu_host, ccu_port, auth, gathering_interval, reload_names_in
     self.gathering_interval = int(gathering_interval)
     self.reload_names_interval = int(reload_names_interval)
     self.devicecount = Gauge('devicecount', 'Number of processed/supported devices', labelnames=['ccu'], namespace=self.METRICS_NAMESPACE)
+    # Upon request export the seconds since the last successful update.
+    # This is robust against internal crashes and can be used by the healthcheck.
+    self.refresh_time = time.time()
+    self.refresh_age = Gauge("refresh_age_seconds", "Seconds since the last successful refresh.", labelnames=["ccu"], namespace=self.METRICS_NAMESPACE)
+    self.refresh_age.labels(self.ccu_host).set_function(lambda: time.time() - self.refresh_time)
 
   def generate_metrics(self):
     logging.info("Gathering metrics")
diff --git a/healthcheck.sh b/healthcheck.sh
new file mode 100755
--- /dev/null
+++ b/healthcheck.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Docker health check for the Homematic exporter.
+#
+# Usage: healthcheck.sh [MAXAGE [PORT]]
+#   MAXAGE  maximum age in seconds of the last successful refresh (default: 60)
+#   PORT    port the exporter serves /metrics on (default: 8010)
+#
+# Exits 0 only if the endpoint answers and every homematic_refresh_age_seconds
+# sample is below MAXAGE; anything else (exporter down, HTTP error, metric
+# missing, value too old) exits 1.
+
+maxage=${1:-60}
+port=${2:-8010}
+
+# curl: -f fails on HTTP errors, --max-time keeps us below the HEALTHCHECK timeout.
+# awk compares numerically, so exponent notation in the exposition format is
+# handled, and its exit status becomes the script's exit status. Empty input
+# (curl failed) leaves "found" unset and therefore reports unhealthy.
+curl -sf --max-time 2 "http://localhost:${port}/metrics" | awk -v max="$maxage" '
+  index($0, "homematic_refresh_age_seconds{") == 1 {
+    found = 1
+    if ($NF + 0 >= max + 0) stale = 1
+  }
+  END { exit (found && !stale) ? 0 : 1 }
+'