From c47a05aa396381ec46340cfeb8360e9bd3b17354 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sun, 12 May 2024 12:49:32 +0200 Subject: [PATCH 1/3] Add `refresh_age` metric --- exporter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/exporter.py b/exporter.py index 51ddd43..fe829f1 100755 --- a/exporter.py +++ b/exporter.py @@ -152,6 +152,7 @@ def run(self): try: with generate_metrics_summary.labels(self.ccu_host).time(): self.generate_metrics() + self.refresh_time = time.time() except OSError as os_error: logging.info("Failed to generate metrics: {0}".format(os_error)) error_counter.labels(self.ccu_host).inc() @@ -183,6 +184,11 @@ def __init__(self, ccu_host, ccu_port, auth, gathering_interval, reload_names_in self.gathering_interval = int(gathering_interval) self.reload_names_interval = int(reload_names_interval) self.devicecount = Gauge('devicecount', 'Number of processed/supported devices', labelnames=['ccu'], namespace=self.METRICS_NAMESPACE) + # Upon request export the seconds since the last successful update. + # This is robust against internal crashes and can be used by the healthcheck. + self.refresh_time = time.time() + self.refresh_age = Gauge("refresh_age_seconds", "Seconds since the last successful refresh.", labelnames=["ccu"], namespace=self.METRICS_NAMESPACE) + self.refresh_age.labels(self.ccu_host).set_function(lambda: time.time() - self.refresh_time) def generate_metrics(self): logging.info("Gathering metrics") From 463a2e8f16e1e31028c7e5058bb5ee00a880a577 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sun, 12 May 2024 12:53:51 +0200 Subject: [PATCH 2/3] Add a healthcheck --- Dockerfile | 3 +++ healthcheck.sh | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 healthcheck.sh diff --git a/Dockerfile b/Dockerfile index 83039dc..63441cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,10 @@ COPY requirements.txt /tmp RUN pip3 install --no-cache-dir -r /tmp/requirements.txt COPY exporter.py /usr/local/bin/homematic_exporter +COPY healthcheck.sh /usr/local/bin/healthcheck.sh ENTRYPOINT [ "/usr/local/bin/homematic_exporter" ] EXPOSE 8010 +HEALTHCHECK --interval=20s --timeout=3s \ + CMD bash /usr/local/bin/healthcheck.sh diff --git a/healthcheck.sh b/healthcheck.sh new file mode 100644 index 0000000..4d4c84e --- /dev/null +++ b/healthcheck.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# First argument is the maximum age in seconds (default: 60) +maxage=${1:-60} + +# Get the age of the last successful metrics refresh in seconds +age=$(curl -s http://localhost:9040/metrics | grep 'homematic_refresh_age{' | cut -d ' ' -f2 | cut -d '.' -f1) + +if [[ $age -lt $maxage ]]; then + exit 0 +else + # Maximum age exceeded → unhealthy + exit 1 +fi From 36c4135f1508a9419b52091ebb8625ac85714503 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Wed, 22 May 2024 01:08:01 +0200 Subject: [PATCH 3/3] Include curl to fix healthcheck --- Dockerfile | 1 + healthcheck.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 63441cc..7d10f3d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ FROM python:3.12-slim-bookworm COPY requirements.txt /tmp +RUN apt-get update && apt-get install curl -y RUN pip3 install --no-cache-dir -r /tmp/requirements.txt COPY exporter.py /usr/local/bin/homematic_exporter diff --git a/healthcheck.sh b/healthcheck.sh index 4d4c84e..4834867 100644 --- a/healthcheck.sh +++ b/healthcheck.sh @@ -4,7 +4,7 @@ maxage=${1:-60} # Get the age of the last successful metrics refresh in seconds -age=$(curl -s http://localhost:9040/metrics | grep 'homematic_refresh_age{' | cut -d ' ' -f2 | cut -d '.' -f1) +age=$(curl -s http://localhost:9040/metrics | grep 'homematic_refresh_age_seconds{' | cut -d ' ' -f2 | cut -d '.' -f1) if [[ $age -lt $maxage ]]; then exit 0