From 8539c3f4fac3f089897e84a2a4a0bd37ffbf9dda Mon Sep 17 00:00:00 2001 From: Marco Braga Date: Wed, 14 Jul 2021 18:28:41 -0300 Subject: [PATCH] refact: gather-monitoring by Prometheus replica --- collection-scripts/gather_monitoring | 48 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/collection-scripts/gather_monitoring b/collection-scripts/gather_monitoring index ed43549d..36044917 100755 --- a/collection-scripts/gather_monitoring +++ b/collection-scripts/gather_monitoring @@ -12,10 +12,6 @@ declare -r CA_BUNDLE="${MONITORING_PATH}/ca-bundle.crt" init() { mkdir -p "${MONITORING_PATH}" - PROMETHEUS_ROUTE="$(oc get routes \ - -n openshift-monitoring prometheus-k8s \ - -o jsonpath='{.status.ingress[0].host}')" - ALERT_MANAGER_ROUTE="$(oc get routes \ -n openshift-monitoring alertmanager-main \ -o jsonpath='{.status.ingress[0].host}')" @@ -34,20 +30,36 @@ cleanup() { rm "$CA_BUNDLE" } -prom_get() { +prom_get_by_replica() { + local replica="$1"; shift local object="$1"; shift local path="$1"; shift - local result_path="$MONITORING_PATH/prometheus/$path" - mkdir -p "$(dirname "$result_path")" + local result_path="${MONITORING_PATH}/prometheus/${path}" + mkdir -p "$(dirname "${result_path}")" - oc get \ - --certificate-authority="$CA_BUNDLE" \ - --token="${SA_TOKEN}" \ - --server="https://$PROMETHEUS_ROUTE" \ - --raw="/api/v1/$object" \ - > "$result_path.json" \ - 2> "$result_path.stderr" + oc --insecure-skip-tls-verify exec pod/${replica} \ + -c prometheus \ + -n openshift-monitoring \ + -- /bin/bash -c "curl -sG http://localhost:9090/api/v1/${object}" \ + > "${result_path}.json" \ + 2> "${result_path}.stderr" +} + +prometheus_gather_by_replica(){ + echo "Collecting metrics for each replica..." + local pods="$(oc get pods -n openshift-monitoring -l prometheus=k8s -o jsonpath='{.items[*].metadata.name}')" + + echo "Replicas found: $pods" + for POD in ${pods}; do + prom_get_by_replica ${POD} rules ${POD}/rules || true + prom_get_by_replica ${POD} alertmanagers ${POD}/alertmanagers || true + prom_get_by_replica ${POD} status/config ${POD}/status/config || true + prom_get_by_replica ${POD} status/flags ${POD}/status/flags || true + prom_get_by_replica ${POD} status/runtimeinfo ${POD}/status/runtimeinfo || true + prom_get_by_replica ${POD} status/buildinfo ${POD}/status/buildinfo || true + prom_get_by_replica ${POD} status/tsdb ${POD}/status/tsdb || true + done } alertmanager_get() { @@ -66,19 +78,13 @@ alertmanager_get() { 2> "$result_path.stderr" } - monitoring_gather(){ init # begin gathering # NOTE || true ignores failures - prom_get rules rules || true - prom_get alertmanagers alertmanagers || true - prom_get status/config status/config || true - prom_get status/flags status/flags || true - prom_get status/runtimeinfo status/runtimeinfo || true - prom_get status/tsdb status/tsdb || true + prometheus_gather_by_replica || true alertmanager_get status status || true