From f926708b05b01a8f906554251b192db26f521ccc Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:11:07 +0800 Subject: [PATCH 01/13] add new datadog autorecovery version field Signed-off-by: ericsyh --- .../templates/bookkeeper/_autorecovery.tpl | 47 +++++++++++++++++++ charts/sn-platform-slim/values.yaml | 1 + 2 files changed, 48 insertions(+) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl index 6f18f7756..7889defc3 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl @@ -20,6 +20,7 @@ ${HOSTNAME}.{{ template "pulsar.autorecovery.service" . }}.{{ template "pulsar.n {{/*Define autorecovery datadog annotation*/}} {{- define "pulsar.autorecovery.datadog.annotation" -}} {{- if .Values.datadog.components.autorecovery.enabled }} +{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.init_configs: | @@ -63,6 +64,52 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . 
}}", + {{ end -}} + "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_memory_direct_bytes_used": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "gauge", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-autorecovery: {{ template "pulsar.fullname" . }}-{{ .Values.autorecovery.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/* diff --git a/charts/sn-platform-slim/values.yaml b/charts/sn-platform-slim/values.yaml index d532dc95d..523bad088 100644 --- a/charts/sn-platform-slim/values.yaml +++ b/charts/sn-platform-slim/values.yaml @@ -1788,6 +1788,7 @@ datadog: ## Datadog configuration. Set namespace to be prefixed to every metric when viewed in Datadog. 
## https://docs.datadoghq.com/containers/kubernetes/prometheus/?tab=kubernetesadv2 namespace: + autodiscoveryVersion: v1 components: zookeeper: enabled: false From 5da7dc5edf9551f18c92cac444659124f56ba022 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:21:46 +0800 Subject: [PATCH 02/13] support on bookie Signed-off-by: ericsyh --- .../templates/bookkeeper/_bookkeeper.tpl | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl index 177cadd05..1ebedd1c9 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl @@ -170,6 +170,7 @@ Define bookkeeper log volumes {{/*Define bookkeeper datadog annotation*/}} {{- define "pulsar.bookkeeper.datadog.annotation" -}} {{- if .Values.datadog.components.bookkeeper.enabled }} +{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.init_configs: | @@ -213,6 +214,52 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . 
}}", + {{ end -}} + "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_memory_direct_bytes_used": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "gauge", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-bookie: {{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/*Define bookkeeper service account*/}} From 10b4dd4d852f67195b4c2c32705fa750ae81ce0f Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:27:59 +0800 Subject: [PATCH 03/13] support on broker Signed-off-by: ericsyh --- .../templates/broker/_broker.tpl | 42 +++++-------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/charts/sn-platform-slim/templates/broker/_broker.tpl b/charts/sn-platform-slim/templates/broker/_broker.tpl index dc418fe4f..6e4d82680 100644 --- a/charts/sn-platform-slim/templates/broker/_broker.tpl +++ b/charts/sn-platform-slim/templates/broker/_broker.tpl @@ -172,7 +172,7 @@ Define function worker config volume {{/*Define broker datadog annotation*/}} {{- define "pulsar.broker.datadog.annotation" -}} {{- if .Values.datadog.components.broker.enabled }} -{{- if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "openmetrics" }} +{{- if eq 
.Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | @@ -262,29 +262,13 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | ] } ] -{{- else if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "native" }} -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | - ["pulsar"] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | - [{}] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | - [ - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", - "enable_health_service_check": true, - "timeout": 300, - "tags": [ - "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" - ] - } - ] -{{- else if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "both" }} -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | - ["openmetrics", "pulsar"] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | - [{}, {}] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | - [ +{{- end }} +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ { "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} @@ -366,16 +350,10 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] - }, - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", - "enable_health_service_check": true, - "timeout": 300, - "tags": [ - "pulsar-broker: {{ template "pulsar.fullname" . 
}}-{{ .Values.broker.component }}" - ] } ] + } + } {{- end }} {{- end }} {{- end }} From f6e289b405bc13ce197a438581f84f2e271b6aeb Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:30:43 +0800 Subject: [PATCH 04/13] support on prometheus Signed-off-by: ericsyh --- .../templates/prometheus/_prometheus.tpl | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl index ddcbe604b..356337241 100644 --- a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl @@ -43,6 +43,7 @@ Define toolset token volumes {{/*Define federation datadog annotation*/}} {{- define "pulsar.prometheus.datadog.annotation" -}} {{- if .Values.datadog.components.prometheus.enabled }} +{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.init_configs: | @@ -130,6 +131,96 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon } ] {{- end }} +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . 
}}", + {{ end -}} + "metrics": {{ .Values.datadog.components.prometheus.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "pulsar_topics_count": "gauge", + "pulsar_rate_in": "gauge", + "pulsar_rate_out": "gauge", + "pulsar_subscriptions_count": "gauge", + "pulsar_producers_count": "gauge", + "pulsar_consumers_count": "gauge", + "pulsar_throughput_in": "gauge", + "pulsar_throughput_out": "gauge", + "pulsar_storage_size": "gauge", + "pulsar_msg_backlog": "gauge", + "pulsar_storage_backlog_size": "gauge", + "pulsar_storage_offloaded_size": "gauge", + "pulsar_storage_write_latency_le_0_5": "gauge", + "pulsar_storage_write_latency_le_1": "gauge", + "pulsar_storage_write_latency_le_5": "gauge", + "pulsar_storage_write_latency_le_10": "gauge", + "pulsar_storage_write_latency_le_20": "gauge", + "pulsar_storage_write_latency_le_50": "gauge", + "pulsar_storage_write_latency_le_100": "gauge", + "pulsar_storage_write_latency_le_200": "gauge", + "pulsar_storage_write_latency_le_1000": "gauge", + "pulsar_storage_write_latency_overflow": "gauge", + "pulsar_entry_size_le_128": "gauge", + "pulsar_entry_size_le_512": "gauge", + "pulsar_entry_size_le_1_kb": "gauge", + "pulsar_entry_size_le_2_kb": "gauge", + "pulsar_entry_size_le_4_kb": "gauge", + "pulsar_entry_size_le_16_kb": "gauge", + "pulsar_entry_size_le_100_kb": "gauge", + "pulsar_entry_size_le_1_mb": "gauge", + "pulsar_entry_size_le_overflow": "gauge", + "pulsar_subscription_back_log": "gauge", + "pulsar_subscription_back_log_no_delayed": "gauge", + "pulsar_subscription_delayed": "gauge", + "pulsar_subscription_msg_rate_redeliver": "gauge", + "pulsar_subscription_unacked_messages": "gauge", + "pulsar_subscription_blocked_on_unacked_messages": "gauge", + "pulsar_subscription_msg_rate_out": "gauge", + "pulsar_subscription_msg_throughput_out": "gauge", + "pulsar_in_bytes_total": "counter", + "pulsar_in_messages_total": "counter", + 
"topic_load_times": "counter", + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_classes_loaded": "gauge", + "jvm_classes_loaded_total": "counter", + "jvm_classes_unloaded_total": "counter", + "jvm_buffer_pool_used_bytes": "gauge", + "jvm_buffer_pool_capacity_bytes": "gauge", + "jvm_buffer_pool_used_buffers": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "counter", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + } + } + ] + } + } +{{- end }} +{{- end }} {{- end }} From 5f004b4da7a27ac0519d86832aa9c5397837a722 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:35:50 +0800 Subject: [PATCH 05/13] support on proxy Signed-off-by: ericsyh --- .../templates/proxy/_proxy.tpl | 54 ++++--------------- 1 file changed, 9 insertions(+), 45 deletions(-) diff --git a/charts/sn-platform-slim/templates/proxy/_proxy.tpl b/charts/sn-platform-slim/templates/proxy/_proxy.tpl index b10ab2214..cb7ab7198 100644 --- a/charts/sn-platform-slim/templates/proxy/_proxy.tpl +++ b/charts/sn-platform-slim/templates/proxy/_proxy.tpl @@ -105,7 +105,7 @@ Define proxy datadog annotation */}} {{- define "pulsar.proxy.datadog.annotation" -}} {{- if .Values.datadog.components.proxy.enabled }} -{{- if eq (.Values.datadog.components.proxy.checkType | default "openmetrics") "openmetrics" }} +{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.proxy.podName" . 
}}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | @@ -135,36 +135,13 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | ] } ] -{{- else if (.Values.datadog.components.proxy.checkType | default "openmetrics") "native" }} -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.check_names: | - ["pulsar"] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | - [{}] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | - [ - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", - "enable_health_service_check": true, - "timeout": 300, -{{- if .Values.auth.authentication.enabled }} -{{- if eq .Values.auth.authentication.provider "jwt" }} - "extra_headers": { - "Authorization": "Bearer %%env_PROXY_TOKEN%%" - }, {{- end }} -{{- end }} - "tags": [ - "pulsar-proxy: {{ template "pulsar.fullname" . }}-{{ .Values.proxy.component }}" - ] - } - ] -{{- else if (.Values.datadog.components.proxy.checkType | default "openmetrics") "both" }} -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.check_names: | - ["openmetrics", "pulsar"] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | - [{}, {}] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | - [ +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ { "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} @@ -182,27 +159,14 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | "Authorization": "Bearer %%env_PROXY_TOKEN%%" }, {{- end }} -{{- end }} - "tags": [ - "pulsar-proxy: {{ template "pulsar.fullname" . 
}}-{{ .Values.proxy.component }}" - ] - }, - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", - "enable_health_service_check": true, - "timeout": 300, -{{- if .Values.auth.authentication.enabled }} -{{- if eq .Values.auth.authentication.provider "jwt" }} - "extra_headers": { - "Authorization": "Bearer %%env_PROXY_TOKEN%%" - }, -{{- end }} {{- end }} "tags": [ "pulsar-proxy: {{ template "pulsar.fullname" . }}-{{ .Values.proxy.component }}" ] } ] + } + } {{- end }} {{- end }} {{- end }} From de819884ba88cf84d507d0ec4f14bedf5bc4bd82 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:37:44 +0800 Subject: [PATCH 06/13] support on zk Signed-off-by: ericsyh --- .../templates/zookeeper/_zookeeper.tpl | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl index e902a82f7..7cdfc9afd 100644 --- a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl @@ -119,6 +119,7 @@ Define zookeeper log volumes {{/*Define zookeeper datadog annotation*/}} {{- define "pulsar.zookeeper.datadog.annotation"}} {{- if .Values.datadog.components.zookeeper.enabled }} +{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.init_configs: | @@ -167,6 +168,57 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . }}", + {{ end -}} + "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_classes_loaded": "gauge", + "jvm_classes_loaded_total": "counter", + "jvm_classes_unloaded_total": "counter", + "jvm_buffer_pool_used_bytes": "gauge", + "jvm_buffer_pool_capacity_bytes": "gauge", + "jvm_buffer_pool_used_buffers": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "counter", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-zookeeper: {{ template "pulsar.fullname" . 
}}-{{ .Values.zookeeper.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/* From 8e518c52c9895b39267c4852d65a3d3a56287984 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 15:53:25 +0800 Subject: [PATCH 07/13] update Signed-off-by: ericsyh --- .../sn-platform-slim/templates/bookkeeper/_autorecovery.tpl | 4 ++-- charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl | 4 ++-- charts/sn-platform-slim/templates/broker/_broker.tpl | 4 ++-- charts/sn-platform-slim/templates/prometheus/_prometheus.tpl | 4 ++-- charts/sn-platform-slim/templates/proxy/_proxy.tpl | 4 ++-- charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl | 4 ++-- charts/sn-platform-slim/values.yaml | 4 +++- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl index 7889defc3..9653844a8 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl @@ -20,7 +20,7 @@ ${HOSTNAME}.{{ template "pulsar.autorecovery.service" . }}.{{ template "pulsar.n {{/*Define autorecovery datadog annotation*/}} {{- define "pulsar.autorecovery.datadog.annotation" -}} {{- if .Values.datadog.components.autorecovery.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.init_configs: | @@ -64,7 +64,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . 
}}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl index 1ebedd1c9..3f4b04f63 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl @@ -170,7 +170,7 @@ Define bookkeeper log volumes {{/*Define bookkeeper datadog annotation*/}} {{- define "pulsar.bookkeeper.datadog.annotation" -}} {{- if .Values.datadog.components.bookkeeper.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.init_configs: | @@ -214,7 +214,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/templates/broker/_broker.tpl b/charts/sn-platform-slim/templates/broker/_broker.tpl index 6e4d82680..12eb6a6e3 100644 --- a/charts/sn-platform-slim/templates/broker/_broker.tpl +++ b/charts/sn-platform-slim/templates/broker/_broker.tpl @@ -172,7 +172,7 @@ Define function worker config volume {{/*Define broker datadog annotation*/}} {{- define "pulsar.broker.datadog.annotation" -}} {{- if .Values.datadog.components.broker.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | @@ -263,7 +263,7 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . 
}}.instances: | } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl index 356337241..b69fb0535 100644 --- a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl @@ -43,7 +43,7 @@ Define toolset token volumes {{/*Define federation datadog annotation*/}} {{- define "pulsar.prometheus.datadog.annotation" -}} {{- if .Values.datadog.components.prometheus.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.init_configs: | @@ -131,7 +131,7 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/templates/proxy/_proxy.tpl b/charts/sn-platform-slim/templates/proxy/_proxy.tpl index cb7ab7198..8cd17e938 100644 --- a/charts/sn-platform-slim/templates/proxy/_proxy.tpl +++ b/charts/sn-platform-slim/templates/proxy/_proxy.tpl @@ -105,7 +105,7 @@ Define proxy datadog annotation */}} {{- define "pulsar.proxy.datadog.annotation" -}} {{- if .Values.datadog.components.proxy.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.proxy.podName" . 
}}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | @@ -136,7 +136,7 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl index 7cdfc9afd..cb44defcf 100644 --- a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl @@ -119,7 +119,7 @@ Define zookeeper log volumes {{/*Define zookeeper datadog annotation*/}} {{- define "pulsar.zookeeper.datadog.annotation"}} {{- if .Values.datadog.components.zookeeper.enabled }} -{{- if eq .Values.datadog.autodiscoveryVersion "v1" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.init_configs: | @@ -168,7 +168,7 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | } ] {{- end }} -{{- if eq .Values.datadog.autodiscoveryVersion "v2" }} +{{- if eq .Values.datadog.adVersion "v2" }} ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.checks: | { "openmetrics": { diff --git a/charts/sn-platform-slim/values.yaml b/charts/sn-platform-slim/values.yaml index 523bad088..80a33259d 100644 --- a/charts/sn-platform-slim/values.yaml +++ b/charts/sn-platform-slim/values.yaml @@ -1788,7 +1788,9 @@ datadog: ## Datadog configuration. Set namespace to be prefixed to every metric when viewed in Datadog. ## https://docs.datadoghq.com/containers/kubernetes/prometheus/?tab=kubernetesadv2 namespace: - autodiscoveryVersion: v1 + ## Datadog Autodiscovery version, support v1 and v2. 
+ ## https://docs.datadoghq.com/getting_started/containers/autodiscovery/?tab=adannotationsv2agent736 + adVersion: v1 components: zookeeper: enabled: false From c21d3c9c8bce965f8e4c90c43c3c4b2b5a6f8f4f Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 16:06:02 +0800 Subject: [PATCH 08/13] apply on sn-platform Signed-off-by: ericsyh --- .../templates/bookkeeper/_autorecovery.tpl | 47 +++++++++ .../sn-platform/templates/broker/_broker.tpl | 42 ++------- .../templates/prometheus/_prometheus.tpl | 91 +++++++++++++++++++ charts/sn-platform/templates/proxy/_proxy.tpl | 62 +++---------- charts/sn-platform/templates/vault/_vault.tpl | 25 +++++ .../templates/zookeeper/_zookeeper.tpl | 53 ++++++++++- charts/sn-platform/values.yaml | 3 + 7 files changed, 241 insertions(+), 82 deletions(-) diff --git a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl index 7ddf8aec4..f055d77b3 100644 --- a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl @@ -19,7 +19,8 @@ ${HOSTNAME}.{{ template "pulsar.autorecovery.service" . }}.{{ template "pulsar.n {{/*Define autorecovery datadog annotation*/}} {{- define "pulsar.autorecovery.datadog.annotation" -}} {{- if .Values.datadog.components.autorecovery.enabled }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.init_configs: | @@ -63,6 +64,52 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . 
}}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . }}", + {{ end -}} + "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_memory_direct_bytes_used": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "gauge", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-autorecovery: {{ template "pulsar.fullname" . 
}}-{{ .Values.autorecovery.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/* diff --git a/charts/sn-platform/templates/broker/_broker.tpl b/charts/sn-platform/templates/broker/_broker.tpl index dc418fe4f..12eb6a6e3 100644 --- a/charts/sn-platform/templates/broker/_broker.tpl +++ b/charts/sn-platform/templates/broker/_broker.tpl @@ -172,7 +172,7 @@ Define function worker config volume {{/*Define broker datadog annotation*/}} {{- define "pulsar.broker.datadog.annotation" -}} {{- if .Values.datadog.components.broker.enabled }} -{{- if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "openmetrics" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | @@ -262,29 +262,13 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | ] } ] -{{- else if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "native" }} -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | - ["pulsar"] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | - [{}] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | - [ - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", - "enable_health_service_check": true, - "timeout": 300, - "tags": [ - "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" - ] - } - ] -{{- else if eq (.Values.datadog.components.broker.checkType | default "openmetrics") "both" }} -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.check_names: | - ["openmetrics", "pulsar"] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | - [{}, {}] -ad.datadoghq.com/{{ template "pulsar.broker.podName" . 
}}.instances: | - [ +{{- end }} +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ { "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} @@ -366,16 +350,10 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] - }, - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", - "enable_health_service_check": true, - "timeout": 300, - "tags": [ - "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" - ] } ] + } + } {{- end }} {{- end }} {{- end }} diff --git a/charts/sn-platform/templates/prometheus/_prometheus.tpl b/charts/sn-platform/templates/prometheus/_prometheus.tpl index ddcbe604b..b69fb0535 100644 --- a/charts/sn-platform/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform/templates/prometheus/_prometheus.tpl @@ -43,6 +43,7 @@ Define toolset token volumes {{/*Define federation datadog annotation*/}} {{- define "pulsar.prometheus.datadog.annotation" -}} {{- if .Values.datadog.components.prometheus.enabled }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.init_configs: | @@ -130,6 +131,96 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon } ] {{- end }} +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.fullname" . 
}}-{{ .Values.prometheus.component }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . }}", + {{ end -}} + "metrics": {{ .Values.datadog.components.prometheus.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "pulsar_topics_count": "gauge", + "pulsar_rate_in": "gauge", + "pulsar_rate_out": "gauge", + "pulsar_subscriptions_count": "gauge", + "pulsar_producers_count": "gauge", + "pulsar_consumers_count": "gauge", + "pulsar_throughput_in": "gauge", + "pulsar_throughput_out": "gauge", + "pulsar_storage_size": "gauge", + "pulsar_msg_backlog": "gauge", + "pulsar_storage_backlog_size": "gauge", + "pulsar_storage_offloaded_size": "gauge", + "pulsar_storage_write_latency_le_0_5": "gauge", + "pulsar_storage_write_latency_le_1": "gauge", + "pulsar_storage_write_latency_le_5": "gauge", + "pulsar_storage_write_latency_le_10": "gauge", + "pulsar_storage_write_latency_le_20": "gauge", + "pulsar_storage_write_latency_le_50": "gauge", + "pulsar_storage_write_latency_le_100": "gauge", + "pulsar_storage_write_latency_le_200": "gauge", + "pulsar_storage_write_latency_le_1000": "gauge", + "pulsar_storage_write_latency_overflow": "gauge", + "pulsar_entry_size_le_128": "gauge", + "pulsar_entry_size_le_512": "gauge", + "pulsar_entry_size_le_1_kb": "gauge", + "pulsar_entry_size_le_2_kb": "gauge", + "pulsar_entry_size_le_4_kb": "gauge", + "pulsar_entry_size_le_16_kb": "gauge", + "pulsar_entry_size_le_100_kb": "gauge", + "pulsar_entry_size_le_1_mb": "gauge", + "pulsar_entry_size_le_overflow": "gauge", + "pulsar_subscription_back_log": "gauge", + "pulsar_subscription_back_log_no_delayed": 
"gauge", + "pulsar_subscription_delayed": "gauge", + "pulsar_subscription_msg_rate_redeliver": "gauge", + "pulsar_subscription_unacked_messages": "gauge", + "pulsar_subscription_blocked_on_unacked_messages": "gauge", + "pulsar_subscription_msg_rate_out": "gauge", + "pulsar_subscription_msg_throughput_out": "gauge", + "pulsar_in_bytes_total": "counter", + "pulsar_in_messages_total": "counter", + "topic_load_times": "counter", + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_classes_loaded": "gauge", + "jvm_classes_loaded_total": "counter", + "jvm_classes_unloaded_total": "counter", + "jvm_buffer_pool_used_bytes": "gauge", + "jvm_buffer_pool_capacity_bytes": "gauge", + "jvm_buffer_pool_used_buffers": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "counter", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + } + } + ] + } + } +{{- end }} +{{- end }} {{- end }} diff --git a/charts/sn-platform/templates/proxy/_proxy.tpl b/charts/sn-platform/templates/proxy/_proxy.tpl index 2b2f333b6..8cd17e938 100644 --- a/charts/sn-platform/templates/proxy/_proxy.tpl +++ b/charts/sn-platform/templates/proxy/_proxy.tpl @@ -105,7 +105,7 @@ Define proxy datadog annotation */}} {{- define "pulsar.proxy.datadog.annotation" -}} {{- if .Values.datadog.components.proxy.enabled }} -{{- if eq (.Values.datadog.components.proxy.checkType | default "openmetrics") "openmetrics" }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.proxy.podName" . 
}}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | @@ -123,8 +123,8 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | "health_service_check": true, "prometheus_timeout": 1000, "max_returned_metrics": 1000000, -{{- if and .Values.auth.authentication.enabled .Values.proxy.authenticateMetricsEndpoint.enabled }} -{{- if or (eq .Values.auth.authentication.provider "jwt") .Values.auth.vault.enabled }} +{{- if .Values.auth.authentication.enabled }} +{{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { "Authorization": "Bearer %%env_PROXY_TOKEN%%" }, @@ -135,36 +135,13 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | ] } ] -{{- else if (.Values.datadog.components.proxy.checkType | default "openmetrics") "native" }} -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.check_names: | - ["pulsar"] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | - [{}] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | - [ - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", - "enable_health_service_check": true, - "timeout": 300, -{{- if and .Values.auth.authentication.enabled .Values.proxy.authenticateMetricsEndpoint.enabled }} -{{- if or (eq .Values.auth.authentication.provider "jwt") .Values.auth.vault.enabled }} - "extra_headers": { - "Authorization": "Bearer %%env_PROXY_TOKEN%%" - }, -{{- end }} {{- end }} - "tags": [ - "pulsar-proxy: {{ template "pulsar.fullname" . }}-{{ .Values.proxy.component }}" - ] - } - ] -{{- else if (.Values.datadog.components.proxy.checkType | default "openmetrics") "both" }} -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.check_names: | - ["openmetrics", "pulsar"] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | - [{}, {}] -ad.datadoghq.com/{{ template "pulsar.proxy.podName" . 
}}.instances: | - [ +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ { "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} @@ -176,23 +153,8 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | "health_service_check": true, "prometheus_timeout": 1000, "max_returned_metrics": 1000000, -{{- if and .Values.auth.authentication.enabled .Values.proxy.authenticateMetricsEndpoint.enabled }} -{{- if or (eq .Values.auth.authentication.provider "jwt") .Values.auth.vault.enabled }} - "extra_headers": { - "Authorization": "Bearer %%env_PROXY_TOKEN%%" - }, -{{- end }} -{{- end }} - "tags": [ - "pulsar-proxy: {{ template "pulsar.fullname" . }}-{{ .Values.proxy.component }}" - ] - }, - { - "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", - "enable_health_service_check": true, - "timeout": 300, -{{- if and .Values.auth.authentication.enabled .Values.proxy.authenticateMetricsEndpoint.enabled }} -{{- if or (eq .Values.auth.authentication.provider "jwt") .Values.auth.vault.enabled }} +{{- if .Values.auth.authentication.enabled }} +{{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { "Authorization": "Bearer %%env_PROXY_TOKEN%%" }, @@ -203,6 +165,8 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . 
}}.instances: | ] } ] + } + } {{- end }} {{- end }} {{- end }} diff --git a/charts/sn-platform/templates/vault/_vault.tpl b/charts/sn-platform/templates/vault/_vault.tpl index fdaf2bff1..47e250b72 100644 --- a/charts/sn-platform/templates/vault/_vault.tpl +++ b/charts/sn-platform/templates/vault/_vault.tpl @@ -25,6 +25,7 @@ Inject vault token values to pod through env variables {{/*Define vault datadog annotation*/}} {{- define "pulsar.vault.datadog.annotation" -}} {{- if .Values.datadog.components.vault.enabled }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/vault.check_names: | ["vault"] ad.datadoghq.com/vault.init_configs: | @@ -46,6 +47,30 @@ ad.datadoghq.com/vault.instances: | } ] {{- end }} +{{- if eq .Values.datadog.adVersion "v2" }}{{/* Autodiscovery v2: the key under "checks" names the integration, so it must be "vault" to match the v1 check_names above */}} +ad.datadoghq.com/vault.checks: | + { + "vault": { + "init_config": [{}], + "instances": [ + { + "api_url": "http://%%host%%:8200/v1", + {{- if .Values.datadog.components.vault.auth.enabled }} + "client_token": {{ .Values.datadog.components.vault.auth.token }} + {{- else }} + "no_token": true + {{- end }} + {{- if .Values.datadog.components.vault.tags }} + "tags": [ +{{ toYaml .Values.datadog.components.vault.tags | indent 8 }} + ] + {{- end }} + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{- define "pulsar.vault-unseal-secret-key-name" -}} diff --git a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl index e902a82f7..8e48b9239 100644 --- a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl @@ -118,7 +118,7 @@ Define zookeeper log volumes {{/*Define zookeeper datadog annotation*/}} {{- define "pulsar.zookeeper.datadog.annotation"}} -{{- if .Values.datadog.components.zookeeper.enabled }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.init_configs: | @@ -167,6 +167,57 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . }}", + {{ end -}} + "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_classes_loaded": "gauge", + "jvm_classes_loaded_total": "counter", + "jvm_classes_unloaded_total": "counter", + "jvm_buffer_pool_used_bytes": "gauge", + "jvm_buffer_pool_capacity_bytes": "gauge", + "jvm_buffer_pool_used_buffers": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "counter", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-zookeeper: {{ template "pulsar.fullname" . 
}}-{{ .Values.zookeeper.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/* diff --git a/charts/sn-platform/values.yaml b/charts/sn-platform/values.yaml index 9f65b8d8c..09ef945b6 100644 --- a/charts/sn-platform/values.yaml +++ b/charts/sn-platform/values.yaml @@ -1869,6 +1869,9 @@ datadog: ## Datadog configuration. Set namespace to be prefixed to every metric when viewed in Datadog. ## https://docs.datadoghq.com/containers/kubernetes/prometheus/?tab=kubernetesadv2 namespace: + ## Datadog Autodiscovery version, support v1 and v2. + ## https://docs.datadoghq.com/getting_started/containers/autodiscovery/?tab=adannotationsv2agent736 + adVersion: v1 components: zookeeper: enabled: false From ad1114f97e7d531533784af9e2f1fc180988d964 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 16:12:36 +0800 Subject: [PATCH 09/13] fix lint Signed-off-by: ericsyh --- charts/sn-platform/templates/zookeeper/_zookeeper.tpl | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl index 8e48b9239..cb44defcf 100644 --- a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl @@ -118,6 +118,7 @@ Define zookeeper log volumes {{/*Define zookeeper datadog annotation*/}} {{- define "pulsar.zookeeper.datadog.annotation"}} +{{- if .Values.datadog.components.zookeeper.enabled }} {{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.check_names: | ["openmetrics"] From fb4890d855962df659e3586b32f286ad4c6da6ae Mon Sep 17 00:00:00 2001 From: ericsyh Date: Mon, 30 Dec 2024 16:15:58 +0800 Subject: [PATCH 10/13] fix lint Signed-off-by: ericsyh --- .../templates/bookkeeper/_autorecovery.tpl | 1 + .../templates/bookkeeper/_bookkeeper.tpl | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl index f055d77b3..5f9407b2e 100644 --- a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl @@ -19,6 +19,7 @@ ${HOSTNAME}.{{ template "pulsar.autorecovery.service" . }}.{{ template "pulsar.n {{/*Define autorecovery datadog annotation*/}} {{- define "pulsar.autorecovery.datadog.annotation" -}} +{{- if .Values.datadog.components.autorecovery.enabled }} {{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.check_names: | ["openmetrics"] diff --git a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl index 177cadd05..3f4b04f63 100644 --- a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl @@ -170,6 +170,7 @@ Define bookkeeper log volumes {{/*Define bookkeeper datadog annotation*/}} {{- define "pulsar.bookkeeper.datadog.annotation" -}} {{- if .Values.datadog.components.bookkeeper.enabled }} +{{- if eq .Values.datadog.adVersion "v1" }} ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.check_names: | ["openmetrics"] ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.init_configs: | @@ -213,6 +214,52 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | } ] {{- end }} +{{- if eq .Values.datadog.adVersion "v2" }} +ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . 
}}.checks: | + { + "openmetrics": { + "init_config": [{}], + "instances": [ + { + "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + {{ if .Values.datadog.namespace -}} + "namespace": "{{ .Values.datadog.namespace }}", + {{ else -}} + "namespace": "{{ template "pulsar.namespace" . }}", + {{ end -}} + "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, + "health_service_check": true, + "prometheus_timeout": 1000, + "max_returned_metrics": 1000000, + "type_overrides": { + "jvm_memory_bytes_used": "gauge", + "jvm_memory_bytes_committed": "gauge", + "jvm_memory_bytes_max": "gauge", + "jvm_memory_bytes_init": "gauge", + "jvm_memory_pool_bytes_used": "gauge", + "jvm_memory_pool_bytes_committed": "gauge", + "jvm_memory_pool_bytes_max": "gauge", + "jvm_memory_pool_bytes_init": "gauge", + "jvm_memory_direct_bytes_used": "gauge", + "jvm_threads_current": "gauge", + "jvm_threads_daemon": "gauge", + "jvm_threads_peak": "gauge", + "jvm_threads_started_total": "gauge", + "jvm_threads_deadlocked": "gauge", + "jvm_threads_deadlocked_monitor": "gauge", + "jvm_gc_collection_seconds_count": "gauge", + "jvm_gc_collection_seconds_sum": "gauge", + "jvm_memory_direct_bytes_max": "gauge" + }, + "tags": [ + "pulsar-bookie: {{ template "pulsar.fullname" . 
}}-{{ .Values.bookkeeper.component }}" + ] + } + ] + } + } +{{- end }} +{{- end }} {{- end }} {{/*Define bookkeeper service account*/}} From 76359e323aa89fa5bd1d10ada05ea3a01be351ee Mon Sep 17 00:00:00 2001 From: ericsyh Date: Tue, 31 Dec 2024 10:11:22 +0800 Subject: [PATCH 11/13] fix metrics type Signed-off-by: ericsyh --- .../sn-platform-slim/templates/bookkeeper/_autorecovery.tpl | 4 ++-- charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl | 4 ++-- charts/sn-platform/templates/bookkeeper/_autorecovery.tpl | 4 ++-- charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl index 9653844a8..808e44748 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl @@ -51,7 +51,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", @@ -94,7 +94,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . 
}}.checks: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", diff --git a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl index 3f4b04f63..dbec1a98b 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl @@ -201,7 +201,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", @@ -244,7 +244,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", diff --git a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl index 5f9407b2e..f9e1aef35 100644 --- a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl @@ -51,7 +51,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . 
}}.instances: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", @@ -94,7 +94,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", diff --git a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl index 3f4b04f63..dbec1a98b 100644 --- a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl @@ -201,7 +201,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", @@ -244,7 +244,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . 
}}.checks: | "jvm_threads_current": "gauge", "jvm_threads_daemon": "gauge", "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "gauge", + "jvm_threads_started_total": "counter", "jvm_threads_deadlocked": "gauge", "jvm_threads_deadlocked_monitor": "gauge", "jvm_gc_collection_seconds_count": "gauge", From 8bcdb2c0ed9f82bcdd935a3156bd03a016a3a629 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Thu, 2 Jan 2025 10:40:46 +0800 Subject: [PATCH 12/13] update the dd v1 template Signed-off-by: ericsyh --- .../templates/bookkeeper/_autorecovery.tpl | 57 +------ .../templates/bookkeeper/_bookkeeper.tpl | 55 +------ .../templates/broker/_broker.tpl | 149 +---------------- .../templates/prometheus/_prometheus.tpl | 151 +----------------- .../templates/proxy/_proxy.tpl | 15 +- .../templates/zookeeper/_zookeeper.tpl | 65 +------- charts/sn-platform-slim/values.yaml | 12 +- 7 files changed, 44 insertions(+), 460 deletions(-) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl index 808e44748..1496fbda5 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl @@ -28,36 +28,15 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} + "enable_health_service_check": true, + "timeout": 1000, "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, "tags": [ "pulsar-autorecovery: {{ template "pulsar.fullname" . }}-{{ .Values.autorecovery.component }}" ] @@ -68,44 +47,22 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-autorecovery: {{ template "pulsar.fullname" . }}-{{ .Values.autorecovery.component }}" ] } - ] + ] } } {{- end }} diff --git a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl index dbec1a98b..c8662e162 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl @@ -178,36 +178,15 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-bookie: {{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}" ] @@ -218,39 +197,17 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-bookie: {{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}" ] diff --git a/charts/sn-platform-slim/templates/broker/_broker.tpl b/charts/sn-platform-slim/templates/broker/_broker.tpl index 12eb6a6e3..e98eaa60e 100644 --- a/charts/sn-platform-slim/templates/broker/_broker.tpl +++ b/charts/sn-platform-slim/templates/broker/_broker.tpl @@ -180,83 +180,15 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.broker.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] @@ -267,86 +199,17 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.broker.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] diff --git a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl index b69fb0535..b2e54099c 100644 --- a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl @@ -57,77 +57,9 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} - "metrics": {{ .Values.datadog.components.prometheus.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - } + "enable_health_service_check": true, + "timeout": 1000, + "metrics": {{ .Values.datadog.components.prometheus.metrics }} } ] {{- end }} @@ -135,86 +67,17 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} - "metrics": {{ .Values.datadog.components.prometheus.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - } + "enable_health_service_check": true, + "timeout": 1000, + "metrics": {{ .Values.datadog.components.prometheus.metrics }} } ] } diff --git a/charts/sn-platform-slim/templates/proxy/_proxy.tpl b/charts/sn-platform-slim/templates/proxy/_proxy.tpl index 8cd17e938..efdc921a3 100644 --- a/charts/sn-platform-slim/templates/proxy/_proxy.tpl +++ b/charts/sn-platform-slim/templates/proxy/_proxy.tpl @@ -113,16 +113,15 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.proxy.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, + "enable_health_service_check": true, + "timeout": 1000, {{- if .Values.auth.authentication.enabled }} {{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { @@ -140,19 +139,17 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.proxy.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, + "enable_health_service_check": true, + "timeout": 1000, {{- if .Values.auth.authentication.enabled }} {{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { diff --git a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl index cb44defcf..0eb39e459 100644 --- a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl @@ -127,41 +127,15 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-zookeeper: {{ template "pulsar.fullname" . }}-{{ .Values.zookeeper.component }}" ] @@ -172,44 +146,17 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.checks: | { "openmetrics": { - "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-zookeeper: {{ template "pulsar.fullname" . 
}}-{{ .Values.zookeeper.component }}" ] diff --git a/charts/sn-platform-slim/values.yaml b/charts/sn-platform-slim/values.yaml index 80a33259d..0ab757848 100644 --- a/charts/sn-platform-slim/values.yaml +++ b/charts/sn-platform-slim/values.yaml @@ -1795,32 +1795,32 @@ datadog: zookeeper: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] bookkeeper: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] autorecovery: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] broker: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] proxy: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] prometheus: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] ## Monitoring Stack: Grafana From 731cbbe8eca7141089bf99d75cb71623fc60aad1 Mon Sep 17 00:00:00 2001 From: ericsyh Date: Thu, 2 Jan 2025 14:13:54 +0800 Subject: [PATCH 13/13] support on sn-platform chart Signed-off-by: ericsyh --- .../templates/bookkeeper/_autorecovery.tpl | 3 +- .../templates/bookkeeper/_bookkeeper.tpl | 1 + .../templates/broker/_broker.tpl | 1 + .../templates/prometheus/_prometheus.tpl | 1 + .../templates/proxy/_proxy.tpl | 1 + .../templates/zookeeper/_zookeeper.tpl | 1 + .../templates/bookkeeper/_autorecovery.tpl | 54 +------ .../templates/bookkeeper/_bookkeeper.tpl | 54 +------ .../sn-platform/templates/broker/_broker.tpl | 148 +---------------- .../templates/prometheus/_prometheus.tpl | 150 +----------------- charts/sn-platform/templates/proxy/_proxy.tpl | 14 +- .../templates/zookeeper/_zookeeper.tpl | 64 +------- charts/sn-platform/values.yaml | 12 +- 13 files changed, 50 insertions(+), 454 deletions(-) diff --git a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl index 1496fbda5..5d2a11831 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_autorecovery.tpl @@ -47,6 +47,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . 
}}.instances: | ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", @@ -62,7 +63,7 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | "pulsar-autorecovery: {{ template "pulsar.fullname" . }}-{{ .Values.autorecovery.component }}" ] } - ] + ] } } {{- end }} diff --git a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl index c8662e162..448b338bb 100644 --- a/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform-slim/templates/bookkeeper/_bookkeeper.tpl @@ -197,6 +197,7 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", diff --git a/charts/sn-platform-slim/templates/broker/_broker.tpl b/charts/sn-platform-slim/templates/broker/_broker.tpl index e98eaa60e..0702d9d5f 100644 --- a/charts/sn-platform-slim/templates/broker/_broker.tpl +++ b/charts/sn-platform-slim/templates/broker/_broker.tpl @@ -199,6 +199,7 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.broker.podName" . 
}}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", diff --git a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl index b2e54099c..9305b8c7f 100644 --- a/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform-slim/templates/prometheus/_prometheus.tpl @@ -67,6 +67,7 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.component }}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", diff --git a/charts/sn-platform-slim/templates/proxy/_proxy.tpl b/charts/sn-platform-slim/templates/proxy/_proxy.tpl index efdc921a3..0eb638f85 100644 --- a/charts/sn-platform-slim/templates/proxy/_proxy.tpl +++ b/charts/sn-platform-slim/templates/proxy/_proxy.tpl @@ -139,6 +139,7 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", diff --git a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl index 0eb39e459..6d46d1b59 100644 --- a/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform-slim/templates/zookeeper/_zookeeper.tpl @@ -146,6 +146,7 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . 
}}.checks: | { "openmetrics": { + "init_config": [{}], "instances": [ { "openmetrics_endpoint": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", diff --git a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl index f9e1aef35..646c71568 100644 --- a/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl +++ b/charts/sn-platform/templates/bookkeeper/_autorecovery.tpl @@ -28,36 +28,15 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} + "enable_health_service_check": true, + "timeout": 1000, "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, "tags": [ "pulsar-autorecovery: {{ template "pulsar.fullname" . 
}}-{{ .Values.autorecovery.component }}" ] @@ -71,36 +50,15 @@ ad.datadoghq.com/{{ template "pulsar.autorecovery.podName" . }}.checks: | "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.autorecovery.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.autorecovery.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-autorecovery: {{ template "pulsar.fullname" . }}-{{ .Values.autorecovery.component }}" ] diff --git a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl index dbec1a98b..448b338bb 100644 --- a/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl +++ b/charts/sn-platform/templates/bookkeeper/_bookkeeper.tpl @@ -178,36 +178,15 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . 
}}.init_configs: | ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-bookie: {{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}" ] @@ -221,36 +200,15 @@ ad.datadoghq.com/{{ template "pulsar.bookkeeper.podName" . }}.checks: | "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.bookkeeper.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.bookkeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_memory_direct_bytes_used": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-bookie: {{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}" ] diff --git a/charts/sn-platform/templates/broker/_broker.tpl b/charts/sn-platform/templates/broker/_broker.tpl index 12eb6a6e3..0702d9d5f 100644 --- a/charts/sn-platform/templates/broker/_broker.tpl +++ b/charts/sn-platform/templates/broker/_broker.tpl @@ -180,83 +180,15 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.broker.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] @@ -270,83 +202,15 @@ ad.datadoghq.com/{{ template "pulsar.broker.podName" . }}.checks: | "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.broker.ports.http }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.broker.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-broker: {{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}" ] diff --git a/charts/sn-platform/templates/prometheus/_prometheus.tpl b/charts/sn-platform/templates/prometheus/_prometheus.tpl index b69fb0535..9305b8c7f 100644 --- a/charts/sn-platform/templates/prometheus/_prometheus.tpl +++ b/charts/sn-platform/templates/prometheus/_prometheus.tpl @@ -57,77 +57,9 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} - "metrics": {{ .Values.datadog.components.prometheus.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - } + "enable_health_service_check": true, + "timeout": 1000, + "metrics": {{ .Values.datadog.components.prometheus.metrics }} } ] {{- end }} @@ -138,83 +70,15 @@ ad.datadoghq.com/{{ template "pulsar.fullname" . }}-{{ .Values.prometheus.compon "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.prometheus.port }}/federate?match[]=%7B__name__%3D~%22pulsar_.%2B%7Cjvm_.%2B%7Ctopic_.%2B%22%7D", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} - "metrics": {{ .Values.datadog.components.prometheus.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "pulsar_topics_count": "gauge", - "pulsar_rate_in": "gauge", - "pulsar_rate_out": "gauge", - "pulsar_subscriptions_count": "gauge", - "pulsar_producers_count": "gauge", - "pulsar_consumers_count": "gauge", - "pulsar_throughput_in": "gauge", - "pulsar_throughput_out": "gauge", - "pulsar_storage_size": "gauge", - "pulsar_msg_backlog": "gauge", - "pulsar_storage_backlog_size": "gauge", - "pulsar_storage_offloaded_size": "gauge", - "pulsar_storage_write_latency_le_0_5": "gauge", - "pulsar_storage_write_latency_le_1": "gauge", - "pulsar_storage_write_latency_le_5": "gauge", - "pulsar_storage_write_latency_le_10": "gauge", - "pulsar_storage_write_latency_le_20": "gauge", - "pulsar_storage_write_latency_le_50": "gauge", - "pulsar_storage_write_latency_le_100": "gauge", - "pulsar_storage_write_latency_le_200": "gauge", - "pulsar_storage_write_latency_le_1000": "gauge", - "pulsar_storage_write_latency_overflow": "gauge", - "pulsar_entry_size_le_128": "gauge", - "pulsar_entry_size_le_512": "gauge", - "pulsar_entry_size_le_1_kb": "gauge", - "pulsar_entry_size_le_2_kb": "gauge", - "pulsar_entry_size_le_4_kb": "gauge", - "pulsar_entry_size_le_16_kb": "gauge", - "pulsar_entry_size_le_100_kb": "gauge", - "pulsar_entry_size_le_1_mb": "gauge", - "pulsar_entry_size_le_overflow": "gauge", - "pulsar_subscription_back_log": "gauge", - "pulsar_subscription_back_log_no_delayed": "gauge", - "pulsar_subscription_delayed": "gauge", - "pulsar_subscription_msg_rate_redeliver": "gauge", - "pulsar_subscription_unacked_messages": "gauge", - "pulsar_subscription_blocked_on_unacked_messages": "gauge", - "pulsar_subscription_msg_rate_out": "gauge", - "pulsar_subscription_msg_throughput_out": "gauge", - "pulsar_in_bytes_total": "counter", - "pulsar_in_messages_total": "counter", - "topic_load_times": 
"counter", - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - } + "enable_health_service_check": true, + "timeout": 1000, + "metrics": {{ .Values.datadog.components.prometheus.metrics }} } ] } diff --git a/charts/sn-platform/templates/proxy/_proxy.tpl b/charts/sn-platform/templates/proxy/_proxy.tpl index 8cd17e938..0eb638f85 100644 --- a/charts/sn-platform/templates/proxy/_proxy.tpl +++ b/charts/sn-platform/templates/proxy/_proxy.tpl @@ -113,16 +113,15 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.proxy.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, + "enable_health_service_check": true, + "timeout": 1000, {{- if .Values.auth.authentication.enabled }} {{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { @@ -143,16 +142,15 @@ ad.datadoghq.com/{{ template "pulsar.proxy.podName" . }}.checks: | "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.proxy.ports.http }}/metrics/", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . }}", {{ end -}} "metrics": {{ .Values.datadog.components.proxy.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, + "enable_health_service_check": true, + "timeout": 1000, {{- if .Values.auth.authentication.enabled }} {{- if eq .Values.auth.authentication.provider "jwt" }} "extra_headers": { diff --git a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl index cb44defcf..6d46d1b59 100644 --- a/charts/sn-platform/templates/zookeeper/_zookeeper.tpl +++ b/charts/sn-platform/templates/zookeeper/_zookeeper.tpl @@ -127,41 +127,15 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.init_configs: | ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.instances: | [ { - "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-zookeeper: {{ template "pulsar.fullname" . }}-{{ .Values.zookeeper.component }}" ] @@ -175,41 +149,15 @@ ad.datadoghq.com/{{ template "pulsar.zookeeper.podName" . }}.checks: | "init_config": [{}], "instances": [ { - "prometheus_url": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", + "openmetrics_endpoint": "http://%%host%%:{{ .Values.zookeeper.ports.metrics }}/metrics", {{ if .Values.datadog.namespace -}} "namespace": "{{ .Values.datadog.namespace }}", {{ else -}} "namespace": "{{ template "pulsar.namespace" . 
}}", {{ end -}} "metrics": {{ .Values.datadog.components.zookeeper.metrics }}, - "health_service_check": true, - "prometheus_timeout": 1000, - "max_returned_metrics": 1000000, - "type_overrides": { - "jvm_memory_bytes_used": "gauge", - "jvm_memory_bytes_committed": "gauge", - "jvm_memory_bytes_max": "gauge", - "jvm_memory_bytes_init": "gauge", - "jvm_memory_pool_bytes_used": "gauge", - "jvm_memory_pool_bytes_committed": "gauge", - "jvm_memory_pool_bytes_max": "gauge", - "jvm_memory_pool_bytes_init": "gauge", - "jvm_classes_loaded": "gauge", - "jvm_classes_loaded_total": "counter", - "jvm_classes_unloaded_total": "counter", - "jvm_buffer_pool_used_bytes": "gauge", - "jvm_buffer_pool_capacity_bytes": "gauge", - "jvm_buffer_pool_used_buffers": "gauge", - "jvm_threads_current": "gauge", - "jvm_threads_daemon": "gauge", - "jvm_threads_peak": "gauge", - "jvm_threads_started_total": "counter", - "jvm_threads_deadlocked": "gauge", - "jvm_threads_deadlocked_monitor": "gauge", - "jvm_gc_collection_seconds_count": "gauge", - "jvm_gc_collection_seconds_sum": "gauge", - "jvm_memory_direct_bytes_max": "gauge" - }, + "enable_health_service_check": true, + "timeout": 1000, "tags": [ "pulsar-zookeeper: {{ template "pulsar.fullname" . }}-{{ .Values.zookeeper.component }}" ] diff --git a/charts/sn-platform/values.yaml b/charts/sn-platform/values.yaml index 09ef945b6..7a1ab9ed6 100644 --- a/charts/sn-platform/values.yaml +++ b/charts/sn-platform/values.yaml @@ -1876,27 +1876,27 @@ datadog: zookeeper: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] bookkeeper: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] autorecovery: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] broker: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] proxy: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] vault: enabled: false @@ -1907,7 +1907,7 @@ datadog: prometheus: enabled: false metrics: [ - "\"_*\"" + "\".*\"" ] ## Monitoring Stack: Grafana