diff --git a/charts/agent/Chart.yaml b/charts/agent/Chart.yaml
index dded4fe8..d7b953ac 100644
--- a/charts/agent/Chart.yaml
+++ b/charts/agent/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: agent
 description: Chart to install K8s collection stack based on Observe Agent
 type: application
-version: 0.19.0
+version: 0.20.0
 appVersion: "1.1.0"
 dependencies:
   - name: opentelemetry-collector
diff --git a/charts/agent/README.md b/charts/agent/README.md
index f29edb92..5fc33549 100644
--- a/charts/agent/README.md
+++ b/charts/agent/README.md
@@ -1,6 +1,6 @@
 # agent
 
-![Version: 0.19.0](https://img.shields.io/badge/Version-0.19.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.0](https://img.shields.io/badge/AppVersion-1.1.0-informational?style=flat-square)
+![Version: 0.20.0](https://img.shields.io/badge/Version-0.20.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.1.0](https://img.shields.io/badge/AppVersion-1.1.0-informational?style=flat-square)
 
 > [!CAUTION]
 > This chart is under active development and is not meant to be installed yet.
@@ -332,6 +332,15 @@ Chart to install K8s collection stack based on Observe Agent
 | node-logs-metrics.serviceAccount.create | bool | `false` |  |
 | node-logs-metrics.serviceAccount.name | string | `"observe-agent-service-account"` |  |
 | node.containers.logs.enabled | bool | `true` |  |
+| node.containers.logs.exclude | string | `"[]"` |  |
+| node.containers.logs.include | string | `"[\"/var/log/pods/*/*/*.log\", \"/var/log/kube-apiserver-audit.log\"]"` |  |
+| node.containers.logs.lookbackPeriod | string | `"24h"` |  |
+| node.containers.logs.maxLogSize | string | `"512kb"` |  |
+| node.containers.logs.retryOnFailure.enabled | bool | `true` |  |
+| node.containers.logs.retryOnFailure.initialInterval | string | `"1s"` |  |
+| node.containers.logs.retryOnFailure.maxElapsedTime | string | `"5m"` |  |
+| node.containers.logs.retryOnFailure.maxInterval | string | `"30s"` |  |
+| node.containers.logs.startAt | string | `"end"` |  |
 | node.containers.metrics.enabled | bool | `true` |  |
 | node.enabled | bool | `true` |  |
 | node.metrics.enabled | bool | `true` |  |
diff --git a/charts/agent/templates/_node-logs-metrics-config.tpl b/charts/agent/templates/_node-logs-metrics-config.tpl
index 173692c7..f031f53f 100644
--- a/charts/agent/templates/_node-logs-metrics-config.tpl
+++ b/charts/agent/templates/_node-logs-metrics-config.tpl
@@ -124,19 +124,22 @@ receivers:
 {{ end -}}
 {{- if .Values.node.containers.logs.enabled }}
   filelog:
-    exclude: []
-    include:
-    - /var/log/pods/*/*/*.log
-    - /var/log/kube-apiserver-audit.log
+    exclude: {{ .Values.node.containers.logs.exclude }}
+    include: {{ .Values.node.containers.logs.include }}
     include_file_name: false
     include_file_path: true
+    exclude_older_than: {{ .Values.node.containers.logs.lookbackPeriod }}
+    max_log_size: {{ .Values.node.containers.logs.maxLogSize }}
     operators:
     - id: container-parser
       max_log_size: 102400
       type: container
     retry_on_failure:
-      enabled: true
-    start_at: end
+      enabled: {{ .Values.node.containers.logs.retryOnFailure.enabled }}
+      initial_interval: {{ .Values.node.containers.logs.retryOnFailure.initialInterval }}
+      max_interval: {{ .Values.node.containers.logs.retryOnFailure.maxInterval }}
+      max_elapsed_time: {{ .Values.node.containers.logs.retryOnFailure.maxElapsedTime }}
+    start_at: {{ .Values.node.containers.logs.startAt }}
     storage: file_storage
 {{ end }}
 processors:
diff --git a/charts/agent/values.yaml b/charts/agent/values.yaml
index bd09a42a..c698a0bf 100644
--- a/charts/agent/values.yaml
+++ b/charts/agent/values.yaml
@@ -54,6 +54,26 @@ node:
   containers:
     logs:
       enabled: true
+      # log lines above this size will be truncated
+      maxLogSize: 512kb
+      # If true, the receiver will pause reading a file and attempt to resend the current batch of logs if it encounters an error from downstream components.
+      retryOnFailure:
+        enabled: true
+        # Time to wait after the first failure before retrying.
+        initialInterval: 1s
+        # Upper bound on retry backoff interval. Once this value is reached the delay between consecutive retries will remain constant at the specified value.
+        maxInterval: 30s
+        # Maximum amount of time (including retries) spent trying to send a logs batch to a downstream consumer. Once this value is reached, the data is discarded. Retrying never stops if set to 0.
+        maxElapsedTime: 5m
+      # A list of file glob patterns that match the file paths to be read. Need to put inside of '' to avoid helm stripping commas and quotes.
+      include: '["/var/log/pods/*/*/*.log", "/var/log/kube-apiserver-audit.log"]'
+      # A list of file glob patterns to exclude from reading. This is applied against the paths matched by include. Need to put inside of '' to avoid helm stripping commas and quotes.
+      exclude: '[]'
+      # time unit 1m, 1h
+      lookbackPeriod: 24h
+      # At startup, where to start reading logs from the file. Options are beginning or end.
+      startAt: end
+
     metrics:
       enabled: true
 
diff --git a/examples/agent/logs/README.md b/examples/agent/logs/README.md
new file mode 100644
index 00000000..9c109fb2
--- /dev/null
+++ b/examples/agent/logs/README.md
@@ -0,0 +1,49 @@
+# Generating sample logs
+This creates a number of pods that will generate different types of sample logs so we can test various logging scenarios and confirm our switches work as expected.
+
+## Create config map with script in it
+```
+kubectl create configmap -n default log-generator-script --from-file=log-generator.sh
+```
+
+## Deploy pods that use script and pass env variables to set type, length, etc.
+```
+kubectl apply -n default -f sample-log-pods.yaml
+```
+
+### Cleanup
+```
+kubectl delete -n default configmap log-generator-script
+
+kubectl delete -n default -f sample-log-pods.yaml
+```
+
+## Deploy k8s monitoring with log collection enabled
+The node-logs-values.yaml file has an include and exclude option set. The exclude pattern will eliminate all logs with path starting with "/var/log/pods/default_log-generator-csv". You can experiment with different patterns to see how to exclude/include.
+```
+helm install logs-example -n k8smonitoring \
+  --set observe.token.value=$TOKEN \
+  --set observe.collectionEndpoint.value=$ENDPOINT \
+  -f ./node-logs-values.yaml ../../../charts/agent
+
+helm upgrade logs-example -n k8smonitoring -f ./node-logs-values.yaml ../../../charts/agent
+```
+
+### Opal validation
+```
+filter OBSERVATION_KIND = "otellogs"
+
+make_col debug_source:string(FIELDS.logs.attributes.debug_source)
+filter debug_source = "pod_logs"
+make_col logfilepath:string(FIELDS.logs.attributes['log.file.path'])
+make_col k8spodname:string(FIELDS.resource.attributes['k8s.pod.name'])
+
+make_col k8snamespacename:string(FIELDS.resource.attributes['k8s.namespace.name'])
+
+filter contains(k8spodname, "log-generator")
+```
+### Cleanup
+
+```
+helm delete logs-example -n k8smonitoring
+```
diff --git a/examples/agent/logs/log-generator.sh b/examples/agent/logs/log-generator.sh
new file mode 100644
index 00000000..6600c68c
--- /dev/null
+++ b/examples/agent/logs/log-generator.sh
@@ -0,0 +1,95 @@
+#!/bin/sh
+
+# Set locale to use UTF-8 encoding
+export LANG=en_US.UTF-8
+export LC_ALL=en_US.UTF-8
+
+# Default values
+LOG_TYPE=${LOG_TYPE:-"apache"} # Default to 'apache' logs if LOG_TYPE is not set
+LOG_LENGTH=${LOG_LENGTH:-5} # Default to 5 lines per batch if LOG_LENGTH is not set
+LINE_LENGTH=${LINE_LENGTH:-50} # Default to 50 characters if LINE_LENGTH is not set
+SLEEP_LENGTH=${SLEEP_LENGTH:-2} # Default to 2s if SLEEP_LENGTH is not set
+
+echo "LOG_TYPE = $LOG_TYPE"
+echo "LOG_LENGTH = $LOG_LENGTH"
+echo "LINE_LENGTH = $LINE_LENGTH"
+echo "SLEEP_LENGTH = $SLEEP_LENGTH"
+
+# Function to generate a random string of specified length
+generate_random_string() {
+    local length=$1
+    < /dev/urandom tr -dc 'A-Za-z0-9' | head -c "$length"
+}
+
+# Function to generate Apache logs
+generate_apache_logs() {
+    for i in $(seq 1 $LOG_LENGTH); do
+        echo "127.0.0.1 - - [$(date +%d/%b/%Y:%H:%M:%S)] \"GET /index.html HTTP/1.1\" 200 $(generate_random_string $LINE_LENGTH)"
+    done
+}
+
+# Function to generate Nginx logs
+generate_nginx_logs() {
+    for i in $(seq 1 $LOG_LENGTH); do
+        echo "$(date +%d/%b/%Y:%H:%M:%S) | 127.0.0.1 | GET /home | HTTP/1.1 | 200 | $(generate_random_string $LINE_LENGTH)"
+    done
+}
+
+# Function to generate JSON logs
+generate_json_logs() {
+    for i in $(seq 1 $LOG_LENGTH); do
+        echo "{\"timestamp\": \"$(date +%Y-%m-%dT%H:%M:%S)\", \"level\": \"info\", \"message\": \"$(generate_random_string $LINE_LENGTH)\"}"
+    done
+}
+
+# Function to generate CSV logs
+generate_csv_logs() {
+    echo "timestamp, level, message"
+    for i in $(seq 1 $LOG_LENGTH); do
+        echo "$(date +%Y-%m-%dT%H:%M:%S), info, $(generate_random_string $LINE_LENGTH)"
+    done
+}
+
+# Function to generate Custom logs
+generate_custom_logs() {
+    for i in $(seq 1 $LOG_LENGTH); do
+        echo "CUSTOM_LOG | $(date +%Y-%m-%dT%H:%M:%S) | $(generate_random_string $LINE_LENGTH)"
+    done
+}
+
+# Logic to select the log type
+case "$LOG_TYPE" in
+    apache)
+        while true; do
+            generate_apache_logs
+            sleep $SLEEP_LENGTH
+        done
+        ;;
+    nginx)
+        while true; do
+            generate_nginx_logs
+            sleep $SLEEP_LENGTH
+        done
+        ;;
+    json)
+        while true; do
+            generate_json_logs
+            sleep $SLEEP_LENGTH
+        done
+        ;;
+    csv)
+        while true; do
+            generate_csv_logs
+            sleep $SLEEP_LENGTH
+        done
+        ;;
+    custom)
+        while true; do
+            generate_custom_logs
+            sleep $SLEEP_LENGTH
+        done
+        ;;
+    *)
+        echo "Unknown log type: $LOG_TYPE" >&2; exit 1
+        ;;
+esac
diff --git a/examples/agent/logs/node-logs-values.yaml b/examples/agent/logs/node-logs-values.yaml
new file mode 100644
index 00000000..d2d6a29a
--- /dev/null
+++ b/examples/agent/logs/node-logs-values.yaml
@@ -0,0 +1,62 @@
+cluster:
+  name: observe-agent-monitored-cluster
+  events:
+    pullInterval: 20m
+    enabled: true
+  metrics:
+    enabled: true
+    pod:
+      enabled: true
+  namespaceOverride:
+    value: k8smonitoring
+
+node:
+  containers:
+    logs:
+      enabled: true
+      # log lines above this size will be truncated
+      maxLogSize: 512kb
+      # If true, the receiver will pause reading a file and attempt to resend the current batch of logs if it encounters an error from downstream components.
+      retryOnFailure:
+        enabled: true
+      # A list of file glob patterns that match the file paths to be read.
+      include: '["/var/log/pods/*/*/*.log", "/var/log/kube-apiserver-audit.log"]'
+      # A list of file glob patterns to exclude from reading. This is applied against the paths matched by include.
+      exclude: '["/var/log/pods/default_log-generator-csv*/**"]'
+      # Files not modified within this period are skipped. Time units: 1m, 1h (there is no day unit; use 24h for one day).
+      lookbackPeriod: 24h
+      # At startup, where to start reading logs from the file. Options are beginning or end.
+      startAt: end
+    metrics:
+      enabled: false
+
+agent:
+  selfMonitor:
+    enabled: true
+
+config:
+  global:
+    debug:
+      verbosity: normal
+
+cluster-events:
+  enabled: true
+  namespaceOverride: "k8smonitoring"
+  tolerations:
+    - key: "deployObserve"
+      operator: "Equal"
+      value: "notAllowed"
+      effect: "NoSchedule"
+
+cluster-metrics:
+  enabled: true
+  namespaceOverride: "k8smonitoring"
+
+node-logs-metrics:
+  enabled: true
+  namespaceOverride: "k8smonitoring"
+
+
+monitor:
+  enabled: true
+  namespaceOverride: "k8smonitoring"
diff --git a/examples/agent/logs/sample-log-pods.yaml b/examples/agent/logs/sample-log-pods.yaml
new file mode 100644
index 00000000..948aea70
--- /dev/null
+++ b/examples/agent/logs/sample-log-pods.yaml
@@ -0,0 +1,235 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-apache
+  labels:
+    app.kubernetes.io/name: log-generator-apache
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: log-generator-apache
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: log-generator-apache
+    spec:
+      containers:
+        - name: log-generator-apache
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "apache"  # Default log format, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "100"
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script  # Mount the ConfigMap as a volume
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-nginx
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: log-generator-nginx
+  template:
+    metadata:
+      labels:
+        app: log-generator-nginx
+    spec:
+      containers:
+        - name: log-generator-nginx
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "nginx"  # Log format for this pod, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "100"
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-json
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: log-generator-json
+  template:
+    metadata:
+      labels:
+        app: log-generator-json
+    spec:
+      containers:
+        - name: log-generator-json
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "json"  # Log format for this pod, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "100"
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-csv
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: log-generator-csv
+  template:
+    metadata:
+      labels:
+        app: log-generator-csv
+    spec:
+      containers:
+        - name: log-generator-csv
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "csv"  # Log format for this pod, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "100"
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-custom
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: log-generator-custom
+  template:
+    metadata:
+      labels:
+        app: log-generator-custom
+    spec:
+      containers:
+        - name: log-generator-custom
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "custom"  # Log format for this pod, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "100"
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: log-generator-custom-600
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: log-generator-custom-600
+  template:
+    metadata:
+      labels:
+        app: log-generator-custom-600
+    spec:
+      containers:
+        - name: log-generator-custom-600
+          image: busybox
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              cp /scripts/log-generator.sh /tmp/log-generator.sh && chmod +x /tmp/log-generator.sh && /tmp/log-generator.sh
+          env:
+            - name: LOG_TYPE
+              value: "custom"  # Log format for this pod, can be overridden at runtime
+            - name: LOG_LENGTH
+              value: "1"
+            - name: LINE_LENGTH
+              value: "600"  # Assumed to match the "-600" suffix of the deployment name; confirm
+            - name: SLEEP_LENGTH
+              value: "2"
+          volumeMounts:
+            - name: script-volume
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: script-volume
+          configMap:
+            name: log-generator-script
diff --git a/examples/agent/pod_metrics/README.md b/examples/agent/pod_metrics/README.md
index b38f5320..f9759f67 100644
--- a/examples/agent/pod_metrics/README.md
+++ b/examples/agent/pod_metrics/README.md
@@ -101,3 +101,8 @@ make_col metric:string(EXTRA.__name__)
 make_col k8s_namespace_name:string(EXTRA.k8s_namespace_name)
 make_col app_kubernetes_io_name:string(EXTRA.app_kubernetes_io_name)
 ```
+
+### Cleanup
+```
+helm delete pod-metrics-example -n k8smonitoring
+```
diff --git a/examples/agent/pod_metrics/pod-metrics-values.yaml b/examples/agent/pod_metrics/pod-metrics-values.yaml
index 4eb3b186..5cbf859d 100644
--- a/examples/agent/pod_metrics/pod-metrics-values.yaml
+++ b/examples/agent/pod_metrics/pod-metrics-values.yaml
@@ -38,9 +38,9 @@ agent:
     enabled: true
 
 config:
-global:
-  debug:
-    verbosity: normal
+  global:
+    debug:
+      verbosity: normal
 
 cluster-events:
   enabled: true
diff --git a/examples/agent/pod_metrics/sample-pod.yaml b/examples/agent/pod_metrics/sample-pod.yaml
deleted file mode 100644
index 3018d477..00000000
--- a/examples/agent/pod_metrics/sample-pod.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  labels:
-    app.kubernetes.io/name: prometheus-example-app
-  name: prometheus-example-app
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: prometheus-example-app
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: prometheus-example-app
-      annotations:
-        observeinc_com_scrape: 'true'
-        observeinc_com_path: '/metrics'
-        observeinc_com_port: '8080'
-    spec:
-      containers:
-      - name: prometheus-example-app
-        image: quay.io/brancz/prometheus-example-app:v0.3.0
-        ports:
-        - name: web
-          containerPort: 8080
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: prometheus-example-app-service
-spec:
-  selector:
-    app.kubernetes.io/name: prometheus-example-app
-  ports:
-    - protocol: TCP
-      port: 8080  # Exposed service port
-      targetPort: 8080
-      name: metrics
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
-  name: caller-cronjob
-spec:
-  schedule: "*/1 * * * *"  # Runs every minute
-  jobTemplate:
-    spec:
-      template:
-        spec:
-          containers:
-          - name: caller
-            image: curlimages/curl:latest  # A lightweight curl image
-            env:
-            - name: SLEEP_TIME
-              value: "10"  # Sleep time in seconds
-            - name: LOOP_COUNT
-              value: "36"  # Number of iterations
-            command:
-            - /bin/sh
-            - -c
-            - |
-              for i in $(seq 1 $LOOP_COUNT); do
-                curl http://prometheus-example-app-service:8080; # Adjust the URL and port as necessary
-
-                # Second call on even numbers
-                if [ $((i % 2)) -eq 0 ]; then
-                  curl http://prometheus-example-app-service:8080/err; # Second target service
-                  echo "Second call on even #$i made."
-                fi
-                sleep $SLEEP_TIME;
-              done
-          restartPolicy: OnFailure