From 862722d758882bfb533174fca760a0cc175ac905 Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 13:50:41 -0700
Subject: [PATCH 1/9] Move vulnerability processing into cronjob

- vuln processing requires 4Gi RAM. Fleet can run fine with less for most
  items
- Add "dedicated" flag and default to true
- Allow user to customize vulnProcessing resources independently from main
  resources var
- ensure that FLEET_VULNERABILITIES_DISABLE_SCHEDULE=true when using
  dedicated
---
 .../fleet/templates/cron-vulnprocessing.yaml  | 180 ++++++++++++++++++
 charts/fleet/templates/deployment.yaml        |  10 +
 charts/fleet/values.yaml                      |  17 +-
 3 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100644 charts/fleet/templates/cron-vulnprocessing.yaml

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
new file mode 100644
index 000000000000..65fd00014310
--- /dev/null
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -0,0 +1,180 @@
+{{- if .Values.vulnProcessing.dedicated }}
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  labels:
+    app: fleet
+    chart: fleet
+    heritage: {{ .Release.Service }}
+    release: {{ .Release.Name }}
+  name: fleet-vulnprocessing
+  namespace: {{ .Release.Namespace }}
+spec:
+  schedule: "0 1 * * *"
+  jobTemplate:
+    spec:
+      ttlSecondsAfterFinished: 100
+      template:
+        metadata:
+          {{- with .Values.podAnnotations }}
+          annotations:
+          {{- toYaml . | trim | nindent 8 }}
+          {{- end }}
+          labels:
+            app: fleet
+            chart: fleet
+            heritage: {{ .Release.Service }}
+            release: {{ .Release.Name }}
+        spec:
+          restartPolicy: Never
+          shareProcessNamespace: true
+          containers:
+            - name: fleet-vulnprocessing
+              command: ["/bin/sh", "-c"]
+              args:
+                - |
+                  /usr/bin/fleet vuln_processing;
+                  {{- if .Values.gke.cloudSQL.enableProxy }}
+                  sql_proxy_pid=$(pgrep cloud_sql_proxy) && kill -INT $sql_proxy_pid;
+                  {{- end }}
+              image: "{{ .Values.imageRepository }}:{{ .Values.imageTag }}"
+              resources:
+                limits:
+                  cpu: {{ .Values.vulnProcessing.resources.limits.cpu }}
+                  memory: {{ .Values.vulnProcessing.resources.limits.memory }}
+                requests:
+                  cpu: {{ .Values.vulnProcessing.resources.requests.cpu }}
+                  memory: {{ .Values.vulnProcessing.resources.requests.memory }}
+              env:
+                # - name: FLEET_SERVER_ADDRESS
+                #   value: "0.0.0.0:{{ .Values.fleet.listenPort }}"
+                # - name: FLEET_AUTH_BCRYPT_COST
+                #   value: "{{ .Values.fleet.auth.bcryptCost }}"
+                # - name: FLEET_AUTH_SALT_KEY_SIZE
+                #   value: "{{ .Values.fleet.auth.saltKeySize }}"
+                # - name: FLEET_APP_TOKEN_KEY_SIZE
+                #   value: "{{ .Values.fleet.app.tokenKeySize }}"
+                # - name: FLEET_APP_TOKEN_VALIDITY_PERIOD
+                #   value: "{{ .Values.fleet.app.inviteTokenValidityPeriod }}"
+                # - name: FLEET_SESSION_KEY_SIZE
+                #   value: "{{ .Values.fleet.session.keySize }}"
+                # - name: FLEET_SESSION_DURATION
+                #   value: "{{ .Values.fleet.session.duration }}"
+                - name: FLEET_LOGGING_DEBUG
+                  value: "{{ .Values.fleet.logging.debug }}"
+                - name: FLEET_LOGGING_JSON
+                  value: "{{ .Values.fleet.logging.json }}"
+                - name: FLEET_LOGGING_DISABLE_BANNER
+                  value: "{{ .Values.fleet.logging.disableBanner }}"
+                # - name: FLEET_SERVER_TLS
+                #   value: "{{ .Values.fleet.tls.enabled }}"
+                # {{- if .Values.fleet.tls.enabled }}
+                # - name: FLEET_SERVER_TLS_COMPATIBILITY
+                #   value: "{{ .Values.fleet.tls.compatibility }}"
+                # - name: FLEET_SERVER_CERT
+                #   value: "/secrets/tls/{{ .Values.fleet.tls.certSecretKey }}"
+                # - name: FLEET_SERVER_KEY
+                #   value: "/secrets/tls/{{ .Values.fleet.tls.keySecretKey }}"
+                # {{- end }}
+                ## END FLEET SECTION
+                ## BEGIN MYSQL SECTION
+                - name: FLEET_MYSQL_ADDRESS
+                  value: "{{ .Values.database.address }}"
+                - name: FLEET_MYSQL_DATABASE
+                  value: "{{ .Values.database.database }}"
+                - name: FLEET_MYSQL_USERNAME
+                  value: "{{ .Values.database.username }}"
+                - name: FLEET_MYSQL_PASSWORD
+                  valueFrom:
+                    secretKeyRef:
+                      name: {{ .Values.database.secretName }}
+                      key: {{ .Values.database.passwordKey }}
+                - name: FLEET_MYSQL_MAX_OPEN_CONNS
+                  value: "{{ .Values.database.maxOpenConns }}"
+                - name: FLEET_MYSQL_MAX_IDLE_CONNS
+                  value: "{{ .Values.database.maxIdleConns }}"
+                - name: FLEET_MYSQL_CONN_MAX_LIFETIME
+                  value: "{{ .Values.database.connMaxLifetime }}"
+                {{- if .Values.database.tls.enabled }}
+                {{- if .Values.database.tls.caCertKey }}
+                - name: FLEET_MYSQL_TLS_CA
+                  value: "/secrets/mysql/{{ .Values.database.tls.caCertKey }}"
+                {{- end }}
+                {{- if .Values.database.tls.certKey }}
+                - name: FLEET_MYSQL_TLS_CERT
+                  value: "/secrets/mysql/{{ .Values.database.tls.certKey }}"
+                {{- end }}
+                {{- if .Values.database.tls.keyKey }}
+                - name: FLEET_MYSQL_TLS_KEY
+                  value: "/secrets/mysql/{{ .Values.database.tls.keyKey }}"
+                {{- end }}
+                - name: FLEET_MYSQL_TLS_CONFIG
+                  value: "{{ .Values.database.tls.config }}"
+                - name: FLEET_MYSQL_TLS_SERVER_NAME
+                  value: "{{ .Values.database.tls.serverName }}"
+                {{- end }}
+                ## END MYSQL SECTION
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  {{- if .Values.gke.cloudSQL.enableProxy }}
+                  add:
+                    - SYS_PTRACE
+                  {{- else }}
+                  drop: [ALL]
+                  {{- end }}
+                privileged: false
+                readOnlyRootFilesystem: true
+                runAsGroup: 3333
+                runAsUser: 3333
+                runAsNonRoot: true
+              volumeMounts:
+                {{- if .Values.database.tls.enabled }}
+                - name: mysql-tls
+                  readOnly: true
+                  mountPath: /secrets/mysql
+                {{- end }}
+            {{- if .Values.gke.cloudSQL.enableProxy }}
+            - name: cloudsql-proxy
+              image: "{{ .Values.gke.cloudSQL.imageRepository }}:{{ .Values.gke.cloudSQL.imageTag }}"
+              command:
+                - "/cloud_sql_proxy"
+                - "-verbose={{ .Values.gke.cloudSQL.verbose}}"
+                - "-instances={{ .Values.gke.cloudSQL.instanceName }}=tcp:3306"
+              resources:
+                limits:
+                  cpu: 0.5 # 500Mhz
+                  memory: 150Mi
+                requests:
+                  cpu: 0.1 # 100Mhz
+                  memory: 50Mi
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop: [ALL]
+                privileged: false
+                readOnlyRootFilesystem: true
+                runAsGroup: 3333
+                runAsUser: 3333
+                runAsNonRoot: true
+            {{- end }}
+          serviceAccountName: fleet
+          volumes:
+            {{- if .Values.database.tls.enabled }}
+            - name: mysql-tls
+              secret:
+                secretName: "{{ .Values.database.secretName }}"
+            {{- end }}
+          {{- with .Values.nodeSelector }}
+          nodeSelector:
+          {{- toYaml . | nindent 8 }}
+          {{- end }}
+          {{- with .Values.affinity }}
+          affinity:
+          {{- toYaml . | nindent 8 }}
+          {{- end }}
+          {{- with .Values.tolerations }}
+          tolerations:
+          {{- toYaml . | nindent 8 }}
+          {{- end }}
+{{- end }}
diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml
index 74471d38b641..2d1c295dcca2 100644
--- a/charts/fleet/templates/deployment.yaml
+++ b/charts/fleet/templates/deployment.yaml
@@ -269,6 +269,16 @@ spec:
             value: "{{ .Values.osquery.logging.pubsub.resultTopic }}"
           {{- end }}
           ## END OSQUERY SECTION
+
+          ## BEGIN VULNERABILITY PROCESSING
+          # Disable vulnerability processing in the main deployment when the
+          # dedicated cron is setup to reduce total cpu/memory utilization
+          {{- if .Values.vulnProcessing.dedicated }}
+          - name: FLEET_VULNERABILITIES_DISABLE_SCHEDULE
+            value: true
+          {{- end }}
+          ## END Vulnerability Processing
+
           ## APPEND ENVIRONMENT VARIABLES FROM VALUES
           {{- range $key, $value := .Values.environments }}
           - name: {{ $key }}
diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml
index 0228b3346ce5..4a6254258166 100644
--- a/charts/fleet/values.yaml
+++ b/charts/fleet/values.yaml
@@ -40,7 +40,8 @@ affinity:
 ingress:
   enabled: false
   className: ""
-  annotations: {}
+  annotations:
+    {}
     # kubernetes.io/tls-acme: "true"
     # nginx.ingress.kubernetes.io/proxy-body-size: 10m
     # kubernetes.io/ingress.class: nginx
@@ -103,6 +104,20 @@ fleet:
   extraVolumes: []
   extraVolumeMounts: []
 
+# Whether to make fleet vulnerability processing run in a dedicated container
+# if you set dedicated=false, you need to increase the main resources section
+# to 4Gi or the fleet container will be OOMKilled when vulnerability processing
+# tries to run.
+vulnProcessing:
+  dedicated: true
+  resources:
+    limits:
+      cpu: 1 # 1GHz
+      memory: 4Gi
+    requests:
+      cpu: 0.1 # 100Mhz
+      memory: 50Mi
+
 ## Section: osquery
 # All of the settings related to osquery's interactions with the Fleet server
 osquery:

From 7183fe0c8c5aa8e2306460b77aa6fc1461f13d58 Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 17:06:03 -0700
Subject: [PATCH 2/9] Fix boolean quoting of environment variable

---
 charts/fleet/templates/deployment.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml
index 2d1c295dcca2..0ec7b9c39e16 100644
--- a/charts/fleet/templates/deployment.yaml
+++ b/charts/fleet/templates/deployment.yaml
@@ -275,7 +275,7 @@ spec:
           # dedicated cron is setup to reduce total cpu/memory utilization
           {{- if .Values.vulnProcessing.dedicated }}
           - name: FLEET_VULNERABILITIES_DISABLE_SCHEDULE
-            value: true
+            value: "true"
           {{- end }}
           ## END Vulnerability Processing
 

From c6df27cb81077eadb070f337f8a466c5b59971f0 Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 17:08:33 -0700
Subject: [PATCH 3/9] Fix read-only FS issue

The vulnerability-processing container runs with
readOnlyRootFilesystem: true, so mount a writable emptyDir volume at /tmp.

The volume source is written as an explicit empty mapping
(`emptyDir: {}`). A bare `emptyDir:` key parses as null rather than as an
empty emptyDir definition.
---
Review note: this patch was amended after export (`emptyDir:` ->
`emptyDir: {}`), so the blob ids on the `index` lines of this and later
patches touching cron-vulnprocessing.yaml are stale. Apply with plain
`git am` / `git apply`, not `--3way`.

 charts/fleet/templates/cron-vulnprocessing.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index 65fd00014310..f977e6059794 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -129,6 +129,8 @@ spec:
                 runAsUser: 3333
                 runAsNonRoot: true
               volumeMounts:
+                - name: tmp
+                  mountPath: /tmp
                 {{- if .Values.database.tls.enabled }}
                 - name: mysql-tls
                   readOnly: true
@@ -160,6 +162,8 @@ spec:
             {{- end }}
           serviceAccountName: fleet
           volumes:
+            - name: tmp
+              emptyDir: {}
             {{- if .Values.database.tls.enabled }}
             - name: mysql-tls
               secret:

From 9f6e4a8c2a7d2e530702b254ffd799754dfbf9a7 Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 17:08:50 -0700
Subject: [PATCH 4/9] Add additional env vars and annotations

---
 .../fleet/templates/cron-vulnprocessing.yaml  | 38 +++++++++++++++++--
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index f977e6059794..eecd14e49dec 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -114,6 +114,38 @@ spec:
                   value: "{{ .Values.database.tls.serverName }}"
                 {{- end }}
                 ## END MYSQL SECTION
+                ## BEGIN REDIS SECTION
+                - name: FLEET_REDIS_ADDRESS
+                  value: "{{ .Values.cache.address }}"
+                - name: FLEET_REDIS_DATABASE
+                  value: "{{ .Values.cache.database }}"
+                {{- if .Values.cache.usePassword }}
+                - name: FLEET_REDIS_PASSWORD
+                  valueFrom:
+                    secretKeyRef:
+                      name: "{{ .Values.cache.secretName }}"
+                      key: "{{ .Values.cache.passwordKey }}"
+                {{- end }}
+                ## END REDIS SECTION
+                ## APPEND ENVIRONMENT VARIABLES FROM VALUES
+                {{- range $key, $value := .Values.environments }}
+                - name: {{ $key }}
+                  value: {{ $value | quote }}
+                {{- end }}
+                ## APPEND ENVIRONMENT VARIABLES FROM SECRETS/CMs
+                {{- range .Values.envsFrom }}
+                - name: {{ .name }}
+                  valueFrom:
+                    {{- if .valueFrom.configMapKeyRef }}
+                    configMapKeyRef:
+                      name: {{ .valueFrom.configMapKeyRef.name }}
+                      key: {{ .valueFrom.configMapKeyRef.key }}
+                    {{- else if .valueFrom.secretKeyRef }}
+                    secretKeyRef:
+                      name: {{ .valueFrom.secretKeyRef.name }}
+                      key: {{ .valueFrom.secretKeyRef.key }}
+                    {{- end }}
+                {{- end }}
               securityContext:
                 allowPrivilegeEscalation: false
                 capabilities:
@@ -171,14 +203,14 @@ spec:
             {{- end }}
           {{- with .Values.nodeSelector }}
           nodeSelector:
-          {{- toYaml . | nindent 8 }}
+          {{- toYaml . | nindent 12 }}
           {{- end }}
           {{- with .Values.affinity }}
           affinity:
-          {{- toYaml . | nindent 8 }}
+          {{- toYaml . | nindent 12 }}
           {{- end }}
           {{- with .Values.tolerations }}
           tolerations:
-          {{- toYaml . | nindent 8 }}
+          {{- toYaml . | nindent 12 }}
           {{- end }}
 {{- end }}

From cda967e957ec66558c344f808b6e554a982344be Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 17:24:07 -0700
Subject: [PATCH 5/9] Add env var for vuln DB path and license key

---
 charts/fleet/templates/cron-vulnprocessing.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index eecd14e49dec..aef94b42a6e7 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -46,6 +46,9 @@ spec:
                   cpu: {{ .Values.vulnProcessing.resources.requests.cpu }}
                   memory: {{ .Values.vulnProcessing.resources.requests.memory }}
               env:
+                ## BEGIN FLEET SECTION
+                - name: FLEET_VULNERABILITIES_DATABASES_PATH
+                  value: /tmp/vuln # /tmp might not work on all cloud providers by default
                 # - name: FLEET_SERVER_ADDRESS
                 #   value: "0.0.0.0:{{ .Values.fleet.listenPort }}"
                 # - name: FLEET_AUTH_BCRYPT_COST
@@ -76,6 +79,13 @@ spec:
                 # - name: FLEET_SERVER_KEY
                 #   value: "/secrets/tls/{{ .Values.fleet.tls.keySecretKey }}"
                 # {{- end }}
+                {{- if .Values.fleet.license.secretName }}
+                - name: FLEET_LICENSE_KEY
+                  valueFrom:
+                    secretKeyRef:
+                      key: {{ .Values.fleet.license.licenseKey }}
+                      name: {{ .Values.fleet.license.secretName }}
+                {{- end }}
                 ## END FLEET SECTION
                 ## BEGIN MYSQL SECTION
                 - name: FLEET_MYSQL_ADDRESS

From 3b535d867c53d433723c3069b0523010ef827b7f Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 15 Jan 2025 18:08:54 -0700
Subject: [PATCH 6/9] fix indentation level of podAnnotations

---
 charts/fleet/templates/cron-vulnprocessing.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index aef94b42a6e7..0279f0660566 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -16,10 +16,10 @@ spec:
       ttlSecondsAfterFinished: 100
       template:
         metadata:
-          {{- with .Values.podAnnotations }}
+{{- with .Values.podAnnotations }}
           annotations:
-          {{- toYaml . | trim | nindent 8 }}
-          {{- end }}
+{{- toYaml . | trim | nindent 12 }}
+{{- end }}
           labels:
             app: fleet
             chart: fleet

From a1577f3a6e1cae645973902fd4323802fc0fb51f Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Thu, 16 Jan 2025 14:11:53 -0700
Subject: [PATCH 7/9] Make scheduling customizable

Read the CronJob schedule from .Values.vulnProcessing.schedule.

The value is rendered through `quote` so it is always emitted as a YAML
string. Without quoting, a schedule that begins with `*` (for example
`*/30 * * * *`) is read by the YAML parser as an alias and the rendered
manifest fails to parse.
---
Review note: this patch was amended after export (added `| quote`), so the
blob ids on the `index` lines of this and later patches touching
cron-vulnprocessing.yaml are stale. Apply with plain `git am` /
`git apply`, not `--3way`.

 charts/fleet/templates/cron-vulnprocessing.yaml | 2 +-
 charts/fleet/values.yaml                        | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index 0279f0660566..1b636b063fb1 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -10,7 +10,7 @@ metadata:
   name: fleet-vulnprocessing
   namespace: {{ .Release.Namespace }}
 spec:
-  schedule: "0 1 * * *"
+  schedule: {{ .Values.vulnProcessing.schedule | quote }}
   jobTemplate:
     spec:
       ttlSecondsAfterFinished: 100
diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml
index 4a6254258166..4f87959cc456 100644
--- a/charts/fleet/values.yaml
+++ b/charts/fleet/values.yaml
@@ -110,6 +110,7 @@ fleet:
 # tries to run.
 vulnProcessing:
   dedicated: true
+  schedule: "0 1 * * *"
   resources:
     limits:
       cpu: 1 # 1GHz

From 9262e45473d964347ea5036a0eec2ecdfbd3b794 Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Wed, 22 Jan 2025 18:47:47 -0700
Subject: [PATCH 8/9] Make default schedule hourly

---
 charts/fleet/values.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml
index 4f87959cc456..2943859915f4 100644
--- a/charts/fleet/values.yaml
+++ b/charts/fleet/values.yaml
@@ -110,7 +110,7 @@ fleet:
 # tries to run.
 vulnProcessing:
   dedicated: true
-  schedule: "0 1 * * *"
+  schedule: "0 * * * *"
   resources:
     limits:
       cpu: 1 # 1GHz

From 96ca2f6705337cdcf56243116a0309cc62f7950f Mon Sep 17 00:00:00 2001
From: Phillip Boushy
Date: Thu, 23 Jan 2025 00:15:20 -0700
Subject: [PATCH 9/9] Forbid concurrency

---
 charts/fleet/templates/cron-vulnprocessing.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/charts/fleet/templates/cron-vulnprocessing.yaml b/charts/fleet/templates/cron-vulnprocessing.yaml
index 1b636b063fb1..395728223592 100644
--- a/charts/fleet/templates/cron-vulnprocessing.yaml
+++ b/charts/fleet/templates/cron-vulnprocessing.yaml
@@ -11,6 +11,7 @@ metadata:
   namespace: {{ .Release.Namespace }}
 spec:
   schedule: {{ .Values.vulnProcessing.schedule | quote }}
+  concurrencyPolicy: Forbid
   jobTemplate:
     spec:
       ttlSecondsAfterFinished: 100