-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add support for kubernetes workload maintenance
- Loading branch information
Showing
6 changed files
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from datetime import datetime


class FilterModule(object):
    """Ansible custom filters for Kubernetes workload maintenance checks."""

    def filters(self):
        """Return the mapping of filter names to their implementations."""
        return {
            "k8s_workload_pods_restart_last_days": self._k8s_workload_pods_restart_last_days,
            "k8s_workload_check_service_type": self._k8s_workload_check_service_type,
        }

    def _k8s_workload_pods_restart_last_days(self, pods, x_days):
        """Return pods whose containers were (re)started within the last *x_days*.

        :param pods: list of dicts with ``name`` and ``containerStatuses``
                     (each status holding an ISO-8601 ``startedAt`` string
                     and a ``restartCount``)
        :param x_days: look-back window in days
        :returns: list of dicts with ``name``, ``started_at`` and ``restarts``
        """
        if not pods:
            return []
        restarted_pods = []
        for pod in pods:
            for status in pod.get('containerStatuses', []):
                started_raw = status.get('startedAt')
                # A container that never started (e.g. stuck in Waiting)
                # carries no startedAt -- skip it instead of crashing on
                # fromisoformat(None).
                if not started_raw:
                    continue
                # Kubernetes emits RFC 3339 timestamps ending in "Z";
                # datetime.fromisoformat only accepts that from Python 3.11
                # on, so normalize it to an explicit UTC offset.
                started_at = datetime.fromisoformat(started_raw.replace('Z', '+00:00'))
                if (datetime.now(started_at.tzinfo) - started_at).days < x_days:
                    restarted_pods.append({
                        "name": pod.get('name'),
                        "started_at": started_at.strftime("%Y-%m-%d %H:%M:%S"),
                        "restarts": status.get('restartCount')
                    })
        return restarted_pods

    def _k8s_workload_check_service_type(self, services, allowed_types):
        """Return services whose type differs from the explicitly allowed one.

        :param services: list of dicts with ``name`` and ``type``
        :param allowed_types: mapping of service name -> allowed service type;
                              a service missing from the mapping has no
                              allowed exception and is always reported
        :returns: list of dicts with ``name``, ``type`` and ``allowed_type``
        """
        if not services:
            return []
        faulty_service = []
        for service in services:
            allowed_type = allowed_types.get(service.get('name'))
            if service.get('type') != allowed_type:
                faulty_service.append({
                    "name": service.get('name'),
                    "type": service.get('type'),
                    "allowed_type": allowed_type
                })
        return faulty_service
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Ansible Role adfinis.maintenance.maintenance_63_kubernetes_workload |
22 changes: 22 additions & 0 deletions
22
roles/maintenance_63_kubernetes_workload/defaults/main.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
---

# Task exclusion lists; the effective exclusion set is the union of the
# global and the per-host list.
maintenance_global_exclude_tasks: []
maintenance_host_exclude_tasks: []
maintenance_exclude_tasks: "{{ maintenance_global_exclude_tasks + maintenance_host_exclude_tasks }}"

# Defaults below may be overridden on a host-by-host basis.

# Namespace in which the workload runs.
k8s_workload_namespace: "{{ inventory_hostname }}"

# Service types that are allowed to differ from ClusterIP, keyed by
# service name.
k8s_workload_allowed_service_types: {}
# netbox: LoadBalancer

# Deployments that are expected to be highly available.
k8s_workload_ha_deployments: []
# - netbox

# StatefulSets that are expected to be highly available.
k8s_workload_ha_statefulsets: []
# - netbox-redis-replicas
173 changes: 173 additions & 0 deletions
173
roles/maintenance_63_kubernetes_workload/tasks/main.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
---

# The first task only defines a YAML anchor (&task). Every concrete task
# below merges it via `<<: *task` and thereby inherits the naming scheme,
# the `register: task` handle and the standard exclude/only guards.
# NOTE: a task that declares its own `when:` overrides the merged one
# (YAML merge keys lose to explicit keys), so such tasks must repeat the
# guards explicitly.
- name: This task only serves as a template for the tasks below
  ansible.builtin.set_fact:
    ignoreme: &task
      name: "{{ vars.taskid }}: {{ vars.name }}"
      register: task
      when:
        - "vars.taskid not in maintenance_exclude_tasks"
        - "maintenance_only is not defined or maintenance_only == vars.taskid"
  vars:
    taskid: ignoreme
    name: bar

- <<: *task
  vars:
    taskid: 63-004
    name: "Health: Check that all service related Pods in a healthy (Running, Completed) state | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_pods
  changed_when: false

- <<: *task
  vars:
    taskid: 63-004
    name: "Health: Check that all service related Pods in a healthy (Running, Completed) state | Report unhealthy Pods"
    unhealthy_pods: "{{ k8s_pods | json_query(\"resources[?status.phase!='Running' && status.phase!='Succeeded']\") }}"
  ansible.builtin.debug:
    msg: |
      Unhealthy Pods:
      {{ unhealthy_pods | json_query("[].metadata.name") | to_nice_json }}
  changed_when: unhealthy_pods | length > 0

- <<: *task
  vars:
    taskid: 63-006
    name: "Health: Check if a container of service related Pod got recently restarted, eg. OOMKilled | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_pods
  changed_when: false

- <<: *task
  vars:
    taskid: 63-006
    name: "Health: Check if a container of service related Pod got recently restarted, eg. OOMKilled | Report restarted Pods"
    pods_restarted: "{{ k8s_pods | json_query('resources[?status.containerStatuses[?restartCount > `0`]].{name: metadata.name, containerStatuses: status.containerStatuses[].{restartCount: restartCount, startedAt: (state.*.startedAt)[0]}}') }}"  # noqa: yaml[line-length]
    pods_restarted_last_1d: "{{ pods_restarted | adfinis.maintenance.k8s_workload_pods_restart_last_days(1) }}"
  ansible.builtin.debug:
    var: pods_restarted_last_1d
  changed_when: pods_restarted_last_1d | length > 0

- <<: *task
  vars:
    taskid: 63-012
    name: "Config: Check if all Services are configured as ClusterIP unless necessary | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Service
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_services
  changed_when: false

- <<: *task
  vars:
    taskid: 63-012
    name: "Config: Check if all Services are configured as ClusterIP unless necessary | Check for Services with wrong type"
    none_cluster_ip_services: "{{ k8s_services | json_query('resources[?spec.type!=`ClusterIP`].{name: metadata.name, type: spec.type}') }}"
    wrong_type_services: "{{ none_cluster_ip_services | adfinis.maintenance.k8s_workload_check_service_type(k8s_workload_allowed_service_types) }}"
  ansible.builtin.debug:
    var: wrong_type_services
  changed_when: wrong_type_services | length > 0

- <<: *task
  vars:
    taskid: 63-013
    name: "Config: Check that the PullPolicy is not configured to Always | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_pods
  changed_when: false

- <<: *task
  vars:
    taskid: 63-013
    name: "Config: Check that the PullPolicy is not configured to Always | Report all faulty PullPolicies"
    pull_policy_always: "{{ k8s_pods | json_query('resources[?spec.containers[?imagePullPolicy == `Always`] || spec.initContainers[?imagePullPolicy == `Always`]].{name: metadata.name}') }}"  # noqa: yaml[line-length]
  ansible.builtin.debug:
    var: pull_policy_always
  changed_when: pull_policy_always | length > 0

- <<: *task
  vars:
    taskid: 63-014
    name: "Config: Check that the Ingress class is configured with the IngressClass attribute and not as annotation | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Ingress
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_ingresses
  changed_when: false

- <<: *task
  vars:
    taskid: 63-014
    name: "Config: Check that the Ingress class is configured with the IngressClass attribute and not as annotation | Report all faulty Ingresses"
    ingresses_with_annotation: "{{ k8s_ingresses | json_query('resources[?metadata.annotations.\"kubernetes.io/ingress.class\"].metadata.name') }}"  # noqa: yaml[line-length]
  ansible.builtin.debug:
    var: ingresses_with_annotation
  changed_when: ingresses_with_annotation | length > 0

- <<: *task
  vars:
    taskid: 63-015
    name: "Config: For HA deployments, check if replicas >= 2 for all relevant Deployments/StateFulSets | Gather all infos"
  kubernetes.core.k8s_info:
    kind: Deployment
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_deployments
  changed_when: false

- <<: *task
  vars:
    taskid: 63-015
    name: "Config: For HA deployments, check if replicas >= 2 for all relevant Deployments/StateFulSets | Gather all infos"
  kubernetes.core.k8s_info:
    kind: StatefulSet
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_statefulsets
  changed_when: false

# Report each HA-relevant Deployment whose replica count is below 2.
# The explicit `when:` overrides the anchor-merged guards, so they are
# repeated here alongside the actual low-replica membership check.
- <<: *task
  vars:
    taskid: 63-015
    name: "Config: For HA deployments, check if replicas >= 2 for all relevant Deployments/StateFulSets | Report Deployments"
    low_replica_deployments: "{{ k8s_deployments | json_query('resources[?spec.replicas < `2`].metadata.name') }}"
  ansible.builtin.debug:
    var: item
  changed_when: true
  loop: "{{ k8s_workload_ha_deployments }}"
  when:
    - "vars.taskid not in maintenance_exclude_tasks"
    - "maintenance_only is not defined or maintenance_only == vars.taskid"
    - "item in low_replica_deployments"

# Same check for StatefulSets.
- <<: *task
  vars:
    taskid: 63-015
    name: "Config: For HA deployments, check if replicas >= 2 for all relevant Deployments/StateFulSets | Report StatefulSets"
    low_replica_statefulsets: "{{ k8s_statefulsets | json_query('resources[?spec.replicas < `2`].metadata.name') }}"
  ansible.builtin.debug:
    var: item
  changed_when: true
  loop: "{{ k8s_workload_ha_statefulsets }}"
  when:
    - "vars.taskid not in maintenance_exclude_tasks"
    - "maintenance_only is not defined or maintenance_only == vars.taskid"
    - "item in low_replica_statefulsets"

- <<: *task
  vars:
    taskid: 63-018
    name: "Config: For HA deployments, check if the HorizontalPodAutoscaler cannot scale below 2 replicas | Gather all infos"
  kubernetes.core.k8s_info:
    kind: HorizontalPodAutoscaler
    namespace: "{{ k8s_workload_namespace }}"
  register: k8s_hpas
  changed_when: false

- <<: *task
  vars:
    taskid: 63-018
    name: "Config: For HA deployments, check if the HorizontalPodAutoscaler cannot scale below 2 replicas | Report HPAs"
    low_replica_hpas: "{{ k8s_hpas | json_query('resources[?spec.minReplicas < `2`].metadata.name') }}"
  ansible.builtin.debug:
    var: low_replica_hpas
  changed_when: low_replica_hpas | length > 0