# Reviewer preamble. These lines sit before the first "diff --git" header and
# are not part of any hunk.
#
# Purpose: add Prometheus-based monitoring to the deployment and tune the
# "portals" workload.
#
# Per-file summary (only what the hunks below show):
#   - deploy/README.md
#       Documents adding the prometheus-community Helm repo, installing
#       kube-prometheus-stack into the monitoring-system namespace with
#       prometheus-values.yaml, and applying the two PodMonitor manifests.
#   - deploy/deployments/addons/cilium-pod-monitor.yaml (new file)
#       Two PodMonitors, "cilium-agent" and "cilium-operator", in the
#       cilium-system namespace; both scrape the port named "prometheus".
#   - deploy/deployments/addons/cilium-values.yaml
#       Adds two "prometheus: enabled: true" entries: one indented block
#       following the operator settings and one unindented (top-level) block.
#   - deploy/deployments/addons/ingress-nginx-values.yaml
#       Adds commented-out real-IP / forwarded-header options and new
#       "metrics" and "serviceMonitor" entries, each with "enabled: true".
#   - deploy/deployments/addons/pgsql-operator-pod-monitor.yaml (new file)
#       PodMonitor "cnpg-controller-manager" in postgresql-system selecting
#       app.kubernetes.io/name: cloudnative-pg, port "metrics".
#   - deploy/deployments/addons/prometheus-values.yaml (new file)
#       Values passed to kube-prometheus-stack: Alertmanager routes to a "null"
#       receiver (InfoInhibitor|Watchdog) and a "telegram" receiver
#       (severity critical|warning|info), Grafana ingress enabled, node-name
#       relabelings, kubeProxy disabled, 14d retention, a 50Gi volume claim and
#       the four *SelectorNilUsesHelmValues flags set to false.
#   - deploy/deployments/portals/pgsql.yaml
#       Adds postgresql parameters max_connections "300" and shared_buffers
#       "80MB", a PodMonitor "portals-db" in the portals namespace, and a
#       commented-out Pooler definition.
#   - deploy/deployments/portals/portals-values.yaml
#       Adds APP_EVENT_TYPE "demo" and an nginx limit-rps annotation of "500",
#       changes replicaCount from 3 to 5, and adds an hpa block
#       (minReplicas 5, maxReplicas 10, averageCPUUtilization 90).
#
# NOTE(review): in this copy the hunks are collapsed onto a few long lines, so
#   the original line breaks and YAML indentation are not visible here. Verify
#   nesting (e.g. where "metrics:" sits in ingress-nginx-values.yaml) against
#   the real files before relying on this text.
# NOTE(review): the README passes "-f ../deploy/deployments/addons/..." to helm
#   but "deployments/addons/..." to kubectl on the following lines. Confirm the
#   intended working directory and whether the paths should be consistent.
# NOTE(review): the Grafana ingress is enabled with the "letsencrypt-staging"
#   cluster issuer, while portals-values.yaml uses "letsencrypt-prod". Confirm
#   this is intentional.
# NOTE(review): bot_token, chat_id, adminPassword and the ingress host entries
#   appear blank in this copy, presumably redacted placeholders. Confirm they
#   are supplied before deploying.
diff --git a/deploy/README.md b/deploy/README.md index 87eae5fe..6b43938b 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -353,15 +353,19 @@ helm upgrade --install cnpg cnpg/cloudnative-pg --namespace postgresql-system -- # redis operator helm repo add ot-helm https://ot-container-kit.github.io/helm-charts/ helm upgrade --install redis-operator ot-helm/redis-operator --namespace redis-system --create-namespace -``` - +# monitoring +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm upgrade --install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring-system --create-namespace -f ../deploy/deployments/addons/prometheus-values.yaml +kubectl apply -f deployments/addons/cilium-pod-monitor.yaml +kubectl apply -f deployments/addons/pgsql-operator-pod-monitor.yaml +``` - + @@ -408,3 +412,11 @@ curl --request POST --url https://api.cloudflare.com/client/v4/zones//login" +``` diff --git a/deploy/deployments/addons/cilium-pod-monitor.yaml b/deploy/deployments/addons/cilium-pod-monitor.yaml new file mode 100644 index 00000000..8f846925 --- /dev/null +++ b/deploy/deployments/addons/cilium-pod-monitor.yaml @@ -0,0 +1,23 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: cilium-agent + namespace: cilium-system +spec: + selector: + matchLabels: + app.kubernetes.io/name: cilium-agent + podMetricsEndpoints: + - port: prometheus +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: cilium-operator + namespace: cilium-system +spec: + selector: + matchLabels: + app.kubernetes.io/name: cilium-operator + podMetricsEndpoints: + - port: prometheus diff --git a/deploy/deployments/addons/cilium-values.yaml b/deploy/deployments/addons/cilium-values.yaml index ebac56cf..e7d9935b 100644 --- a/deploy/deployments/addons/cilium-values.yaml +++ b/deploy/deployments/addons/cilium-values.yaml @@ -41,3 +41,8 @@ image: operator: rollOutPods: true priorityClassName: 
"system-node-critical" + prometheus: + enabled: true + +prometheus: + enabled: true diff --git a/deploy/deployments/addons/ingress-nginx-values.yaml b/deploy/deployments/addons/ingress-nginx-values.yaml index 45c6e6ea..cb11e73c 100644 --- a/deploy/deployments/addons/ingress-nginx-values.yaml +++ b/deploy/deployments/addons/ingress-nginx-values.yaml @@ -1,6 +1,10 @@ controller: config: use-proxy-protocol: true + #proxy-real-ip-cidr: "173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22,2400:cb00::/32,2606:4700::/32,2803:f800::/32,2405:b500::/32,2405:8100::/32,2a06:98c0::/29,2c0f:f248::/32" + #proxy-real-ip-cidr: "10.0.0.8/32" + #use-forwarded-headers: true + #forwarded-for-header: "CF-Connecting-IP" replicaCount: 2 service: annotations: @@ -8,3 +12,7 @@ controller: load-balancer.hetzner.cloud/use-private-ip: true load-balancer.hetzner.cloud/name: load-balancer.hetzner.cloud/uses-proxyprotocol: true + metrics: + enabled: true + serviceMonitor: + enabled: true diff --git a/deploy/deployments/addons/pgsql-operator-pod-monitor.yaml b/deploy/deployments/addons/pgsql-operator-pod-monitor.yaml new file mode 100644 index 00000000..259aa6e4 --- /dev/null +++ b/deploy/deployments/addons/pgsql-operator-pod-monitor.yaml @@ -0,0 +1,11 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: cnpg-controller-manager + namespace: postgresql-system +spec: + selector: + matchLabels: + app.kubernetes.io/name: cloudnative-pg + podMetricsEndpoints: + - port: metrics diff --git a/deploy/deployments/addons/prometheus-values.yaml b/deploy/deployments/addons/prometheus-values.yaml new file mode 100644 index 00000000..9e2f5e53 --- /dev/null +++ b/deploy/deployments/addons/prometheus-values.yaml @@ -0,0 +1,100 @@ +# see 
https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml +alertmanager: + config: + route: + routes: + - receiver: "null" + matchers: + - alertname =~ "InfoInhibitor|Watchdog" + - receiver: telegram + matchers: + - severity =~ "critical|warning|info" + receivers: + - name: "null" + - name: telegram + telegram_configs: + - bot_token: + api_url: https://api.telegram.org + chat_id: + ingress: + enabled: false + ingressClassName: nginx + hosts: + - + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + tls: + - secretName: alertmanager-ingress-tls + hosts: + - +grafana: + adminPassword: + ingress: + enabled: true + ingressClassName: nginx + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + hosts: + - + tls: + - secretName: grafana-ingress-tls + hosts: + - +kube-state-metrics: + prometheus: + monitor: + enabled: true + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node +nodeExporter: + serviceMonitor: + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node +prometheus-node-exporter: + prometheus: + monitor: + enabled: true + relabelings: + - action: replace + regex: (.*) + replacement: $1 + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: kubernetes_node +kubeProxy: + enabled: false # kube proxy not used because of proxy replacement by cilium +prometheus: + ingress: + enabled: false + ingressClassName: nginx + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + hosts: + - + tls: + - secretName: prometheus-ingress-tls + hosts: + - + prometheusSpec: + retention: 14d + storageSpec: + volumeClaimTemplate: + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi + ruleSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: 
false + podMonitorSelectorNilUsesHelmValues: false + probeSelectorNilUsesHelmValues: false diff --git a/deploy/deployments/portals/pgsql.yaml b/deploy/deployments/portals/pgsql.yaml index 9db380d9..6ccb9530 100644 --- a/deploy/deployments/portals/pgsql.yaml +++ b/deploy/deployments/portals/pgsql.yaml @@ -8,7 +8,39 @@ spec: primaryUpdateStrategy: unsupervised storage: size: 10Gi + postgresql: + parameters: + max_connections: "300" + shared_buffers: "80MB" bootstrap: initdb: database: app owner: app +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: portals-db + namespace: portals +spec: + selector: + matchLabels: + cnpg.io/cluster: portals-db + podMetricsEndpoints: + - port: metrics +#--- +#apiVersion: postgresql.cnpg.io/v1 +#kind: Pooler +#metadata: +# name: portals-db-pooler +# namespace: portals +#spec: +# cluster: +# name: portals-db +# instances: 3 +# type: rw +# pgbouncer: +# poolMode: transaction +# parameters: +# max_client_conn: "1000" +# default_pool_size: "20" diff --git a/deploy/deployments/portals/portals-values.yaml b/deploy/deployments/portals/portals-values.yaml index 38f0a18a..27b31bab 100644 --- a/deploy/deployments/portals/portals-values.yaml +++ b/deploy/deployments/portals/portals-values.yaml @@ -4,6 +4,7 @@ environment: APP_DEBUG: "false" APP_ENV: production APP_FORCE_HTTPS: "true" + APP_EVENT_TYPE: "demo" TUTOR_PASSWORD: password ADMIN_PASSWORD: admin DB_HOST: portals-db-rw.portals.svc @@ -18,10 +19,16 @@ ingress: - portals.fsr5.de annotations: cert-manager.io/cluster-issuer: "letsencrypt-prod" -replicaCount: 3 + nginx.ingress.kubernetes.io/limit-rps: "500" +replicaCount: 5 migrateJob: onInstall: true onUpgrade: false seed: true tutorsCsvConfigMapName: portals-tutors-csv studentsCsvConfigMapName: portals-students-csv +hpa: + enabled: true + minReplicas: 5 + maxReplicas: 10 + averageCPUUtilization: 90