Skip to content

Commit

Permalink
replaced alerts
Browse files: browse the repository at this point in the history
  • Loading branch information
eziztm committed Sep 14, 2023
1 parent 6bb9841 commit 9b45180
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 6 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/deploy-alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ jobs:
# Workflow step: deploys the backend alert resource to the dev cluster using
# the nais/deploy action.
# NOTE(review): this is a diff rendering whose +/- markers were lost, so both
# the old and the new env values appear below under the same keys.
- name: Deploy to dev
uses: nais/deploy/actions/deploy@v1
env:
# Appears to be the removed side of the diff: the previous dev-fss target.
APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY }}
CLUSTER: dev-fss
RESOURCE: apps/backend/nais/alerts-backend-dev-fss.yaml
# Appears to be the added side of the diff: the dev-gcp target, whose
# resource file (alerts-backend-dev-gcp.yaml) is created in this same commit.
APIKEY: ${{ secrets.NAIS_ORG_DEPLOY_APIKEY }}
CLUSTER: dev-gcp
RESOURCE: apps/backend/nais/alerts-backend-dev-gcp.yaml

# Workflow step: deploys the backend alert resource to the prod cluster using
# the nais/deploy action.
# NOTE(review): this is a diff rendering whose +/- markers were lost, so both
# the old and the new env values appear below under the same keys.
- name: Deploy to prod
uses: nais/deploy/actions/deploy@v1
env:
# Appears to be the removed side of the diff: the previous prod-fss target.
APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY }}
CLUSTER: prod-fss
RESOURCE: apps/backend/nais/alerts-backend-prod-fss.yaml
# Appears to be the added side of the diff: the prod-gcp target, whose
# resource file (alerts-backend-prod-gcp.yaml) is created in this same commit.
APIKEY: ${{ secrets.NAIS_ORG_DEPLOY_APIKEY }}
CLUSTER: prod-gcp
RESOURCE: apps/backend/nais/alerts-backend-prod-gcp.yaml

apply-frackend-alerts:
name: Apply frackend alerts
Expand Down
30 changes: 30 additions & 0 deletions apps/backend/nais/alerts-backend-dev-gcp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# PrometheusRule defining the alerts for team-catalog-backend in dev-gcp:
# one "deployment has unavailable replicas" alert and one "high error rate
# in logs" alert.
# NOTE(review): the rule is created in namespace `org` (team `org`), while the
# alert labels and the log-rate selectors below use namespace `nom` — confirm
# that this mismatch is intended.
apiVersion: "monitoring.coreos.com/v1"
kind: "PrometheusRule"
metadata:
  name: team-catalog-backend-alerts
  namespace: org
  labels:
    team: org
spec:
  groups:
    - name: team-catalog-backend_down
      rules:
        - alert: team-catalog-backend pod nede
          # Fires when the deployment has reported unavailable replicas for
          # 3 minutes.
          # NOTE(review): the selector has no namespace matcher, so a
          # deployment with this name in any namespace matches — consider
          # adding one once the target namespace is confirmed.
          expr: kube_deployment_status_replicas_unavailable{deployment="team-catalog-backend"} > 0
          for: 3m
          annotations:
            # The annotations use the `deployment` label because it is the
            # only label the expression above is guaranteed to carry.
            action: "`kubectl describe deployment {{ $labels.deployment }}` for events, og `kubectl logs deployment/{{ $labels.deployment }} -c {{ $labels.deployment }}` for logger"
            summary: "{{ $labels.deployment }} er nede"
          labels:
            namespace: nom
            severity: critical
    - name: team-catalog-backend_high_failrate
      rules:
        - alert: team-catalog-backend høy feilrate i logger
          # Percentage of error log messages out of all log messages over a
          # 3-minute rate window; fires when it stays above 10 for 3 minutes.
          # The folded scalar yields the same single-line expression.
          expr: >-
            (100 * sum by (app, namespace)
            (rate(log_messages_errors{app="team-catalog-backend",namespace="nom"}[3m]))
            / sum by (app, namespace)
            (rate(log_messages_total{app="team-catalog-backend",namespace="nom"}[3m])))
            > 10
          for: 3m
          annotations:
            action: "Sjekk loggene til app {{ $labels.app }}, for å se hvorfor det er så mye feil"
            # Added for parity with the "down" alert; `app` survives the
            # `sum by (app, namespace)` aggregation.
            summary: "{{ $labels.app }} har høy feilrate i logger"
          labels:
            namespace: nom
            severity: warning
30 changes: 30 additions & 0 deletions apps/backend/nais/alerts-backend-prod-gcp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# PrometheusRule defining the alerts for team-catalog-backend in prod-gcp:
# one "application is down" alert and one "high error rate in logs" alert.
# NOTE(review): the rule is created in namespace `org` (team `org`), while the
# alert labels and the log-rate selectors below use namespace `nom` — confirm
# that this mismatch is intended.
apiVersion: "monitoring.coreos.com/v1"
kind: "PrometheusRule"
metadata:
  name: team-catalog-backend-alerts
  namespace: org
  labels:
    team: org
spec:
  groups:
    - name: team-catalog-backend_down
      rules:
        - alert: team-catalog-backend applikasjon nede
          # Fires when a scrape target of the app has reported `up == 0` for
          # 3 minutes.
          expr: up{app="team-catalog-backend", job="kubernetes-pods"} == 0
          for: 3m
          annotations:
            # `app` is guaranteed by the selector above; assumes the scrape
            # target also carries a `pod` label — TODO confirm.
            action: "`kubectl describe pod {{ $labels.pod }}` for events, og `kubectl logs {{ $labels.pod }} -c {{ $labels.app }}` for logger"
            summary: "{{ $labels.app }} er nede"
          labels:
            namespace: nom
            severity: critical
    - name: team-catalog-backend_high_failrate
      rules:
        - alert: team-catalog-backend høy feilrate i logger
          # Percentage of error log messages out of all log messages over a
          # 3-minute rate window; fires when it stays above 10 for 3 minutes.
          # The folded scalar yields the same single-line expression.
          expr: >-
            (100 * sum by (app, namespace)
            (rate(log_messages_errors{app="team-catalog-backend",namespace="nom"}[3m]))
            / sum by (app, namespace)
            (rate(log_messages_total{app="team-catalog-backend",namespace="nom"}[3m])))
            > 10
          for: 3m
          annotations:
            action: "Sjekk loggene til app {{ $labels.app }}, for å se hvorfor det er så mye feil"
            # Added for parity with the "down" alert; `app` survives the
            # `sum by (app, namespace)` aggregation.
            summary: "{{ $labels.app }} har høy feilrate i logger"
          labels:
            namespace: nom
            severity: warning

0 comments on commit 9b45180

Please sign in to comment.