-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #100 from navikt/alarmer
Legger til alarmer (PrometheusRule) for veilarboppgave
- Loading branch information
Showing 5 changed files with 72 additions and 2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Deploys the alert config in .nais/alerts/alerts-config-prod.yaml to the
# prod-fss cluster. Runs on pushes to master that touch this workflow or the
# alert config, and can also be started manually (workflow_dispatch).
name: Deploy alerts for veilarboppgave to prod-fss

on:
  push:
    branches:
      - 'master'
    paths:
      - '.github/workflows/deploy-alerts-to-prod.yaml'
      - '.nais/alerts/alerts-config-prod.yaml'
  workflow_dispatch:

jobs:
  deploy-alerts:
    name: Deploy alerts to prod-fss
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # The deploy action is configured through environment variables:
      # the API key comes from repository secrets, RESOURCE is the manifest
      # to apply and CLUSTER is the target cluster.
      - name: Deploy to prod-fss
        uses: nais/deploy/actions/deploy@v1
        env:
          APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY_OBO }}
          CLUSTER: prod-fss
          RESOURCE: .nais/alerts/alerts-config-prod.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Alert rules for the veilarboppgave application, owned by team obo.
# Every rule carries a `namespace: obo` label and a `severity` label.
apiVersion: "monitoring.coreos.com/v1"
kind: PrometheusRule
metadata:
  name: team-obo-alarmer-veilarboppgave
  namespace: obo
  labels:
    team: obo
spec:
  groups:
    - name: team-obo-alarmer-veilarboppgave
      rules:
        # Kubernetes-specific alerts

        # Fires when the deployment has reported zero available replicas
        # continuously for 1 minute.
        - alert: Applikasjon er nede
          expr: kube_deployment_status_replicas_available{deployment="veilarboppgave"} == 0
          for: 1m
          annotations:
            summary: "App {{ $labels.deployment }} er nede i namespace {{ $labels.namespace }}!"
            consequence: "Appen kan ikke nås av andre applikasjoner, noe som kan potensielt ha stor konsekvens for brukere (nedetid, mm.)."
            action: "Diagnostiser applikasjonen ved hjelp av relevante kubectl-kommandoer (`kubectl get pod -l app={{ $labels.deployment }}`, `kubectl describe pod <pod>`, `kubectl get events --field-selector involvedObject.name=<pod>`)."
          labels:
            namespace: obo
            severity: critical

        # Spring Boot-specific alerts

        # Percentage of requests with outcome SERVER_ERROR, computed from
        # 5-minute request rates; fires when it stays above 1 for 5 minutes.
        - alert: Høy andel serverfeil (HTTP 5XX)
          expr: (100 * (sum(rate(http_server_requests_seconds_count{app="veilarboppgave", outcome="SERVER_ERROR"}[5m])) / sum(rate(http_server_requests_seconds_count{app="veilarboppgave"}[5m])))) > 1
          for: 5m
          annotations:
            summary: "Andelen HTTP 5XX feil i veilarboppgave har oversteget 1% de siste 5 minuttene."
            consequence: "Potensielle konsekvenser for bruker kan være forhøyet andel opplevd feil, degradert ytelse, mm."
            action: "Sjekk logger for å se hvilke feil som oppstår og start feilsøking."
          labels:
            namespace: obo
            severity: critical

        # Percentage of requests with outcome CLIENT_ERROR, computed from
        # 5-minute request rates; fires when it stays above 10 for 5 minutes.
        # Lower severity (warning) than the server-error rule.
        - alert: Høy andel klientfeil (HTTP 4XX)
          expr: (100 * (sum(rate(http_server_requests_seconds_count{app="veilarboppgave", outcome="CLIENT_ERROR"}[5m])) / sum(rate(http_server_requests_seconds_count{app="veilarboppgave"}[5m])))) > 10
          for: 5m
          annotations:
            summary: "Andelen HTTP 4XX feil i veilarboppgave har oversteget 10% de siste 5 minuttene."
            consequence: "Potensielle konsekvenser for bruker kan være forhøyet andel opplevd feil, degradert ytelse, mm."
            action: "Sjekk logger for å se hvilke feil som oppstår og start feilsøking."
          labels:
            namespace: obo
            severity: warning
File renamed without changes.
File renamed without changes.