NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy; confirm the main.yml context lines against the real file
before applying. The blob ids on the "index" lines predate the edits to the two new files.

diff --git a/.github/workflows/deploy-alerts-to-prod.yaml b/.github/workflows/deploy-alerts-to-prod.yaml
new file mode 100644
index 0000000..634b6a4
--- /dev/null
+++ b/.github/workflows/deploy-alerts-to-prod.yaml
@@ -0,0 +1,28 @@
+# Deploys the Prometheus alert rules for veilarboppgave to prod-fss.
+# Runs on pushes to master that touch this workflow or the alert config,
+# and can also be started manually (workflow_dispatch).
+name: Deploy alerts for veilarboppgave to prod-fss
+
+on:
+  push:
+    branches:
+      - 'master'
+    paths:
+      - '.github/workflows/deploy-alerts-to-prod.yaml'
+      - '.nais/alerts/alerts-config-prod.yaml'
+  workflow_dispatch:
+
+jobs:
+  deploy-alerts:
+    name: Deploy alerts to prod-fss
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Deploy to prod-fss
+        uses: nais/deploy/actions/deploy@v1
+        env:
+          APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY_OBO }}
+          CLUSTER: prod-fss
+          RESOURCE: .nais/alerts/alerts-config-prod.yaml
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c391e97..6029e1f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -61,7 +61,7 @@ jobs:
         env:
           APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY }}
           CLUSTER: dev-fss
-          RESOURCE: nais-dev.yaml
+          RESOURCE: .nais/application/application-config-dev.yaml
           VAR: version=${{ env.IMAGE_TAG }}
 
   deploy-prod:
@@ -77,7 +77,7 @@ jobs:
         env:
           APIKEY: ${{ secrets.NAIS_DEPLOY_APIKEY }}
           CLUSTER: prod-fss
-          RESOURCE: nais-prod.yaml
+          RESOURCE: .nais/application/application-config-prod.yaml
           VAR: version=${{ env.IMAGE_TAG }}
       - name: Create release
         uses: softprops/action-gh-release@v1
diff --git a/.nais/alerts/alerts-config-prod.yaml b/.nais/alerts/alerts-config-prod.yaml
new file mode 100644
index 0000000..e24d35d
--- /dev/null
+++ b/.nais/alerts/alerts-config-prod.yaml
@@ -0,0 +1,49 @@
+# Prometheus alert rules for veilarboppgave (namespace obo).
+apiVersion: "monitoring.coreos.com/v1"
+kind: PrometheusRule
+metadata:
+  name: team-obo-alarmer-veilarboppgave
+  namespace: obo
+  labels:
+    team: obo
+spec:
+  groups:
+    - name: team-obo-alarmer-veilarboppgave
+      rules:
+        # Kubernetes-specific alerts
+        # Fires when the deployment has had zero available replicas for 1 minute.
+        - alert: Applikasjon er nede
+          expr: kube_deployment_status_replicas_available{deployment="veilarboppgave"} == 0
+          for: 1m
+          annotations:
+            summary: "App {{ $labels.deployment }} er nede i namespace {{ $labels.namespace }}!"
+            consequence: "Appen kan ikke nås av andre applikasjoner, noe som kan potensielt ha stor konsekvens for brukere (nedetid, mm.)."
+            action: "Diagnostiser applikasjonen ved hjelp av relevante kubectl-kommandoer (`kubectl get pod -l app={{ $labels.deployment }}`, `kubectl describe pod <pod-navn>`, `kubectl get events --field-selector involvedObject.name=<pod-navn>`)."
+          labels:
+            namespace: obo
+            severity: critical
+
+        # Spring Boot-specific alerts
+        # Fires when more than 1% of requests (5m rate) had outcome SERVER_ERROR, sustained for 5 minutes.
+        - alert: Høy andel serverfeil (HTTP 5XX)
+          expr: (100 * (sum(rate(http_server_requests_seconds_count{app="veilarboppgave", outcome="SERVER_ERROR"}[5m])) / sum(rate(http_server_requests_seconds_count{app="veilarboppgave"}[5m])))) > 1
+          for: 5m
+          annotations:
+            summary: "Andelen HTTP 5XX feil i veilarboppgave har oversteget 1% de siste 5 minuttene."
+            consequence: "Potensielle konsekvenser for bruker kan være forhøyet andel opplevd feil, degradert ytelse, mm."
+            action: "Sjekk logger for å se hvilke feil som oppstår og start feilsøking."
+          labels:
+            namespace: obo
+            severity: critical
+
+        # Fires when more than 10% of requests (5m rate) had outcome CLIENT_ERROR, sustained for 5 minutes.
+        - alert: Høy andel klientfeil (HTTP 4XX)
+          expr: (100 * (sum(rate(http_server_requests_seconds_count{app="veilarboppgave", outcome="CLIENT_ERROR"}[5m])) / sum(rate(http_server_requests_seconds_count{app="veilarboppgave"}[5m])))) > 10
+          for: 5m
+          annotations:
+            summary: "Andelen HTTP 4XX feil i veilarboppgave har oversteget 10% de siste 5 minuttene."
+            consequence: "Potensielle konsekvenser for bruker kan være forhøyet andel opplevd feil, degradert ytelse, mm."
+            action: "Sjekk logger for å se hvilke feil som oppstår og start feilsøking."
+          labels:
+            namespace: obo
+            severity: warning
diff --git a/nais-dev.yaml b/.nais/application/application-config-dev.yaml
similarity index 100%
rename from nais-dev.yaml
rename to .nais/application/application-config-dev.yaml
diff --git a/nais-prod.yaml b/.nais/application/application-config-prod.yaml
similarity index 100%
rename from nais-prod.yaml
rename to .nais/application/application-config-prod.yaml