diff --git a/.github/workflows/publish-image-and-helm-on-tag.yml b/.github/workflows/publish-image-and-helm-on-tag.yml
index 161a945..b8709b6 100644
--- a/.github/workflows/publish-image-and-helm-on-tag.yml
+++ b/.github/workflows/publish-image-and-helm-on-tag.yml
@@ -19,7 +19,7 @@ jobs:
     with:
       image_name: "infra-autoscaler"
       run_trivy_scan: true
-      tag_generation: "version_git_tag"
+      image_tag_generation: "version_git_tag"
       add_latest_tag: true
       container_registry: "ghcr.io"
   publish_helm:
diff --git a/.github/workflows/publish-image-on-push-to-branch.yml b/.github/workflows/publish-image-on-push-to-branch.yml
index 8d8020e..410360a 100644
--- a/.github/workflows/publish-image-on-push-to-branch.yml
+++ b/.github/workflows/publish-image-on-push-to-branch.yml
@@ -19,6 +19,6 @@ jobs:
     with:
       image_name: "infra-autoscaler"
       run_trivy_scan: true
-      tag_generation: "ticket_from_branch"
+      image_tag_generation: "ticket_from_branch"
       add_latest_tag: false
       container_registry: "ghcr.io"
diff --git a/charts/infra-autoscaler/files/alertrules.yaml b/charts/infra-autoscaler/files/alertrules.yaml
index b8609dc..c2e9568 100644
--- a/charts/infra-autoscaler/files/alertrules.yaml
+++ b/charts/infra-autoscaler/files/alertrules.yaml
@@ -1,11 +1,19 @@
 - alert: AutoscalerHighComponentErrorRate
-  expr: rate(autoscaler_component_errors_total[5m]) / (autoscaler_cycle_time_seconds * autoscaler_instances{ready="true"}) * 100 > 10
-  for: 10m
+  expr: rate(autoscaler_component_errors_total[5m]) * on(instance) group_left autoscaler_cycle_time_seconds / on(instance) group_left autoscaler_instances_count{ready="true"} * 100 > 10
+  for: 5m
   labels:
     severity: error
   annotations:
     summary: "High error rate for autoscaler {{ $labels.component }}"
     description: "Autoscaler {{ $labels.component }} component ({{ $labels.component_type }}) has errors for {{ $value }}% of instances"
+- alert: AutoscalerNoReadyInstances
+  expr: autoscaler_instances_count{ready="true"} == 0
+  for: 5m
+  labels:
+    severity: error
+  annotations:
+    summary: "Autoscaler has no ready instances"
+    description: "The autoscaler has reported zero ready instances for at least 5 minutes"
 - alert: AutoscalerSlowCycleRate
   expr: rate(autoscaler_cycle_count[5m]) * autoscaler_cycle_time_seconds * 100 < 50
   for: 10m