Improve alert updates (#191)
* Improve alert updates

* Fix integration tests
assafad1 authored Oct 27, 2024
1 parent 5e68ac8 commit 2231aef
Showing 12 changed files with 115 additions and 140 deletions.
28 changes: 20 additions & 8 deletions controllers/alertmanagerconfig_controller.go
@@ -3,6 +3,7 @@ package controllers
import (
"context"
"fmt"
"reflect"
"regexp"
"time"

@@ -237,16 +238,25 @@ func (r *AlertmanagerConfigReconciler) linkCxAlertToCxIntegrations(ctx context.C
}

matchedReceiversMap := matchedRoutesToMatchedReceiversMap(matchRoutes, config.Spec.Receivers)
alert.Spec.NotificationGroups, err = generateNotificationGroupFromRoutes(matchRoutes, matchedReceiversMap)
notificationGroups, err := generateNotificationGroupFromRoutes(matchRoutes, matchedReceiversMap)
if err != nil {
succeed = false
log.Error(err, "Received an error while trying to generate NotificationGroup from routes")
continue
}
if err = r.Update(ctx, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to update Alert CRD from AlertmanagerConfig")
continue

if !reflect.DeepEqual(alert.Spec.NotificationGroups, notificationGroups) {
if err = r.Client.Get(ctx, client.ObjectKey{Namespace: alert.Namespace, Name: alert.Name}, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to get Alert CRD from AlertmanagerConfig")
}

alert.Spec.NotificationGroups = notificationGroups
if err = r.Update(ctx, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to update Alert CRD from AlertmanagerConfig")
continue
}
}
}

@@ -260,9 +270,11 @@ func (r *AlertmanagerConfigReconciler) deleteWebhooksFromRelatedAlerts(ctx conte
}

for _, alert := range alerts.Items {
alert.Spec.NotificationGroups = nil
if err := r.Update(ctx, &alert); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD from AlertmanagerConfig: %w", err)
if alert.Spec.NotificationGroups != nil {
alert.Spec.NotificationGroups = nil
if err := r.Update(ctx, &alert); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD from AlertmanagerConfig: %w", err)
}
}
}

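
The net effect of the hunks above: the generated notification groups now land in a local variable, and the Alert CRD is re-fetched and written only when they actually differ from what is stored. A minimal sketch of that compare-before-update pattern, assuming the Alert and NotificationGroup types from this repository (the helper name syncNotificationGroups is hypothetical, not part of the commit):

package controllers

import (
	"context"
	"reflect"

	"sigs.k8s.io/controller-runtime/pkg/client"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// syncNotificationGroups updates the Alert CRD only when the generated notification
// groups differ from the ones already stored, so unchanged alerts no longer trigger
// needless Update calls (and the extra reconciles they cause).
func syncNotificationGroups(ctx context.Context, c client.Client, alert *coralogixv1alpha1.Alert,
	desired []coralogixv1alpha1.NotificationGroup) error {
	if reflect.DeepEqual(alert.Spec.NotificationGroups, desired) {
		return nil // already up to date; skip the write entirely
	}
	// Re-fetch the latest revision before writing to reduce resourceVersion conflicts.
	if err := c.Get(ctx, client.ObjectKeyFromObject(alert), alert); err != nil {
		return err
	}
	alert.Spec.NotificationGroups = desired
	return c.Update(ctx, alert)
}
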
82 changes: 20 additions & 62 deletions controllers/alphacontrollers/alert_controller.go
@@ -18,8 +18,8 @@ package alphacontrollers

import (
"context"
stdErr "errors"
"fmt"

"github.com/go-logr/logr"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -106,9 +106,7 @@ func (r *AlertReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}

func (r *AlertReconciler) update(ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {
func (r *AlertReconciler) update(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
alertRequest, err := alert.Spec.ExtractUpdateAlertRequest(ctx, log, *alert.Status.ID)
if err != nil {
return fmt.Errorf("error to parse alert request: %w", err)
@@ -123,33 +121,15 @@ func (r *AlertReconciler) update(ctx context.Context,
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}
return fmt.Errorf("alert not found on remote, recreating it: %w", err)
return fmt.Errorf("alert not found on remote: %w", err)
}
return fmt.Errorf("error on updating alert: %w", err)
}
log.V(1).Info("Remote alert updated", "alert", protojson.Format(remoteUpdatedAlert))

status, err := getStatus(ctx, log, remoteUpdatedAlert.GetAlert(), alert.Spec)
if err != nil {
return fmt.Errorf("error on getting status: %w", err)
}

if err = r.Get(ctx, client.ObjectKeyFromObject(alert), alert); err != nil {
return fmt.Errorf("error on getting alert: %w", err)
}
alert.Status = status

if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}

return nil
}

func (r *AlertReconciler) delete(ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {

func (r *AlertReconciler) delete(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
log.V(1).Info("Deleting remote alert", "alert", *alert.Status.ID)
_, err := r.CoralogixClientSet.Alerts().DeleteAlert(ctx, &alerts.DeleteAlertByUniqueIdRequest{
Id: wrapperspb.String(*alert.Status.ID),
@@ -167,23 +147,7 @@ func (r *AlertReconciler) delete(ctx context.Context,
return nil
}

func (r *AlertReconciler) create(
ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {

if alert.Spec.Labels == nil {
alert.Spec.Labels = make(map[string]string)
}

if value, ok := alert.Spec.Labels["managed-by"]; !ok || value == "" {
alert.Spec.Labels["managed-by"] = "coralogix-operator"
}

if err := r.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
}

func (r *AlertReconciler) create(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
alertRequest, err := alert.ExtractCreateAlertRequest(ctx, log)
if err != nil {
return fmt.Errorf("error to parse alert request: %w", err)
@@ -201,38 +165,32 @@ func (r *AlertReconciler) create(
}

alert.Status.ID = pointer.String(response.GetAlert().GetUniqueIdentifier().GetValue())
if err = r.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}

if alert.Status, err = getStatus(ctx, log, response.GetAlert(), alert.Spec); err != nil {
return fmt.Errorf("error on getting status: %w", err)
updated := false
if alert.Spec.Labels == nil {
alert.Spec.Labels = make(map[string]string)
}
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)

if value, ok := alert.Spec.Labels["managed-by"]; !ok || value == "" {
alert.Spec.Labels["managed-by"] = "coralogix-operator"
updated = true
}

if !controllerutil.ContainsFinalizer(alert, alertFinalizerName) {
controllerutil.AddFinalizer(alert, alertFinalizerName)
}
if err = r.Client.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
updated = true
}

return nil
}

func getStatus(ctx context.Context, log logr.Logger, actualAlert *alerts.Alert, spec coralogixv1alpha1.AlertSpec) (coralogixv1alpha1.AlertStatus, error) {
if actualAlert == nil {
return coralogixv1alpha1.AlertStatus{}, stdErr.New("alert is nil")
if updated {
if err = r.Client.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
}
}

var status coralogixv1alpha1.AlertStatus
var err error

status.ID = utils.WrapperspbStringToStringPointer(actualAlert.GetUniqueIdentifier())

return status, err
return nil
}

// SetupWithManager sets up the controller with the Manager.
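
A condensed sketch of the reworked create() flow after this commit: the remote alert's ID is persisted through the status subresource first, and the object's spec and metadata are written only when the managed-by label or the finalizer is actually missing, so at most one extra Update is issued. It assumes the repository's AlertReconciler type and alertFinalizerName constant; finishCreate is a hypothetical name condensing the tail of create():

package alphacontrollers

import (
	"context"
	"fmt"

	"k8s.io/utils/pointer"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// finishCreate persists the remote ID via the status subresource, then patches the
// object itself only if a label or finalizer needs to be added.
func (r *AlertReconciler) finishCreate(ctx context.Context, alert *coralogixv1alpha1.Alert, remoteID string) error {
	alert.Status.ID = pointer.String(remoteID)
	if err := r.Status().Update(ctx, alert); err != nil {
		return fmt.Errorf("error on updating alert status: %w", err)
	}

	updated := false
	if alert.Spec.Labels == nil {
		alert.Spec.Labels = make(map[string]string)
	}
	if value, ok := alert.Spec.Labels["managed-by"]; !ok || value == "" {
		alert.Spec.Labels["managed-by"] = "coralogix-operator"
		updated = true
	}
	if !controllerutil.ContainsFinalizer(alert, alertFinalizerName) {
		controllerutil.AddFinalizer(alert, alertFinalizerName)
		updated = true
	}
	if updated { // a single spec/metadata write, and only when something changed
		if err := r.Client.Update(ctx, alert); err != nil {
			return fmt.Errorf("error on updating alert: %w", err)
		}
	}
	return nil
}
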
13 changes: 2 additions & 11 deletions controllers/alphacontrollers/alert_controller_test.go
@@ -17,7 +17,6 @@ import (
"k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

cxsdk "github.com/coralogix/coralogix-management-sdk/go"
@@ -761,14 +760,7 @@ func TestFlattenAlerts(t *testing.T) {
},
}

spec := coralogixv1alpha1.AlertSpec{
Scheduling: &coralogixv1alpha1.Scheduling{
TimeZone: coralogixv1alpha1.TimeZone("UTC+02"),
},
}

ctx := context.Background()
log := log.FromContext(ctx)

controller := gomock.NewController(t)
defer controller.Finish()
@@ -777,12 +769,11 @@
webhookMock.EXPECT().List(ctx, gomock.Any()).Return(&cxsdk.ListAllOutgoingWebhooksResponse{}, nil).AnyTimes()
coralogixv1alpha1.WebhooksClient = webhookMock

status, err := getStatus(ctx, log, alert, spec)
assert.NoError(t, err)
alertStatus := coralogixv1alpha1.AlertStatus{ID: utils.WrapperspbStringToStringPointer(alert.GetUniqueIdentifier())}

expected := &coralogixv1alpha1.AlertStatus{
ID: pointer.String("id1"),
}

assert.EqualValues(t, expected, &status)
assert.EqualValues(t, expected, &alertStatus)
}
2 changes: 1 addition & 1 deletion controllers/clientset/recording-rules-groups-client.go
@@ -14,4 +14,4 @@ type RecordingRulesGroupsClientInterface interface {
Get(ctx context.Context, req *cxsdk.GetRuleGroupSetRequest) (*cxsdk.GetRuleGroupSetResponse, error)
Update(ctx context.Context, req *cxsdk.UpdateRuleGroupSetRequest) (*emptypb.Empty, error)
Delete(ctx context.Context, req *cxsdk.DeleteRuleGroupSetRequest) (*emptypb.Empty, error)
}
}
82 changes: 45 additions & 37 deletions controllers/prometheusrule_controller.go
@@ -3,16 +3,14 @@ package controllers
import (
"context"
"fmt"
"reflect"
"strconv"
"strings"
"time"

coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
"github.com/coralogix/coralogix-operator/controllers/clientset"
"github.com/go-logr/logr"
"go.uber.org/zap/zapcore"

prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"go.uber.org/zap/zapcore"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -25,6 +23,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
"github.com/coralogix/coralogix-operator/controllers/clientset"
)

const (
@@ -85,22 +86,14 @@ func (r *PrometheusRuleReconciler) Reconcile(ctx context.Context, req ctrl.Reque
func (r *PrometheusRuleReconciler) convertPrometheusRuleRecordingRuleToCxRecordingRule(ctx context.Context, log logr.Logger, prometheusRule *prometheus.PrometheusRule, req reconcile.Request) error {
recordingRuleGroupSetSpec := prometheusRuleToRecordingRuleToRuleGroupSet(log, prometheusRule)
if len(recordingRuleGroupSetSpec.Groups) == 0 {
log.V(int(zapcore.DebugLevel)).Info("No recording rules found in PrometheusRule")
return nil
}

recordingRuleGroupSet := &coralogixv1alpha1.RecordingRuleGroupSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: prometheusRule.Namespace,
Name: prometheusRule.Name,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
},
Namespace: prometheusRule.Namespace,
Name: prometheusRule.Name,
OwnerReferences: []metav1.OwnerReference{getOwnerReference(prometheusRule)},
},
Spec: recordingRuleGroupSetSpec,
}
@@ -176,14 +169,7 @@ func (r *PrometheusRuleReconciler) convertPrometheusRuleAlertToCxAlert(ctx conte
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.Namespace = prometheusRule.Namespace
alertCRD.Name = alertCRDName
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
}
alertCRD.OwnerReferences = []metav1.OwnerReference{getOwnerReference(prometheusRule)}
alertCRD.Labels = map[string]string{"app.kubernetes.io/managed-by": prometheusRule.Name}
if val, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = val
Expand All @@ -197,21 +183,32 @@ func (r *PrometheusRuleReconciler) convertPrometheusRuleAlertToCxAlert(ctx conte
}
}

//Converting the PrometheusRule to the desired Alert.
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
updated := false
desiredSpec := prometheusRuleToCoralogixAlertSpec(rule)
// We keep NotificationGroups on update, to not override AlertmanagerConfig controller settings
desiredSpec.NotificationGroups = alertCRD.Spec.NotificationGroups
if !reflect.DeepEqual(alertCRD.Spec, desiredSpec) {
desiredSpec.DeepCopyInto(&alertCRD.Spec)
updated = true
}
if val, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = val

desiredOwnerReferences := []metav1.OwnerReference{getOwnerReference(prometheusRule)}
if !reflect.DeepEqual(alertCRD.OwnerReferences, desiredOwnerReferences) {
alertCRD.OwnerReferences = desiredOwnerReferences
updated = true
}
if err := r.Update(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)

if promRuleVal, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
if alertVal, ok := alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"]; !ok || alertVal != promRuleVal {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = promRuleVal
updated = true
}
}

if updated {
if err := r.Update(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)
}
}
}
}
@@ -298,7 +295,7 @@ func prometheusInnerRulesToCoralogixInnerRules(rules []prometheus.Rule) []coralo
}

func prometheusRuleToCoralogixAlertSpec(rule prometheus.Rule) coralogixv1alpha1.AlertSpec {
return coralogixv1alpha1.AlertSpec{
alertSpec := coralogixv1alpha1.AlertSpec{
Description: rule.Annotations["description"],
Severity: getSeverity(rule),
NotificationGroups: []coralogixv1alpha1.NotificationGroup{
@@ -328,6 +325,8 @@ },
},
Labels: rule.Labels,
}

return alertSpec
}

func getSeverity(rule prometheus.Rule) coralogixv1alpha1.AlertSeverity {
Expand Down Expand Up @@ -388,6 +387,15 @@ var prometheusAlertForToCoralogixPromqlAlertTimeWindow = map[prometheus.Duration
"24h": coralogixv1alpha1.MetricTimeWindow(coralogixv1alpha1.TimeWindowTwentyFourHours),
}

func getOwnerReference(promRule *prometheus.PrometheusRule) metav1.OwnerReference {
return metav1.OwnerReference{
APIVersion: promRule.APIVersion,
Kind: promRule.Kind,
Name: promRule.Name,
UID: promRule.UID,
}
}

// SetupWithManager sets up the controller with the Manager.
func (r *PrometheusRuleReconciler) SetupWithManager(mgr ctrl.Manager) error {
shouldTrackPrometheusRules := func(labels map[string]string) bool {
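
A condensed sketch of the guarded sync that convertPrometheusRuleAlertToCxAlert now performs for an existing Alert CRD: the desired spec is rebuilt from the Prometheus rule while the NotificationGroups managed by the AlertmanagerConfig controller are preserved, and a single Update is issued only if the spec, owner references, or tracking label drifted. The helper name syncAlertFromRule is hypothetical; prometheusRuleToCoralogixAlertSpec and getOwnerReference are the functions defined in this file:

package controllers

import (
	"context"
	"fmt"
	"reflect"

	prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// syncAlertFromRule writes the Alert CRD back only when something actually drifted.
func syncAlertFromRule(ctx context.Context, c client.Client, alertCRD *coralogixv1alpha1.Alert,
	promRule *prometheus.PrometheusRule, rule prometheus.Rule) error {
	updated := false

	desiredSpec := prometheusRuleToCoralogixAlertSpec(rule)
	// Keep NotificationGroups as-is, so the AlertmanagerConfig controller's settings survive.
	desiredSpec.NotificationGroups = alertCRD.Spec.NotificationGroups
	if !reflect.DeepEqual(alertCRD.Spec, desiredSpec) {
		desiredSpec.DeepCopyInto(&alertCRD.Spec)
		updated = true
	}

	desiredOwners := []metav1.OwnerReference{getOwnerReference(promRule)}
	if !reflect.DeepEqual(alertCRD.OwnerReferences, desiredOwners) {
		alertCRD.OwnerReferences = desiredOwners
		updated = true
	}

	if v, ok := promRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok &&
		alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] != v {
		alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = v
		updated = true
	}

	if !updated {
		return nil
	}
	if err := c.Update(ctx, alertCRD); err != nil {
		return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)
	}
	return nil
}
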
2 changes: 1 addition & 1 deletion kuttl-test.yaml
@@ -4,4 +4,4 @@ testDirs:
- tests/integration/prometheusrules
- tests/integration/alertmangerconfigs
namespace: default
timeout: 60
timeout: 360