Skip to content

Commit

Permalink
add some metrics for condition api and orchestrator (#180)
Browse files: browse the repository at this point in the history
  • Loading branch information
DoctorVin authored Jan 18, 2024
1 parent e55f5e5 commit 0ad40f1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
16 changes: 16 additions & 0 deletions internal/orchestrator/updates.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
"time"

"github.com/google/uuid"
"github.com/metal-toolbox/conditionorc/internal/metrics"
"github.com/metal-toolbox/conditionorc/internal/status"
"github.com/nats-io/nats.go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"go.hollow.sh/toolbox/events/pkg/kv"
"go.hollow.sh/toolbox/events/registry"
Expand Down Expand Up @@ -284,6 +286,7 @@ func (o *Orchestrator) eventUpdate(ctx context.Context, evt *v1types.ConditionUp
}

if rctypes.StateIsComplete(evt.ConditionUpdate.State) {
// deal with the completed event
delErr := status.DeleteCondition(evt.Kind, o.facility, evt.ConditionUpdate.ConditionID.String())
if delErr != nil {
// if we fail to delete this event from the KV, the reconciler will catch it later
Expand All @@ -295,12 +298,21 @@ func (o *Orchestrator) eventUpdate(ctx context.Context, evt *v1types.ConditionUp
}).Warn("removing completed condition data")
return errors.Wrap(errCompleteEvent, delErr.Error())
}
metrics.ConditionCompleted.With(
prometheus.Labels{
"conditionKind": string(evt.Kind),
"state": string(evt.ConditionUpdate.State),
},
).Inc()

// queue any follow-on work as required
active, err := o.repository.GetActiveCondition(ctx, evt.ConditionUpdate.ServerID)
if err != nil {
o.logger.WithError(err).WithFields(logrus.Fields{
"condition.id": evt.ConditionUpdate.ConditionID,
"server.id": evt.ConditionUpdate.ServerID,
}).Warn("retrieving next active condition")
metrics.DependencyError("nats", "retrieve active condition")
return errors.Wrap(errCompleteEvent, err.Error())
}
// seeing as we only *just* completed this event it's hard to believe we'd
Expand All @@ -315,8 +327,12 @@ func (o *Orchestrator) eventUpdate(ctx context.Context, evt *v1types.ConditionUp
"server.id": evt.ConditionUpdate.ServerID,
"condition.kind": active.Kind,
}).Warn("publishing next active condition")
metrics.DependencyError("nats", "publish-condition")
return errors.Wrap(errCompleteEvent, err.Error())
}
metrics.ConditionQueued.With(
prometheus.Labels{"conditionKind": string(active.Kind)},
).Inc()
o.logger.WithFields(logrus.Fields{
"condition.id": active.ID,
"server.id": evt.ConditionUpdate.ServerID,
Expand Down
1 change: 1 addition & 0 deletions pkg/api/v1/events/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ func (h *Handler) UpdateCondition(ctx context.Context, updEvt *v1types.Condition
}).Info("condition update failed")
return errors.Wrap(errRetryThis, err.Error())
}

return nil
}

Expand Down
6 changes: 5 additions & 1 deletion pkg/api/v1/routes/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ func (r *Routes) firmwareInstall(c *gin.Context) (int, *v1types.ServerResponse)
}
}

metrics.ConditionQueued.With(
prometheus.Labels{"conditionKind": string(rctypes.FirmwareInstall)},
).Inc()

return http.StatusOK, &v1types.ServerResponse{
Message: "firmware install scheduled",
Records: &v1types.ConditionsResponse{
Expand Down Expand Up @@ -407,7 +411,7 @@ func (r *Routes) conditionCreate(otelCtx context.Context, newCondition *rctypes.

metrics.ConditionQueued.With(
prometheus.Labels{"conditionKind": string(newCondition.Kind)},
)
).Inc()

return http.StatusOK, &v1types.ServerResponse{
Message: "condition set",
Expand Down

0 comments on commit 0ad40f1

Please sign in to comment.