Skip to content

Commit

Permalink
enhance: Add logs for check health failed (#39208) (#39302)
Browse files Browse the repository at this point in the history
pr: #39208

Signed-off-by: Wei Liu <[email protected]>
  • Loading branch information
weiliu1031 authored Jan 16, 2025
1 parent 21df11b commit 76ed552
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 9 deletions.
6 changes: 3 additions & 3 deletions internal/datacoord/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func (s *Server) Init() error {
log.Info("DataCoord startup success")
return nil
}
s.stateCode.Store(commonpb.StateCode_StandBy)
s.UpdateStateCode(commonpb.StateCode_StandBy)
log.Info("DataCoord enter standby mode successfully")
return nil
}
Expand All @@ -328,7 +328,7 @@ func (s *Server) Init() error {

func (s *Server) initDataCoord() error {
log := log.Ctx(s.ctx)
s.stateCode.Store(commonpb.StateCode_Initializing)
s.UpdateStateCode(commonpb.StateCode_Initializing)
var err error
if err = s.initRootCoordClient(); err != nil {
return err
Expand Down Expand Up @@ -463,7 +463,7 @@ func (s *Server) startDataCoord() {
// })

s.afterStart()
s.stateCode.Store(commonpb.StateCode_Healthy)
s.UpdateStateCode(commonpb.StateCode_Healthy)
sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID())
}

Expand Down
6 changes: 6 additions & 0 deletions internal/datacoord/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,12 @@ func (s *Server) GetStateCode() commonpb.StateCode {
return code.(commonpb.StateCode)
}

// UpdateStateCode stores code as the server's current state code and then
// emits an info-level log entry carrying the new state's string form.
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
	s.stateCode.Store(code)

	// Build the log field after the store so the entry reflects the value just written.
	stateField := zap.String("state", code.String())
	log.Ctx(s.ctx).Info("update datacoord state", stateField)
}

// GetComponentStates returns DataCoord's current state
func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
code := s.GetStateCode()
Expand Down
14 changes: 8 additions & 6 deletions internal/http/healthz/healthz_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,28 +90,30 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
resp := &HealthResponse{
State: "OK",
}

unhealthyComponent := make([]string, 0)
ctx := context.Background()
healthNum := 0
for _, in := range handler.indicators {
handler.unregisterLock.RLock()
_, unregistered := handler.unregisteredRoles[in.GetName()]
handler.unregisterLock.RUnlock()
if unregistered {
healthNum++
continue
}
code := in.Health(ctx)
resp.Detail = append(resp.Detail, &IndicatorState{
Name: in.GetName(),
Code: code,
})
if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy {
healthNum++

if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy {
unhealthyComponent = append(unhealthyComponent, in.GetName())
}
}

if healthNum != handler.indicatorNum {
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum)
if len(unhealthyComponent) > 0 {
resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", handler.indicatorNum-len(unhealthyComponent), handler.indicatorNum)
log.Info("check health failed", zap.Strings("UnhealthyComponent", unhealthyComponent))
}

if resp.State == "OK" {
Expand Down
1 change: 1 addition & 0 deletions internal/querycoordv2/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ func (s *Server) Stop() error {
// UpdateStateCode records code as the coordinator's status (for example
// healthy or unhealthy) and then emits an info-level log entry carrying the
// new state's string form.
func (s *Server) UpdateStateCode(code commonpb.StateCode) {
	// The status field holds the state code as its int32 value.
	status := int32(code)
	s.status.Store(status)

	stateField := zap.String("state", code.String())
	log.Ctx(s.ctx).Info("update querycoord state", stateField)
}

func (s *Server) State() commonpb.StateCode {
Expand Down

0 comments on commit 76ed552

Please sign in to comment.