diff --git a/internal/datacoord/server.go b/internal/datacoord/server.go index d2cd6f9180a3c..c2c523565ceb9 100644 --- a/internal/datacoord/server.go +++ b/internal/datacoord/server.go @@ -318,7 +318,7 @@ func (s *Server) Init() error { log.Info("DataCoord startup success") return nil } - s.stateCode.Store(commonpb.StateCode_StandBy) + s.UpdateStateCode(commonpb.StateCode_StandBy) log.Info("DataCoord enter standby mode successfully") return nil } @@ -328,7 +328,7 @@ func (s *Server) Init() error { func (s *Server) initDataCoord() error { log := log.Ctx(s.ctx) - s.stateCode.Store(commonpb.StateCode_Initializing) + s.UpdateStateCode(commonpb.StateCode_Initializing) var err error if err = s.initRootCoordClient(); err != nil { return err @@ -463,7 +463,7 @@ func (s *Server) startDataCoord() { // }) s.afterStart() - s.stateCode.Store(commonpb.StateCode_Healthy) + s.UpdateStateCode(commonpb.StateCode_Healthy) sessionutil.SaveServerInfo(typeutil.DataCoordRole, s.session.GetServerID()) } diff --git a/internal/datacoord/services.go b/internal/datacoord/services.go index 86c58419734b8..000be0f6c05c3 100644 --- a/internal/datacoord/services.go +++ b/internal/datacoord/services.go @@ -688,6 +688,12 @@ func (s *Server) GetStateCode() commonpb.StateCode { return code.(commonpb.StateCode) } +// UpdateStateCode stores the given state code as DataCoord's current state and logs the new state at Info level. +func (s *Server) UpdateStateCode(code commonpb.StateCode) { + s.stateCode.Store(code) + log.Ctx(s.ctx).Info("update datacoord state", zap.String("state", code.String())) +} + // GetComponentStates returns DataCoord's current state func (s *Server) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) { code := s.GetStateCode() diff --git a/internal/http/healthz/healthz_handler.go b/internal/http/healthz/healthz_handler.go index 62c98e1cd83cc..4e203d20a39e7 100644 --- a/internal/http/healthz/healthz_handler.go +++ b/internal/http/healthz/healthz_handler.go @@ -90,14 +90,14 @@ func (handler 
*HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) resp := &HealthResponse{ State: "OK", } + + unhealthyComponent := make([]string, 0) ctx := context.Background() - healthNum := 0 for _, in := range handler.indicators { handler.unregisterLock.RLock() _, unregistered := handler.unregisteredRoles[in.GetName()] handler.unregisterLock.RUnlock() if unregistered { - healthNum++ continue } code := in.Health(ctx) @@ -105,13 +105,15 @@ func (handler *HealthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) Name: in.GetName(), Code: code, }) - if code == commonpb.StateCode_Healthy || code == commonpb.StateCode_StandBy { - healthNum++ + + if code != commonpb.StateCode_Healthy && code != commonpb.StateCode_StandBy { + unhealthyComponent = append(unhealthyComponent, in.GetName()) } } - if healthNum != handler.indicatorNum { - resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", healthNum, handler.indicatorNum) + if len(unhealthyComponent) > 0 { + resp.State = fmt.Sprintf("Not all components are healthy, %d/%d", handler.indicatorNum-len(unhealthyComponent), handler.indicatorNum) + log.Info("check health failed", zap.Strings("UnhealthyComponent", unhealthyComponent)) } if resp.State == "OK" { diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index 25c7714a69681..082a921bd3aa7 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -671,6 +671,7 @@ func (s *Server) Stop() error { // UpdateStateCode updates the status of the coord, including healthy, unhealthy func (s *Server) UpdateStateCode(code commonpb.StateCode) { s.status.Store(int32(code)) + log.Ctx(s.ctx).Info("update querycoord state", zap.String("state", code.String())) } func (s *Server) State() commonpb.StateCode {