From 48ef202cbe64a19eda6f2c85e4a3472f14118983 Mon Sep 17 00:00:00 2001
From: Zexi Li
Date: Thu, 18 Apr 2024 10:04:07 +0800
Subject: [PATCH] fix(host): sync status of pod and container after service restarted (#20045)

---
 pkg/hostman/guestman/pod.go        | 51 ++++++++++++++++++++++++++++--
 pkg/hostman/hostutils/hostutils.go | 11 ++++++-
 pkg/util/pod/pod.go                |  5 +--
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/pkg/hostman/guestman/pod.go b/pkg/hostman/guestman/pod.go
index a7b1148ebdc..6bf17040fd2 100644
--- a/pkg/hostman/guestman/pod.go
+++ b/pkg/hostman/guestman/pod.go
@@ -40,6 +40,7 @@ import (
 	"yunion.io/x/onecloud/pkg/hostman/container/volume_mount"
 	"yunion.io/x/onecloud/pkg/hostman/guestman/desc"
 	deployapi "yunion.io/x/onecloud/pkg/hostman/hostdeployer/apis"
+	"yunion.io/x/onecloud/pkg/hostman/hostinfo"
 	"yunion.io/x/onecloud/pkg/hostman/hostutils"
 	"yunion.io/x/onecloud/pkg/hostman/isolated_device"
 	"yunion.io/x/onecloud/pkg/hostman/options"
@@ -103,10 +104,51 @@ func (s *sPodGuestInstance) CleanGuest(ctx context.Context, params interface{})
 }
 
 func (s *sPodGuestInstance) ImportServer(pendingDelete bool) {
-	log.Infof("======pod %s ImportServer do nothing", s.Id)
 	// TODO: 参考SKVMGuestInstance,可以做更多的事,比如同步状态
 	s.manager.SaveServer(s.Id, s)
 	s.manager.RemoveCandidateServer(s)
+	s.SyncStatus("sync status after host started")
+}
+
+func (s *sPodGuestInstance) SyncStatus(reason string) {
+	// sync pod status: VM_RUNNING when IsRunning reports true, VM_READY otherwise
+	var status = computeapi.VM_READY
+	if s.IsRunning() {
+		status = computeapi.VM_RUNNING
+	}
+	ctx := context.Background()
+	if status == computeapi.VM_READY {
+		// pod is not running: stop it (best effort, failure is only logged) before reporting VM_READY
+		if err := s.stopPod(ctx, 1); err != nil {
+			log.Warningf("stop cri pod %s when sync status: %v", s.Id, err)
+		}
+	}
+	statusInput := &apis.PerformStatusInput{
+		Status:      status,
+		Reason:      reason,
+		PowerStates: GetPowerStates(s),
+		HostId:      hostinfo.Instance().HostId,
+	}
+
+	if _, err := hostutils.UpdateServerStatus(ctx, s.Id, statusInput); err != nil {
+		log.Errorf("failed update guest status %s", err)
+	}
+	// sync container's status
+	for _, c := range s.containers {
+		status, err := s.getContainerStatus(ctx, c.Id)
+		if err != nil {
+			log.Errorf("get container %s status of pod %s: %v", c.Id, s.Id, err)
+			continue
+		}
+		statusInput = &apis.PerformStatusInput{
+			Status: status,
+			Reason: reason,
+			HostId: hostinfo.Instance().HostId,
+		}
+		if _, err := hostutils.UpdateContainerStatus(ctx, c.Id, statusInput); err != nil {
+			log.Errorf("failed update container %s status: %s", c.Id, err)
+		}
+	}
 }
 
 func (s *sPodGuestInstance) DeployFs(ctx context.Context, userCred mcclient.TokenCredential, deployInfo *deployapi.DeployInfo) (jsonutils.JSONObject, error) {
@@ -158,11 +200,14 @@ func (s *sPodGuestInstance) getPod(ctx context.Context) (*runtimeapi.PodSandbox,
 }
 
 func (s *sPodGuestInstance) IsRunning() bool {
-	_, err := s.getPod(context.Background())
+	pod, err := s.getPod(context.Background())
 	if err != nil {
 		return false
 	}
-	return true
+	if pod.GetState() == runtimeapi.PodSandboxState_SANDBOX_READY {
+		return true
+	}
+	return false
 }
 
 func (s *sPodGuestInstance) HandleGuestStatus(ctx context.Context, status string, body *jsonutils.JSONDict) (jsonutils.JSONObject, error) {
diff --git a/pkg/hostman/hostutils/hostutils.go b/pkg/hostman/hostutils/hostutils.go
index 053a07c2c66..8f1c7879b10 100644
--- a/pkg/hostman/hostutils/hostutils.go
+++ b/pkg/hostman/hostutils/hostutils.go
@@ -38,6 +38,7 @@ import (
 	"yunion.io/x/onecloud/pkg/httperrors"
 	"yunion.io/x/onecloud/pkg/mcclient"
 	"yunion.io/x/onecloud/pkg/mcclient/auth"
+	"yunion.io/x/onecloud/pkg/mcclient/modulebase"
 	modules "yunion.io/x/onecloud/pkg/mcclient/modules/compute"
 	"yunion.io/x/onecloud/pkg/mcclient/modules/k8s"
 	"yunion.io/x/onecloud/pkg/util/cgrouputils/cpuset"
@@ -160,8 +161,16 @@ func RemoteStoragecacheCacheImage(ctx context.Context, storagecacheId, imageId,
 		storagecacheId, imageId, query, params)
 }
 
+func UpdateResourceStatus(ctx context.Context, man modulebase.IResourceManager, id string, statusInput *apis.PerformStatusInput) (jsonutils.JSONObject, error) {
+	return man.PerformAction(GetComputeSession(ctx), id, "status", jsonutils.Marshal(statusInput))
+}
+
+func UpdateContainerStatus(ctx context.Context, cid string, statusInput *apis.PerformStatusInput) (jsonutils.JSONObject, error) {
+	return UpdateResourceStatus(ctx, &modules.Containers, cid, statusInput)
+}
+
 func UpdateServerStatus(ctx context.Context, sid string, statusInput *apis.PerformStatusInput) (jsonutils.JSONObject, error) {
-	return modules.Servers.PerformAction(GetComputeSession(ctx), sid, "status", jsonutils.Marshal(statusInput))
+	return UpdateResourceStatus(ctx, &modules.Servers, sid, statusInput)
 }
 
 func UpdateServerProgress(ctx context.Context, sid string, progress, progressMbps float64) (jsonutils.JSONObject, error) {
diff --git a/pkg/util/pod/pod.go b/pkg/util/pod/pod.go
index 523bc41022d..8212f4ab54f 100644
--- a/pkg/util/pod/pod.go
+++ b/pkg/util/pod/pod.go
@@ -10,7 +10,6 @@ import (
 	"google.golang.org/grpc/credentials/insecure"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 
-	"yunion.io/x/jsonutils"
 	"yunion.io/x/log"
 	"yunion.io/x/pkg/errors"
 )
@@ -168,8 +167,6 @@ func (c crictl) CreateContainer(ctx context.Context,
 		SandboxConfig: podConfig,
 	}
 
-	log.Infof("======container config: %s", jsonutils.Marshal(ctrConfig).PrettyString())
-
 	image := ctrConfig.GetImage().GetImage()
 
 	// When there is a withPull request or the image default mode is to
@@ -187,7 +184,7 @@ func (c crictl) CreateContainer(ctx context.Context,
 		log.Infof("Pull image %s", resp.String())
 	}
 
-	log.Infof("CreateContainerRequest: %v", req)
+	log.Debugf("CreateContainerRequest pod %s: %v", podId, req)
 	r, err := c.GetRuntimeClient().CreateContainer(ctx, req)
 	if err != nil {
 		return "", errors.Wrapf(err, "CreateContainer with: %s", req)