From 4ff141c18dbeb923f25508335814e5206c8ba484 Mon Sep 17 00:00:00 2001
From: Patryk Matuszak
Date: Thu, 19 Dec 2024 14:19:02 +0100
Subject: [PATCH] Don't wait too long for an answer from API Server

If Multus plugin gets a DEL request, but the API Server is down
(e.g. via 'crictl rmp'), the call takes so long, it actually never
finishes. This prevents CRI-O from deleting the Pods.
---
 pkg/k8sclient/k8sclient.go | 9 +++++++++
 pkg/multus/multus.go       | 9 +++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pkg/k8sclient/k8sclient.go b/pkg/k8sclient/k8sclient.go
index 83f306f80..a96113538 100644
--- a/pkg/k8sclient/k8sclient.go
+++ b/pkg/k8sclient/k8sclient.go
@@ -82,6 +82,15 @@ func (c *ClientInfo) GetPod(namespace, name string) (*v1.Pod, error) {
 	return c.Client.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{})
 }
 
+// GetPodContext gets pod from kubernetes; ctx is passed to the API server request and is not used on the informer cache path.
+func (c *ClientInfo) GetPodContext(ctx context.Context, namespace, name string) (*v1.Pod, error) {
+	if c.PodInformer != nil {
+		logging.Debugf("GetPod for [%s/%s] will use informer cache", namespace, name)
+		return listers.NewPodLister(c.PodInformer.GetIndexer()).Pods(namespace).Get(name)
+	}
+	return c.Client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
+}
+
 // DeletePod deletes a pod from kubernetes
 func (c *ClientInfo) DeletePod(namespace, name string) error {
 	return c.Client.CoreV1().Pods(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
diff --git a/pkg/multus/multus.go b/pkg/multus/multus.go
index 6ae6243c4..532ec1193 100644
--- a/pkg/multus/multus.go
+++ b/pkg/multus/multus.go
@@ -542,7 +542,10 @@ func GetPod(kubeClient *k8s.ClientInfo, k8sArgs *types.K8sArgs, isDel bool) (*v1
 	var pod *v1.Pod
 	if err := wait.PollImmediate(pollDuration, shortPollTimeout, func() (bool, error) {
 		var getErr error
-		pod, getErr = kubeClient.GetPod(podNamespace, podName)
+		// Cap each API server call at pollDuration so an unreachable API server cannot stall the poll.
+		ctx, cancel := context.WithTimeout(context.TODO(), pollDuration)
+		defer cancel()
+		pod, getErr = kubeClient.GetPodContext(ctx, podNamespace, podName)
 		if isCriticalRequestRetriable(getErr) || retryOnNotFound(getErr) {
 			return false, nil
 		}
@@ -555,7 +558,9 @@ func GetPod(kubeClient *k8s.ClientInfo, k8sArgs *types.K8sArgs, isDel bool) (*v1
 		// Try one more time to get the pod directly from the apiserver;
 		// TODO: figure out why static pods don't show up via the informer
 		// and always hit this case.
-		pod, err = kubeClient.GetPod(podNamespace, podName)
+		ctx, cancel := context.WithTimeout(context.TODO(), pollDuration)
+		defer cancel()
+		pod, err = kubeClient.GetPodContext(ctx, podNamespace, podName)
 		if err != nil {
 			return nil, cmdErr(k8sArgs, "error waiting for pod: %v", err)
 		}