Skip to content

Commit

Permalink
fix: retry on errors when watching pods (#9373)
Browse files Browse the repository at this point in the history
* fix: retry on errors when watching pods

If a timeout (or some network error?) occurs while waiting for a pod
initialization or termination event, e.g. when a build takes a long time,
skaffold becomes stuck and never finishes the operation. Use a retry
watcher to handle such errors gracefully.

* chore: run `go mod vendor` to pull new dependencies

* test: fixup `TestWaitForPodSucceeded` unit test

* chore: run `go mod vendor` to sync dependencies

---------

Co-authored-by: Angel Montero <[email protected]>
  • Loading branch information
mikedld and alphanota authored Jan 14, 2025
1 parent 0b5cea8 commit cd7c1fb
Show file tree
Hide file tree
Showing 38 changed files with 8,663 additions and 2 deletions.
19 changes: 17 additions & 2 deletions pkg/skaffold/kubernetes/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/cache"
watchtools "k8s.io/client-go/tools/watch"

"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log"
)
Expand Down Expand Up @@ -61,7 +63,7 @@ func watchUntilTimeout(ctx context.Context, timeout time.Duration, w watch.Inter
func WaitForPodSucceeded(ctx context.Context, pods corev1.PodInterface, podName string, timeout time.Duration) error {
log.Entry(ctx).Infof("Waiting for %s to be complete", podName)

w, err := pods.Watch(ctx, metav1.ListOptions{})
w, err := newPodsWatcher(ctx, pods)
if err != nil {
return fmt.Errorf("initializing pod watcher: %s", err)
}
Expand Down Expand Up @@ -101,7 +103,7 @@ func isPodSucceeded(podName string) func(event *watch.Event) (bool, error) {
func WaitForPodInitialized(ctx context.Context, pods corev1.PodInterface, podName string) error {
log.Entry(ctx).Infof("Waiting for %s to be initialized", podName)

w, err := pods.Watch(ctx, metav1.ListOptions{})
w, err := newPodsWatcher(ctx, pods)
if err != nil {
return fmt.Errorf("initializing pod watcher: %s", err)
}
Expand Down Expand Up @@ -158,3 +160,16 @@ func WaitForDeploymentToStabilize(ctx context.Context, c kubernetes.Interface, n
return false, nil
})
}

// newPodsWatcher returns a watcher over pods that is backed by a retry
// watcher, so the watch is re-established instead of silently ending when the
// underlying connection fails.
//
// It first lists the pods to obtain the resource version to start watching
// from, then creates a retry watcher whose watch function delegates to
// pods.Watch using the same ctx.
//
// The error from the initial list or from creating the retry watcher is
// returned unwrapped; in both cases the returned watch.Interface is a true nil.
func newPodsWatcher(ctx context.Context, pods corev1.PodInterface) (watch.Interface, error) {
	initList, err := pods.List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}

	retryWatcher, err := watchtools.NewRetryWatcher(initList.GetResourceVersion(), &cache.ListWatch{
		WatchFunc: func(listOptions metav1.ListOptions) (watch.Interface, error) {
			return pods.Watch(ctx, listOptions)
		},
	})
	if err != nil {
		// Return a literal nil rather than forwarding the concrete pointer:
		// a nil *RetryWatcher stored in watch.Interface would compare non-nil.
		return nil, err
	}

	return retryWatcher, nil
}
11 changes: 11 additions & 0 deletions pkg/skaffold/kubernetes/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ import (

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
fakekubeclientset "k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"

"github.com/GoogleContainerTools/skaffold/v2/testutil"
)
Expand Down Expand Up @@ -62,6 +64,9 @@ func TestWaitForPodSucceeded(t *testing.T) {
pod := &v1.Pod{}
client := fakekubeclientset.NewSimpleClientset(pod)

client.PrependReactor("list", "pods", func(action clienttesting.Action) (handled bool, ret runtime.Object, err error) {
return true, &v1.PodList{ListMeta: metav1.ListMeta{ResourceVersion: "1"}}, nil
})
fakeWatcher := watch.NewRaceFreeFake()
client.PrependWatchReactor("*", testutil.SetupFakeWatcher(fakeWatcher))
fakePods := client.CoreV1().Pods("")
Expand All @@ -78,12 +83,18 @@ func TestWaitForPodSucceeded(t *testing.T) {
switch phase {
case v1.PodPending, v1.PodRunning, v1.PodFailed, v1.PodSucceeded, v1.PodUnknown:
fakeWatcher.Modify(&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
ResourceVersion: "1",
},
Status: v1.PodStatus{
Phase: phase,
},
})
default:
fakeWatcher.Modify(&metav1.Status{
ListMeta: metav1.ListMeta{
ResourceVersion: "1",
},
Status: "Failure",
})
}
Expand Down
202 changes: 202 additions & 0 deletions vendor/k8s.io/apimachinery/pkg/util/cache/expiring.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit cd7c1fb

Please sign in to comment.