From d6134ec444d6ff73cbb802d090bafa59b0d471ee Mon Sep 17 00:00:00 2001
From: Lyndon-Li
Date: Mon, 31 Jul 2023 21:45:04 +0800
Subject: [PATCH] fix issue 6561

Replace the hardcoded alpine:latest image used by the data mover
backup/restore pods: build a velero-helper binary into the Velero image,
and make the exposers inherit the image and service account from the
node-agent daemonset, running "velero-helper pause" instead of
"sleep infinity" so no external image pull is required.

Signed-off-by: Lyndon-Li
---
 Dockerfile                                     |  4 +-
 cmd/velero-helper/velero-helper.go             | 27 ++++++++++
 .../data_download_controller_test.go           | 15 +++++-
 pkg/controller/data_upload_controller_test.go  | 15 +++++-
 pkg/exposer/csi_snapshot.go                    | 12 +++--
 pkg/exposer/csi_snapshot_test.go               | 16 ++++++
 pkg/exposer/generic_restore.go                 | 12 +++--
 pkg/exposer/generic_restore_test.go            | 15 ++++++
 pkg/exposer/image.go                           | 49 +++++++++++++++++++
 pkg/nodeagent/node_agent.go                    | 10 ++++
 10 files changed, 166 insertions(+), 9 deletions(-)
 create mode 100644 cmd/velero-helper/velero-helper.go
 create mode 100644 pkg/exposer/image.go

diff --git a/Dockerfile b/Dockerfile
index 7f600d0f27..2edd111ba8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,7 +41,9 @@ COPY . /go/src/github.com/vmware-tanzu/velero
 RUN mkdir -p /output/usr/bin && \
     export GOARM=$( echo "${GOARM}" | cut -c2-) && \
     go build -o /output/${BIN} \
-    -ldflags "${LDFLAGS}" ${PKG}/cmd/${BIN}
+    -ldflags "${LDFLAGS}" ${PKG}/cmd/${BIN} && \
+    go build -o /output/velero-helper \
+    -ldflags "${LDFLAGS}" ${PKG}/cmd/velero-helper
 
 # Restic binary build section
 FROM --platform=$BUILDPLATFORM golang:1.20-bullseye as restic-builder
diff --git a/cmd/velero-helper/velero-helper.go b/cmd/velero-helper/velero-helper.go
new file mode 100644
index 0000000000..5991531a60
--- /dev/null
+++ b/cmd/velero-helper/velero-helper.go
@@ -0,0 +1,27 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"time"
+)
+
+const (
+	// workingModePause is the general-purpose mode that holds the pod in a running state
+	workingModePause = "pause"
+)
+
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Fprintln(os.Stderr, "ERROR: at least one argument must be provided, the working mode")
+		os.Exit(1)
+	}
+
+	switch os.Args[1] {
+	case workingModePause:
+		time.Sleep(time.Duration(1<<63 - 1))
+	default:
+		fmt.Fprintln(os.Stderr, "ERROR: wrong working mode provided")
+		os.Exit(1)
+	}
+}
diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go
index a0a9b645f0..726d9b6a03 100644
--- a/pkg/controller/data_download_controller_test.go
+++ b/pkg/controller/data_download_controller_test.go
@@ -27,6 +27,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -141,6 +142,18 @@ func initDataDownloadReconcilerWithError(objects []runtime.Object, needError ...
 }
 
 func TestDataDownloadReconcile(t *testing.T) {
+	daemonSet := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "velero",
+			Name:      "node-agent",
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "DaemonSet",
+			APIVersion: appsv1.SchemeGroupVersion.String(),
+		},
+		Spec: appsv1.DaemonSetSpec{},
+	}
+
 	tests := []struct {
 		name string
 		dd   *velerov2alpha1api.DataDownload
@@ -283,7 +296,7 @@
 		t.Run(test.name, func(t *testing.T) {
 			var objs []runtime.Object
 			if test.targetPVC != nil {
-				objs = []runtime.Object{test.targetPVC}
+				objs = []runtime.Object{test.targetPVC, daemonSet}
 			}
 			r, err := initDataDownloadReconciler(objs, test.needErrs...)
 			require.NoError(t, err)
diff --git a/pkg/controller/data_upload_controller_test.go b/pkg/controller/data_upload_controller_test.go
index 3e0c046065..e17aad5271 100644
--- a/pkg/controller/data_upload_controller_test.go
+++ b/pkg/controller/data_upload_controller_test.go
@@ -27,6 +27,7 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -145,6 +146,18 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
 			RestoreSize: &restoreSize,
 		},
 	}
+	daemonSet := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "velero",
+			Name:      "node-agent",
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "DaemonSet",
+			APIVersion: appsv1.SchemeGroupVersion.String(),
+		},
+		Spec: appsv1.DaemonSetSpec{},
+	}
+
 	now, err := time.Parse(time.RFC1123, time.RFC1123)
 	if err != nil {
 		return nil, err
@@ -176,7 +189,7 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
 	}
 
 	fakeSnapshotClient := snapshotFake.NewSimpleClientset(vsObject, vscObj)
-	fakeKubeClient := clientgofake.NewSimpleClientset()
+	fakeKubeClient := clientgofake.NewSimpleClientset(daemonSet)
 	fakeFS := velerotest.NewFakeFileSystem()
 	pathGlob := fmt.Sprintf("/host_pods/%s/volumes/*/%s", "", dataUploadName)
 	_, err = fakeFS.Create(pathGlob)
diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go
index 4d452a39ed..a98ee0ad5d 100644
--- a/pkg/exposer/csi_snapshot.go
+++ b/pkg/exposer/csi_snapshot.go
@@ -345,6 +345,11 @@ func (e *csiSnapshotExposer) createBackupPVC(ctx context.Context, ownerObject co
 func (e *csiSnapshotExposer) createBackupPod(ctx context.Context, ownerObject corev1.ObjectReference, backupPVC *corev1.PersistentVolumeClaim, label map[string]string) (*corev1.Pod, error) {
 	podName := ownerObject.Name
 
+	podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace)
+	if err != nil {
+		return nil, errors.Wrap(err, "error to get inherited pod info from node-agent")
+	}
+
 	var gracePeriod int64 = 0
 
 	pod := &corev1.Pod{
@@ -366,15 +371,16 @@ func (e *csiSnapshotExposer) createBackupPod(ctx context.Context, ownerObject co
 			Containers: []corev1.Container{
 				{
 					Name:            podName,
-					Image:           "alpine:latest",
-					ImagePullPolicy: corev1.PullIfNotPresent,
-					Command:         []string{"sleep", "infinity"},
+					Image:           podInfo.image,
+					ImagePullPolicy: corev1.PullNever,
+					Command:         []string{"/velero-helper", "pause"},
 					VolumeMounts: []corev1.VolumeMount{{
 						Name:      backupPVC.Name,
 						MountPath: "/" + backupPVC.Name,
 					}},
 				},
 			},
+			ServiceAccountName:            podInfo.serviceAccount,
 			TerminationGracePeriodSeconds: &gracePeriod,
 			Volumes: []corev1.Volume{{
 				Name: backupPVC.Name,
diff --git a/pkg/exposer/csi_snapshot_test.go b/pkg/exposer/csi_snapshot_test.go
index e538e721ee..dfd4884d1b 100644
--- a/pkg/exposer/csi_snapshot_test.go
+++ b/pkg/exposer/csi_snapshot_test.go
@@ -31,6 +31,7 @@ import (
 	"k8s.io/client-go/kubernetes/fake"
 	clientTesting "k8s.io/client-go/testing"
 
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 
 	velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
@@ -82,6 +83,18 @@ func TestExpose(t *testing.T) {
 		},
 	}
 
+	daemonSet := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "velero",
+			Name:      "node-agent",
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "DaemonSet",
+			APIVersion: appsv1.SchemeGroupVersion.String(),
+		},
+		Spec: appsv1.DaemonSetSpec{},
+	}
+
 	tests := []struct {
 		name              string
 		snapshotClientObj []runtime.Object
@@ -257,6 +270,9 @@ func TestExpose(t *testing.T) {
 				vsObject,
 				vscObj,
 			},
+			kubeClientObj: []runtime.Object{
+				daemonSet,
+			},
 			kubeReactors: []reactor{
 				{
 					verb: "create",
diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go
index f11ac27c85..8881b12b16 100644
--- a/pkg/exposer/generic_restore.go
+++ b/pkg/exposer/generic_restore.go
@@ -251,6 +251,11 @@ func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObjec
 	restorePodName := ownerObject.Name
 	restorePVCName := ownerObject.Name
 
+	podInfo, err := getInheritedPodInfo(ctx, e.kubeClient, ownerObject.Namespace)
+	if err != nil {
+		return nil, errors.Wrap(err, "error to get inherited pod info from node-agent")
+	}
+
 	var gracePeriod int64 = 0
 
 	pod := &corev1.Pod{
@@ -272,15 +277,16 @@ func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObjec
 			Containers: []corev1.Container{
 				{
 					Name:            restorePodName,
-					Image:           "alpine:latest",
-					ImagePullPolicy: corev1.PullIfNotPresent,
-					Command:         []string{"sleep", "infinity"},
+					Image:           podInfo.image,
+					ImagePullPolicy: corev1.PullNever,
+					Command:         []string{"/velero-helper", "pause"},
 					VolumeMounts: []corev1.VolumeMount{{
 						Name:      restorePVCName,
 						MountPath: "/" + restorePVCName,
 					}},
 				},
 			},
+			ServiceAccountName:            podInfo.serviceAccount,
 			TerminationGracePeriodSeconds: &gracePeriod,
 			Volumes: []corev1.Volume{{
 				Name: restorePVCName,
diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go
index 23dd3f4800..e55657710c 100644
--- a/pkg/exposer/generic_restore_test.go
+++ b/pkg/exposer/generic_restore_test.go
@@ -30,6 +30,7 @@ import (
 	velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
 	velerotest "github.com/vmware-tanzu/velero/pkg/test"
 
+	appsv1 "k8s.io/api/apps/v1"
 	corev1api "k8s.io/api/core/v1"
 	clientTesting "k8s.io/client-go/testing"
 )
@@ -64,6 +65,18 @@ func TestRestoreExpose(t *testing.T) {
 		},
 	}
 
+	daemonSet := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "velero",
+			Name:      "node-agent",
+		},
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "DaemonSet",
+			APIVersion: appsv1.SchemeGroupVersion.String(),
+		},
+		Spec: appsv1.DaemonSetSpec{},
+	}
+
 	tests := []struct {
 		name          string
 		kubeClientObj []runtime.Object
@@ -97,6 +110,7 @@ func TestRestoreExpose(t *testing.T) {
 			ownerRestore: restore,
 			kubeClientObj: []runtime.Object{
 				targetPVCObj,
+				daemonSet,
 			},
 			kubeReactors: []reactor{
 				{
@@ -116,6 +130,7 @@ func TestRestoreExpose(t *testing.T) {
 			ownerRestore: restore,
 			kubeClientObj: []runtime.Object{
 				targetPVCObj,
+				daemonSet,
 			},
 			kubeReactors: []reactor{
 				{
diff --git a/pkg/exposer/image.go b/pkg/exposer/image.go
new file mode 100644
index 0000000000..c29a59447c
--- /dev/null
+++ b/pkg/exposer/image.go
@@ -0,0 +1,49 @@
+/*
+Copyright The Velero Contributors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package exposer
+
+import (
+	"context"
+
+	"github.com/pkg/errors"
+	"k8s.io/client-go/kubernetes"
+
+	"github.com/vmware-tanzu/velero/pkg/nodeagent"
+)
+
+type inheritedPodInfo struct {
+	image          string
+	serviceAccount string
+}
+
+func getInheritedPodInfo(ctx context.Context, client kubernetes.Interface, veleroNamespace string) (inheritedPodInfo, error) {
+	podInfo := inheritedPodInfo{}
+
+	podSpec, err := nodeagent.GetPodSpec(ctx, client, veleroNamespace)
+	if err != nil {
+		return podInfo, errors.Wrap(err, "error to get node-agent pod template")
+	}
+
+	if len(podSpec.Containers) != 1 {
+		return podInfo, errors.New("unexpected pod template from node-agent")
+	}
+
+	podInfo.image = podSpec.Containers[0].Image
+	podInfo.serviceAccount = podSpec.ServiceAccountName
+
+	return podInfo, nil
+}
diff --git a/pkg/nodeagent/node_agent.go b/pkg/nodeagent/node_agent.go
index b0d8e6725d..83e76d2a45 100644
--- a/pkg/nodeagent/node_agent.go
+++ b/pkg/nodeagent/node_agent.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 
 	"github.com/pkg/errors"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/kubernetes"
 
 	"github.com/vmware-tanzu/velero/pkg/util/kube"
@@ -73,3 +74,12 @@ func IsRunningInNode(ctx context.Context, namespace string, nodeName string, pod
 
 	return errors.Errorf("daemonset pod not found in running state in node %s", nodeName)
 }
+
+func GetPodSpec(ctx context.Context, kubeClient kubernetes.Interface, namespace string) (*v1.PodSpec, error) {
+	ds, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonSet, metav1.GetOptions{})
+	if err != nil {
+		return nil, errors.Wrap(err, "error to get node-agent daemonset")
+	}
+
+	return &ds.Spec.Template.Spec, nil
+}
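
---

Note for reviewers (not part of the patch to apply): the sketch below is a
minimal, self-contained illustration of the new plumbing. The exposers look
up the node-agent daemonset in the Velero namespace via nodeagent.GetPodSpec
and reuse its pod template's image and service account for the backup/restore
pods. The daemonset shape mirrors the test fixtures above, but the image tag
"velero/velero:main" and the service account name "velero" are placeholders,
not values this patch prescribes.

    package main

    import (
    	"context"
    	"fmt"

    	appsv1 "k8s.io/api/apps/v1"
    	corev1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    	"k8s.io/client-go/kubernetes/fake"

    	"github.com/vmware-tanzu/velero/pkg/nodeagent"
    )

    func main() {
    	// A node-agent daemonset shaped like the test fixtures, but with a
    	// populated pod template (placeholder image and service account).
    	ds := &appsv1.DaemonSet{
    		ObjectMeta: metav1.ObjectMeta{Namespace: "velero", Name: "node-agent"},
    		Spec: appsv1.DaemonSetSpec{
    			Template: corev1.PodTemplateSpec{
    				Spec: corev1.PodSpec{
    					ServiceAccountName: "velero",
    					Containers: []corev1.Container{
    						{Name: "node-agent", Image: "velero/velero:main"},
    					},
    				},
    			},
    		},
    	}

    	// GetPodSpec is the helper added to pkg/nodeagent; with a fake
    	// clientset it returns the daemonset's pod template spec.
    	podSpec, err := nodeagent.GetPodSpec(context.Background(), fake.NewSimpleClientset(ds), "velero")
    	if err != nil {
    		panic(err)
    	}

    	// The exposers copy these two fields into the pods they create, which
    	// then run "/velero-helper pause" instead of alpine's "sleep infinity".
    	fmt.Println(podSpec.Containers[0].Image, podSpec.ServiceAccountName)
    }

Because the image is inherited from a pod already running on the cluster,
the new pods can use ImagePullPolicy PullNever and work in registry-restricted
environments where pulling alpine:latest would fail.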