Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check orphan PVC before updating statefulSet #526

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 44 additions & 6 deletions controllers/zookeepercluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,20 @@ func (r *ZookeeperClusterReconciler) reconcileStatefulSet(instance *zookeeperv1b
} else if err != nil {
return err
} else {
// check whether orphans PVCs need to be deleted before updating the sts
if instance.Spec.Persistence != nil &&
instance.Spec.Persistence.VolumeReclaimPolicy == zookeeperv1beta1.VolumeReclaimPolicyDelete {
pvcCount, err := r.getPVCCount(instance)
if err != nil {
return err
}
r.Log.Info("PVC count", "count", pvcCount, "replicas", foundSts.Status.Replicas, "cr replicas", instance.Spec.Replicas)
if pvcCount > int(foundSts.Status.Replicas) {
r.Log.Info("Deleting PVCs", "count", pvcCount, "replicas", instance.Status.Replicas)
return nil
}
}

// check whether zookeeperCluster is updated before updating the sts
cmp := compareResourceVersion(instance, foundSts)
if cmp < 0 {
Expand Down Expand Up @@ -258,8 +272,6 @@ func (r *ZookeeperClusterReconciler) updateStatefulSet(instance *zookeeperv1beta
if err != nil {
return err
}
instance.Status.Replicas = foundSts.Status.Replicas
instance.Status.ReadyReplicas = foundSts.Status.ReadyReplicas
return nil
}

Expand Down Expand Up @@ -558,6 +570,15 @@ func (r *ZookeeperClusterReconciler) reconcileClusterStatus(instance *zookeeperv
instance.Status.Members.Ready = readyMembers
instance.Status.Members.Unready = unreadyMembers

foundSts := &appsv1.StatefulSet{}
err = r.Client.Get(context.TODO(), types.NamespacedName{
Name: instance.GetName(),
Namespace: instance.Namespace,
}, foundSts)

instance.Status.Replicas = foundSts.Status.Replicas
instance.Status.ReadyReplicas = foundSts.Status.ReadyReplicas

// If Cluster is in a ready state...
if instance.Spec.Replicas == instance.Status.ReadyReplicas && (!instance.Status.MetaRootCreated) {
r.Log.Info("Cluster is Ready, Creating ZK Metadata...")
Expand Down Expand Up @@ -707,21 +728,38 @@ func (r *ZookeeperClusterReconciler) getPVCCount(instance *zookeeperv1beta1.Zook
}

func (r *ZookeeperClusterReconciler) cleanupOrphanPVCs(instance *zookeeperv1beta1.ZookeeperCluster) (err error) {
// get the up to date STS
foundSts := &appsv1.StatefulSet{}
err = r.Client.Get(context.TODO(), types.NamespacedName{
Name: instance.GetName(),
Namespace: instance.Namespace,
}, foundSts)
if err != nil {
if errors.IsNotFound(err) {
return nil
}
return err
}

// this check should make sure we do not delete the PVCs before the STS has scaled down
if instance.Status.ReadyReplicas == instance.Spec.Replicas {
if foundSts.Status.ReadyReplicas == foundSts.Status.Replicas {
pvcCount, err := r.getPVCCount(instance)
if err != nil {
return err
}
r.Log.Info("cleanupOrphanPVCs", "PVC Count", pvcCount, "ReadyReplicas Count", instance.Status.ReadyReplicas)
if pvcCount > int(instance.Spec.Replicas) {

r.Log.Info("cleanupOrphanPVCs",
"PVC Count", pvcCount,
"Replicas Count", foundSts.Spec.Replicas)
if pvcCount > int(*foundSts.Spec.Replicas) {
pvcList, err := r.getPVCList(instance)
if err != nil {
return err
}
for _, pvcItem := range pvcList.Items {
// delete only Orphan PVCs
if utils.IsPVCOrphan(pvcItem.Name, instance.Spec.Replicas) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hoyhbx why are we deleting PVCs based on the StatefulSet's replicas? The operator is looking at the ZookeeperCluster resource. Is there any issue you are seeing if we delete based on instance.Spec.Replicas?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to handle the race condition where the operator has not yet deleted the orphan PVCs after scaling down, but the user scales the cluster back up. In that case, if instance.Spec.Replicas is used to delete old PVCs, the old PVCs will never get deleted.

Just as in the added e2e test: when scaling down from 3 to 1, two pods are deleted and the StatefulSet's replica count goes down to 1. It then takes some time for the operator to delete the orphan PVCs. But if, before the operator is able to delete the orphan PVCs, the user scales up from 1 to 3, changing instance.Spec.Replicas to 3, then the old PVCs will never be deleted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @anishakj , does the above explanation make sense to you?
The problem is basically because there is a race condition when deleting the PVC and user upscaling

if utils.IsPVCOrphan(pvcItem.Name, *foundSts.Spec.Replicas) {
r.Log.Info("cleanupOrphanPVCs", "Deleting Orphan PVC", pvcItem.Name)
r.deletePVC(pvcItem)
}
}
Expand Down
39 changes: 39 additions & 0 deletions test/e2e/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,43 @@ var _ = Describe("Perform scale for cluster upgrade", func() {
Expect(zk_e2eutil.WaitForClusterToTerminate(logger, k8sClient, zk)).NotTo(HaveOccurred())
})
})

Context("Scale down and up", func() {
It("should wait for orphan PVCs cleaned before scaling up", func() {
defaultCluster := zk_e2eutil.NewDefaultCluster(testNamespace)
defaultCluster.WithDefaults()

defaultCluster.Status.Init()
defaultCluster.Spec.Persistence.VolumeReclaimPolicy = "Delete"

zk, err := zk_e2eutil.CreateCluster(logger, k8sClient, defaultCluster)

Expect(err).NotTo(HaveOccurred())

// A default zk cluster should have 3 pods
podSize := 3
Expect(zk_e2eutil.WaitForClusterToBecomeReady(logger, k8sClient, zk, podSize)).NotTo(HaveOccurred())

// This is to get the latest zk cluster object
zk, err = zk_e2eutil.GetCluster(logger, k8sClient, zk)
Expect(err).NotTo(HaveOccurred())

// Scale down zk cluster, decrease replicas to 1
zk.Spec.Replicas = 1
podSize = 1
Expect(zk_e2eutil.UpdateCluster(logger, k8sClient, zk)).NotTo(HaveOccurred())

Expect(zk_e2eutil.WaitForClusterToBecomeReady(logger, k8sClient, zk, podSize)).NotTo(HaveOccurred())

zk, err = zk_e2eutil.GetCluster(logger, k8sClient, zk)
Expect(err).NotTo(HaveOccurred())

// Scale up zk cluster to 3 again, before the PVCs are cleaned up
zk.Spec.Replicas = 3
podSize = 3
Expect(zk_e2eutil.UpdateCluster(logger, k8sClient, zk)).NotTo(HaveOccurred())

Expect(zk_e2eutil.WaitForClusterToBecomeReady(logger, k8sClient, zk, podSize)).NotTo(HaveOccurred())
})
})
})