Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Fix non control plane node deletion when the cluster has no control plane machines #11552

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 16 additions & 13 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -784,21 +784,24 @@ func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1
}
}

// Get all of the active machines that belong to this cluster.
machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
if err != nil {
return err
}
if util.IsControlPlaneMachine(machine) {
// Get all the machines that belong to this cluster.
machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster)
if err != nil {
return err
}

// Whether or not it is okay to delete the NodeRef depends on the
// number of remaining control plane members and whether or not this
// machine is one of them.
numControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
if numControlPlaneMachines == 0 {
// Do not delete the NodeRef if there are no remaining members of
// the control plane.
return errNoControlPlaneNodes
// Whether or not it is okay to delete the NodeRef depends on the
// number of remaining control plane members and whether or not this
// machine is one of them.
numOfControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
if numOfControlPlaneMachines == 1 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are not filtering by active machines anymore, what happens if this returns 2 because there's another cp machine but that one is also being deleted?

// Do not delete the NodeRef as this is the last remaining member of
// the control plane.
return errLastControlPlaneNode
}
}

// Otherwise it is okay to delete the NodeRef.
return nil
}
Expand Down
156 changes: 101 additions & 55 deletions internal/controllers/machine/machine_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2529,9 +2529,53 @@ func nodeNameIndex(o client.Object) []string {
func TestIsDeleteNodeAllowed(t *testing.T) {
deletionts := metav1.Now()

cp1 := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "cp1",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
clusterv1.MachineControlPlaneLabel: "",
},
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test1",
},
},
}
cp2 := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "cp2",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
clusterv1.MachineControlPlaneLabel: "",
},
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test2",
},
},
}

testCases := []struct {
name string
cluster *clusterv1.Cluster
extraMachines []*clusterv1.Machine
machine *clusterv1.Machine
expectedError error
}{
Expand Down Expand Up @@ -2559,10 +2603,13 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
Status: clusterv1.MachineStatus{},
},
extraMachines: nil,
expectedError: errNilNodeRef,
},
{
name: "no control plane members",
// This situation can happen when the control plane is not implemented via Cluster API,
// so for Cluster API the control plane machine members count is zero in this case.
name: "no control plane members, deleting a worker",
damdo marked this conversation as resolved.
Show resolved Hide resolved
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
Expand All @@ -2576,7 +2623,8 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
},
Finalizers: []string{clusterv1.MachineFinalizer},
Finalizers: []string{clusterv1.MachineFinalizer},
DeletionTimestamp: &metav1.Time{Time: time.Now().UTC()},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
Expand All @@ -2589,10 +2637,11 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
expectedError: errNoControlPlaneNodes,
extraMachines: nil,
expectedError: nil,
},
{
name: "is last control plane member",
name: "is last control plane member, trying to delete it",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
Expand Down Expand Up @@ -2621,7 +2670,40 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
expectedError: errNoControlPlaneNodes,
extraMachines: nil,
expectedError: errLastControlPlaneNode,
},
{
name: "only one control plane member, trying to delete a worker",
cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
Namespace: metav1.NamespaceDefault,
},
},
machine: &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "created",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
},
Finalizers: []string{clusterv1.MachineFinalizer},
DeletionTimestamp: &metav1.Time{Time: time.Now().UTC()},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test",
},
},
},
extraMachines: nil,
expectedError: nil,
},
{
name: "has nodeRef and control plane is healthy",
Expand Down Expand Up @@ -2651,6 +2733,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
extraMachines: []*clusterv1.Machine{cp1, cp2},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've refactored the testing struct slightly so we can pass in zero control plane machines, otherwise without this change there are always two control plane machines provided to each test entry.

expectedError: nil,
},
{
Expand All @@ -2664,6 +2747,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
machine: &clusterv1.Machine{},
extraMachines: nil,
expectedError: errClusterIsBeingDeleted,
},
{
Expand Down Expand Up @@ -2702,6 +2786,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
extraMachines: []*clusterv1.Machine{cp1, cp2},
expectedError: nil,
},
{
Expand Down Expand Up @@ -2740,6 +2825,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
extraMachines: nil,
expectedError: errControlPlaneIsBeingDeleted,
},
{
Expand Down Expand Up @@ -2778,6 +2864,7 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
},
},
},
extraMachines: nil,
expectedError: errControlPlaneIsBeingDeleted,
},
}
Expand Down Expand Up @@ -2822,61 +2909,19 @@ func TestIsDeleteNodeAllowed(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)

m1 := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "cp1",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
},
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test1",
},
},
}
m2 := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "cp2",
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.ClusterNameLabel: "test-cluster",
},
Finalizers: []string{clusterv1.MachineFinalizer},
},
Spec: clusterv1.MachineSpec{
ClusterName: "test-cluster",
InfrastructureRef: corev1.ObjectReference{},
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
NodeRef: &corev1.ObjectReference{
Name: "test2",
},
},
}
// For isDeleteNodeAllowed to be true we assume a healthy control plane.
if tc.expectedError == nil {
m1.Labels[clusterv1.MachineControlPlaneLabel] = ""
m2.Labels[clusterv1.MachineControlPlaneLabel] = ""
}

c := fake.NewClientBuilder().WithObjects(
objects := []client.Object{
tc.cluster,
tc.machine,
m1,
m2,
emp,
mcpBeingDeleted,
empBeingDeleted,
).Build()
}

for _, e := range tc.extraMachines {
objects = append(objects, e)
}

c := fake.NewClientBuilder().WithObjects(objects...).Build()
mr := &Reconciler{
Client: c,
}
Expand Down Expand Up @@ -3197,6 +3242,7 @@ func TestNodeDeletion(t *testing.T) {
Namespace: metav1.NamespaceDefault,
Labels: map[string]string{
clusterv1.MachineControlPlaneLabel: "",
clusterv1.ClusterNameLabel: "test-cluster",
},
Annotations: map[string]string{
"machine.cluster.x-k8s.io/exclude-node-draining": "",
Expand Down
1 change: 1 addition & 0 deletions util/collections/machine_filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ func ControlPlaneMachines(clusterName string) func(machine *clusterv1.Machine) b
if machine == nil {
return false
}

return selector.Matches(labels.Set(machine.Labels))
}
}
Expand Down
Loading