From fcc37edbee12c5a156820769e88d602472b6f4a2 Mon Sep 17 00:00:00 2001 From: Dawid Korzepa <56738926+korzepadawid@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:18:25 +0200 Subject: [PATCH] [ISSUE-1178]: Disk Eject, fixed fake attach after node reboot during DR (#1179) * [ISSUE-1178]: fixed fake attach after node reboot during DR Signed-off-by: Dawid Korzepa * [ISSUE-1178]: working reboot with DR fake-attach Signed-off-by: Dawid Korzepa --------- Signed-off-by: Dawid Korzepa --- pkg/crcontrollers/drive/drivecontroller.go | 8 +- .../drive/drivecontroller_test.go | 22 ++--- pkg/node/node.go | 25 ++++-- pkg/node/node_test.go | 88 ++++++++++++++++++- 4 files changed, 118 insertions(+), 25 deletions(-) diff --git a/pkg/crcontrollers/drive/drivecontroller.go b/pkg/crcontrollers/drive/drivecontroller.go index 26ef518a1..46caf94f0 100644 --- a/pkg/crcontrollers/drive/drivecontroller.go +++ b/pkg/crcontrollers/drive/drivecontroller.go @@ -57,8 +57,8 @@ const ( fakeAttachVolumeAnnotation = "fake-attach" fakeAttachVolumeKey = "yes" - allVolumesFakeAttachedAnnotation = "all-volumes-fake-attached" - allVolumesFakeAttachedKey = "yes" + allDRVolumesFakeAttachedAnnotation = "all-dr-volumes-fake-attached" + allDRVolumesFakeAttachedKey = "yes" ) // NewController creates new instance of Controller structure @@ -181,8 +181,8 @@ func (c *Controller) handleDriveUpdate(ctx context.Context, log *logrus.Entry, d break } - value, foundAllVolFakeAttach := drive.Annotations[allVolumesFakeAttachedAnnotation] - fakeAttachDR := !drive.Spec.IsClean && foundAllVolFakeAttach && value == allVolumesFakeAttachedKey + value, foundAllDRVolFakeAttach := drive.Annotations[allDRVolumesFakeAttachedAnnotation] + fakeAttachDR := !drive.Spec.IsClean && foundAllDRVolFakeAttach && value == allDRVolumesFakeAttachedKey if drive.Spec.IsClean || fakeAttachDR { log.Infof("Initiating automatic removal of drive: %s", drive.GetName()) } else { diff --git a/pkg/crcontrollers/drive/drivecontroller_test.go b/pkg/crcontrollers/drive/drivecontroller_test.go index 97c01d442..f646cc621 100644 --- a/pkg/crcontrollers/drive/drivecontroller_test.go +++ b/pkg/crcontrollers/drive/drivecontroller_test.go @@ -468,7 +468,7 @@ func TestDriveController_handleDriveUpdate(t *testing.T) { assert.NotNil(t, expectedD) expectedD.Spec.Usage = apiV1.DriveUsageReleased expectedD.Spec.IsClean = false - expectedD.Annotations = map[string]string{allVolumesFakeAttachedAnnotation: allVolumesFakeAttachedKey} + expectedD.Annotations = map[string]string{allDRVolumesFakeAttachedAnnotation: allDRVolumesFakeAttachedKey} assert.Nil(t, dc.client.CreateCR(testCtx, expectedD.Name, expectedD)) expectedV := failedVolCR.DeepCopy() @@ -880,19 +880,19 @@ func TestDriveController_handleDriveUsageRemovingWithLED(t *testing.T) { expectedLedState string }{ { - name: "Check locate drive state - Success", - client: &mocks.MockDriveMgrClient{}, - currentUsage: apiV1.DriveUsageRemoving, - currentLedState: fmt.Sprintf("%d", apiV1.LocateStatusOff), - expectedUsage: apiV1.DriveUsageRemoved, + name: "Check locate drive state - Success", + client: &mocks.MockDriveMgrClient{}, + currentUsage: apiV1.DriveUsageRemoving, + currentLedState: fmt.Sprintf("%d", apiV1.LocateStatusOff), + expectedUsage: apiV1.DriveUsageRemoved, expectedLedState: fmt.Sprintf("%d", apiV1.LocateStatusOn), }, { - name: "Check locate drive state - Locate failure", - client: &mocks.MockDriveMgrClientFail{}, - currentUsage: apiV1.DriveUsageRemoving, - currentLedState: fmt.Sprintf("%d", apiV1.LocateStatusOff), - expectedUsage: apiV1.DriveUsageFailed, + name: "Check locate drive state - Locate failure", + client: &mocks.MockDriveMgrClientFail{}, + currentUsage: apiV1.DriveUsageRemoving, + currentLedState: fmt.Sprintf("%d", apiV1.LocateStatusOff), + expectedUsage: apiV1.DriveUsageFailed, expectedLedState: fmt.Sprintf("%d", apiV1.LocateStatusOff), }, } diff --git a/pkg/node/node.go b/pkg/node/node.go index c5fb10258..3c268bbd8 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -60,8 +60,8 @@ const ( fakeAttachVolumeAnnotation = "fake-attach" fakeAttachVolumeKey = "yes" - allVolumesFakeAttachedAnnotation = "all-volumes-fake-attached" - allVolumesFakeAttachedKey = "yes" + allDRVolumesFakeAttachedAnnotation = "all-dr-volumes-fake-attached" + allDRVolumesFakeAttachedKey = "yes" fakeDeviceVolumeAnnotation = "fake-device" fakeDeviceSrcFileDir = "/var/lib/kubelet/plugins/kubernetes.io/csi/volumeDevices/fake/" @@ -200,17 +200,23 @@ func (s *CSINodeService) processFakeAttachInNodeStageVolume( return nil } -// annotateIfAllVolsFakeAttached checks if all volumes associated with a drive used by a given volume are fake attached. +// annotateIfAllDRVolsFakeAttached checks if all volumes associated with a drive (under DR) used by a given volume are fake attached. // // The function takes a context.Context object and a pointer to a volumecrd.Volume object as parameters. // It returns an error if there is an error during the process. -func (s *CSINodeService) annotateIfAllVolsFakeAttached(ctx context.Context, volumeCR *volumecrd.Volume) error { +func (s *CSINodeService) annotateIfAllDRVolsFakeAttached(ctx context.Context, volumeCR *volumecrd.Volume) error { driveCR, err := s.crHelper.GetDriveCRByVolume(volumeCR) if err != nil { s.log.Errorf("failed to get driveCR by volumeCR %s: %v", volumeCR.Name, err) return err } + isDR := driveCR.Spec.Status == apiV1.DriveStatusOnline + if !isDR { + s.log.Debugf("drive %s is not online, no need to annotate", driveCR.Spec.GetUUID()) + return nil + } + volumes, err := s.crHelper.GetVolumesByLocation(ctx, driveCR.Spec.GetUUID()) if err != nil { s.log.Errorf("failed to list volumes by location %s: %v", driveCR.Spec.GetUUID(), err) @@ -228,7 +234,7 @@ func (s *CSINodeService) annotateIfAllVolsFakeAttached(ctx context.Context, volu driveCR.Annotations = make(map[string]string) } - driveCR.Annotations[allVolumesFakeAttachedAnnotation] = allVolumesFakeAttachedKey + driveCR.Annotations[allDRVolumesFakeAttachedAnnotation] = allDRVolumesFakeAttachedKey if err = s.k8sClient.UpdateCR(ctx, driveCR); err != nil { s.log.Errorf("failed to update driveCR %s: %v", driveCR.Name, err) return err @@ -246,7 +252,7 @@ func (s *CSINodeService) annotateIfAllVolsFakeAttached(ctx context.Context, volu // // Returns: // - bool: a boolean indicating whether fake attach annotation is present, it's always true when fake attach is needed in DR. -// - bool: a boolean indicating whether fake attach is needed in DR. +// - bool: a boolean indicating whether fake attach is needed in DR or all volumes related to drive were fake attached before reboot. func (s *CSINodeService) isFakeAttachNeed(volumeCR *volumecrd.Volume) (bool, bool) { fakeAttachBasic, fakeAttachDR := false, false @@ -259,7 +265,10 @@ func (s *CSINodeService) isFakeAttachNeed(volumeCR *volumecrd.Volume) (bool, boo if value, ok := pvc.Annotations[fakeAttachAnnotation]; ok && value == fakeAttachAllowKey { fakeAttachBasic = true if driveCR, err := s.crHelper.GetDriveCRByVolume(volumeCR); err == nil { - fakeAttachDR = driveCR.Spec.Usage == apiV1.DriveUsageReleased && driveCR.Spec.Status == apiV1.DriveStatusOnline + value, found := driveCR.Annotations[allDRVolumesFakeAttachedAnnotation] + // it's required to keep fake-attach during node reboot + allVolsPreviouslyFakeAttached := found && value == allDRVolumesFakeAttachedKey + fakeAttachDR = (driveCR.Spec.Usage == apiV1.DriveUsageReleased && driveCR.Spec.Status == apiV1.DriveStatusOnline) || allVolsPreviouslyFakeAttached } } @@ -603,7 +612,7 @@ func (s *CSINodeService) NodePublishVolume(ctx context.Context, req *csi.NodePub resp, errToReturn = nil, fmt.Errorf("failed to publish volume: update volume CR error") } - if err = s.annotateIfAllVolsFakeAttached(ctx, volumeCR); err != nil { + if err = s.annotateIfAllDRVolsFakeAttached(ctx, volumeCR); err != nil { ll.Errorf("Failed to annotate drive CR, error: %v", err) resp, errToReturn = nil, fmt.Errorf("failed to publish volume: annotate drive CR error, %s", err.Error()) } diff --git a/pkg/node/node_test.go b/pkg/node/node_test.go index 3e041e95e..3b3408b6a 100644 --- a/pkg/node/node_test.go +++ b/pkg/node/node_test.go @@ -741,7 +741,54 @@ var _ = Describe("CSINodeService Fake-Attach", func() { Expect(vol1.Annotations[fakeAttachVolumeAnnotation]).To(Equal(fakeAttachVolumeKey)) Expect(vol1.Annotations[fakeDeviceVolumeAnnotation]).To(Equal(expectedFakeDevice)) }) + It("Should stage healthy block-mode volume successfully after node reboot (DR)", func() { + req := getNodeStageRequest(testVolume1.Id, *testVolumeCap) + drive1 := &drivecrd.Drive{} + drive1.Name = "drive1" + drive1.Spec.Usage = apiV1.DriveUsageRemoved + drive1.Spec.Status = apiV1.DriveStatusOnline + drive1.Annotations = map[string]string{allDRVolumesFakeAttachedAnnotation: allDRVolumesFakeAttachedKey} + err := node.k8sClient.CreateCR(testCtx, drive1.Name, drive1) + Expect(err).To(BeNil()) + + vol1 := &vcrd.Volume{} + err = node.k8sClient.ReadCR(testCtx, testVolume1.Id, testNs, vol1) + Expect(err).To(BeNil()) + vol1.Spec.CSIStatus = apiV1.Created + vol1.Spec.Mode = apiV1.ModeRAWPART + vol1.Spec.LocationType = apiV1.LocationTypeDrive + vol1.Spec.Location = drive1.Name + err = node.k8sClient.UpdateCR(testCtx, vol1) + Expect(err).To(BeNil()) + + createPVAndPVCForFakeAttach(vol1.Name) + partitionPath := "/partition/path/for/volume1" + prov.On("GetVolumePath", &vol1.Spec).Return(partitionPath, nil) + fsOps.On("PrepareAndPerformMount", + partitionPath, path.Join(req.GetStagingTargetPath(), stagingFileName), true, false). + Return(nil).Once() + + expectedFakeDevice := "/dev/loop2" + fakeDeviceSrcFile := fakeDeviceSrcFileDir + vol1.Name + + // The case that create fake device successfully + fsOps.On("CreateFakeDevice", fakeDeviceSrcFile). + Return(expectedFakeDevice, nil).Once() + fsOps.On("PrepareAndPerformMount", + expectedFakeDevice, path.Join(req.GetStagingTargetPath(), stagingFileName), true, false). + Return(nil).Once() + + resp, err := node.NodeStageVolume(testCtx, req) + Expect(resp).NotTo(BeNil()) + Expect(err).To(BeNil()) + + err = node.k8sClient.ReadCR(testCtx, testV1ID, "", vol1) + Expect(err).To(BeNil()) + Expect(vol1.Spec.CSIStatus).To(Equal(apiV1.VolumeReady)) + Expect(vol1.Annotations[fakeAttachVolumeAnnotation]).To(Equal(fakeAttachVolumeKey)) + Expect(vol1.Annotations[fakeDeviceVolumeAnnotation]).To(Equal(expectedFakeDevice)) + }) It("Should stage unhealthy block-mode volume successfully", func() { req := getNodeStageRequest(testVolume1.Id, *testVolumeCap) vol1 := &vcrd.Volume{} @@ -1001,7 +1048,7 @@ var _ = Describe("CSINodeService Fake-Attach", func() { driveCR := &drivecrd.Drive{} err = node.k8sClient.ReadCR(testCtx, drive1.Name, "", driveCR) Expect(err).To(BeNil()) - Expect(driveCR.Annotations[allVolumesFakeAttachedAnnotation]).To(Equal(allVolumesFakeAttachedKey)) + Expect(driveCR.Annotations[allDRVolumesFakeAttachedAnnotation]).To(Equal(allDRVolumesFakeAttachedKey)) }) It("Should publish unhealthy fs-mode volume with fake-attach annotation and annotate drive (Drive)", func() { req := getNodePublishRequest(testV1ID, targetPath, *testVolumeCap) @@ -1037,7 +1084,44 @@ var _ = Describe("CSINodeService Fake-Attach", func() { driveCR := &drivecrd.Drive{} err = node.k8sClient.ReadCR(testCtx, drive1.Name, "", driveCR) Expect(err).To(BeNil()) - Expect(driveCR.Annotations[allVolumesFakeAttachedAnnotation]).To(Equal(allVolumesFakeAttachedKey)) + Expect(driveCR.Annotations[allDRVolumesFakeAttachedAnnotation]).To(Equal(allDRVolumesFakeAttachedKey)) + }) + It("Should publish unhealthy fs-mode volume with fake-attach annotation and skip annotating an offline drive (Drive)", func() { + req := getNodePublishRequest(testV1ID, targetPath, *testVolumeCap) + req.VolumeContext[util.PodNameKey] = testPod1Name + + drive1 := &drivecrd.Drive{} + drive1.Name = "drive-hdd-1" + drive1.Spec.Usage = apiV1.DriveUsageReleased + drive1.Spec.Status = apiV1.DriveStatusOffline + drive1.Spec.UUID = drive1.Name + err := node.k8sClient.CreateCR(testCtx, drive1.Name, drive1) + Expect(err).To(BeNil()) + + vol1 := &vcrd.Volume{} + err = node.k8sClient.ReadCR(testCtx, testVolume1.Id, testNs, vol1) + Expect(err).To(BeNil()) + vol1.Spec.CSIStatus = apiV1.Published + vol1.Spec.LocationType = apiV1.LocationTypeDrive + vol1.Spec.Location = drive1.Name + vol1.Annotations = map[string]string{fakeAttachVolumeAnnotation: fakeAttachVolumeKey} + vol1.Spec.Mode = apiV1.ModeFS + err = node.k8sClient.UpdateCR(testCtx, vol1) + Expect(err).To(BeNil()) + + fsOps.On("MountFakeTmpfs", + testV1ID, req.GetTargetPath()). + Return(nil) + + resp, err := node.NodePublishVolume(testCtx, req) + Expect(resp).NotTo(BeNil()) + Expect(err).To(BeNil()) + + driveCR := &drivecrd.Drive{} + err = node.k8sClient.ReadCR(testCtx, drive1.Name, "", driveCR) + Expect(err).To(BeNil()) + _, foundAllDRVolsAnnotation := driveCR.Annotations[allDRVolumesFakeAttachedAnnotation] + Expect(foundAllDRVolsAnnotation).To(BeFalse()) }) It("Should stage healthy block-mode volume with fake-attach and valid fake-device annotation", func() { req := getNodeStageRequest(testVolume1.Id, *testVolumeCap)