From 8085a0a2b61703f6646f68684c31e6cc1c8f815f Mon Sep 17 00:00:00 2001 From: CraneShiEMC <64512450+CraneShiEMC@users.noreply.github.com> Date: Mon, 31 Jul 2023 04:17:02 +0800 Subject: [PATCH] [ISSUE-1018] Refine Storage Group Feature (#1031) * add StorageGroupStatus Signed-off-by: Shi, Crane * support StorageGroupStatus in current workflows Signed-off-by: Shi, Crane * refine log Signed-off-by: Shi, Crane * refine error handling Signed-off-by: Shi, Crane * trigger storage group resync if applicable in drive removal Signed-off-by: Shi, Crane * A drive whose Usage is REMOVED will not be selected in any storage group and its existing sg label takes no effect Signed-off-by: Shi, Crane * sg feature will not apply to drive physically removed Signed-off-by: Shi, Crane * handle the drive removal case of drive with manual sg label Signed-off-by: Shi, Crane * fix go lint error Signed-off-by: Shi, Crane * add UT case for drive-removal-triggered sg sync Signed-off-by: Shi, Crane * improve UT coverage Signed-off-by: Shi, Crane * refine sg annotation for drive removal Signed-off-by: Shi, Crane * handle case of invalid sg for drive removal Signed-off-by: Shi, Crane * also exclude removing sg for trigger sg resync in drive removal Signed-off-by: Shi, Crane * refine sg removal status handling Signed-off-by: Shi, Crane * Revert "refine error handling" This reverts commit 06607e73e916c0c350fafa9298bbf72558dea810. Signed-off-by: Shi, Crane * refine log and some code logic Signed-off-by: Shi, Crane * try to add immutability validation rule to storagegroup spec Signed-off-by: Shi, Crane * upgrade controller-gen version to v0.9.2 Signed-off-by: Shi, Crane * add storagegroupcontroller UT initial Signed-off-by: Shi, Crane * Revert "add storagegroupcontroller UT initial" This reverts commit 1ea8660af8d6ce679b988026741e47b91dba55e9. 
Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * fix Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * refactor and add UT of storagegroupcontroller Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * fix storagegroupcontroller UT Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * refine the logic of sg deletion Signed-off-by: Shi, Crane * refine Signed-off-by: Shi, Crane * fix bug Signed-off-by: Shi, Crane * fix go-lint err Signed-off-by: Shi, Crane * fix go-lint error Signed-off-by: Shi, Crane * add drive IsClean support, decrease k8s api call, remove manual sg labeling support Signed-off-by: Shi, Crane * fix Signed-off-by: Shi, Crane * fix Signed-off-by: Shi, Crane * fix UT Signed-off-by: Shi, Crane * refine corner case handling Signed-off-by: Shi, Crane * fix Signed-off-by: Shi, Crane * refine and add UT to storagegroupcontroller Signed-off-by: Shi, Crane * refine storagegroupcontroller and add UT Signed-off-by: Shi, Crane * make controller svc's k8scache also sync sg and lvg objs' Signed-off-by: Shi, Crane * use k8s cache, re-support sg label manual change and refine in sg ctrl Signed-off-by: Shi, Crane * fix lint err Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * add storagegroupcontroller UT Signed-off-by: Shi, Crane * storagegroup controller will not reconcile on drive delete event Signed-off-by: Shi, Crane * not support Health, Status, Usage and IsClean as DriveSelector's MatchFields Signed-off-by: Shi, Crane * in storagegroupcontroller's reconcile, only sync drive when reqName is uuid Signed-off-by: Shi, Crane * refine the logic to avoid nil pointer error Signed-off-by: Shi, Crane * revert the usage of k8scache Signed-off-by: Shi, Crane * add custom storage group proposal draft Signed-off-by: Shi, Crane * refine custom storage group proposal Signed-off-by: Shi, Crane --------- Signed-off-by: Shi, Crane --- api/generated/v1/types.pb.go | 163 ++-- api/v1/constants.go | 10 + api/v1/storagegroupcrd/storagegroup_types.go | 13 +- api/v1/types.proto | 4 + docs/proposals/custom-storage-group.md | 30 + pkg/crcontrollers/drive/drivecontroller.go | 39 + .../drive/drivecontroller_test.go | 105 +++ .../storagegroup/storagegroupcontroller.go | 480 +++++++----- .../storagegroupcontroller_test.go | 714 ++++++++++++++++++ variables.mk | 4 +- 10 files changed, 1292 insertions(+), 270 deletions(-) create mode 100644 docs/proposals/custom-storage-group.md create mode 100644 pkg/crcontrollers/storagegroup/storagegroupcontroller_test.go diff --git a/api/generated/v1/types.pb.go b/api/generated/v1/types.pb.go index 4ebaa5bf7..8065b782a 100644 --- a/api/generated/v1/types.pb.go +++ b/api/generated/v1/types.pb.go @@ -889,6 +889,45 @@ func (m *DriveSelector) GetMatchFields() map[string]string { return nil } +type StorageGroupStatus struct { + Phase string `protobuf:"bytes,1,opt,name=phase,proto3" json:"phase,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *StorageGroupStatus) Reset() { *m = StorageGroupStatus{} } +func (m *StorageGroupStatus) String() string { return proto.CompactTextString(m) } +func (*StorageGroupStatus) ProtoMessage() {} +func (*StorageGroupStatus) Descriptor() ([]byte, []int) { + return fileDescriptor_d938547f84707355, []int{11} +} + +func (m *StorageGroupStatus) XXX_Unmarshal(b []byte) error { + return 
xxx_messageInfo_StorageGroupStatus.Unmarshal(m, b) +} +func (m *StorageGroupStatus) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_StorageGroupStatus.Marshal(b, m, deterministic) +} +func (m *StorageGroupStatus) XXX_Merge(src proto.Message) { + xxx_messageInfo_StorageGroupStatus.Merge(m, src) +} +func (m *StorageGroupStatus) XXX_Size() int { + return xxx_messageInfo_StorageGroupStatus.Size(m) +} +func (m *StorageGroupStatus) XXX_DiscardUnknown() { + xxx_messageInfo_StorageGroupStatus.DiscardUnknown(m) +} + +var xxx_messageInfo_StorageGroupStatus proto.InternalMessageInfo + +func (m *StorageGroupStatus) GetPhase() string { + if m != nil { + return m.Phase + } + return "" +} + func init() { proto.RegisterType((*Drive)(nil), "v1api.Drive") proto.RegisterType((*Volume)(nil), "v1api.Volume") @@ -903,70 +942,72 @@ func init() { proto.RegisterType((*StorageGroupSpec)(nil), "v1api.StorageGroupSpec") proto.RegisterType((*DriveSelector)(nil), "v1api.DriveSelector") proto.RegisterMapType((map[string]string)(nil), "v1api.DriveSelector.MatchFieldsEntry") + proto.RegisterType((*StorageGroupStatus)(nil), "v1api.StorageGroupStatus") } func init() { proto.RegisterFile("types.proto", fileDescriptor_d938547f84707355) } var fileDescriptor_d938547f84707355 = []byte{ - // 954 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x56, 0xcd, 0x6e, 0xe4, 0x44, - 0x10, 0x96, 0xe7, 0x2f, 0x99, 0x9a, 0x24, 0x9b, 0x74, 0x42, 0xd4, 0x44, 0x11, 0x8a, 0x2c, 0x81, - 0x72, 0x58, 0x8d, 0x20, 0x1c, 0x58, 0xad, 0x10, 0x62, 0x93, 0xc9, 0xee, 0x5a, 0xec, 0x66, 0x23, - 0x0f, 0xd9, 0x03, 0xb7, 0x8e, 0x5d, 0x24, 0x16, 0x9e, 0xb1, 0xe9, 0xf6, 0xcc, 0xca, 0x7b, 0x41, - 0xbc, 0x02, 0xcf, 0xc0, 0x73, 0xf0, 0x00, 0x5c, 0xb9, 0xf1, 0x34, 0xa8, 0xba, 0x7b, 0xec, 0xf6, - 0x8c, 0x2f, 0xdc, 0xaa, 0xbe, 0xaa, 0xea, 0x2a, 0x7f, 0xf5, 0xb9, 0x6d, 0x18, 0x15, 0x65, 0x8e, - 0x6a, 0x9c, 0xcb, 0xac, 0xc8, 0x58, 0x7f, 0xf9, 0x95, 0xc8, 0x13, 0xff, 0x9f, 0x1e, 0xf4, 0x27, - 0x32, 0x59, 0x22, 0x63, 0xd0, 0xbb, 0xbb, 0x0b, 0x26, 0xdc, 0x3b, 0xf3, 0xce, 0x87, 0xa1, 0xb6, - 0xd9, 0x3e, 0x74, 0xdf, 0x07, 0x13, 0xde, 0xd1, 0x10, 0x99, 0x84, 0xdc, 0x06, 0x13, 0xde, 0x35, - 0xc8, 0x6d, 0x30, 0x61, 0x3e, 0xec, 0x4c, 0x51, 0x26, 0x22, 0xbd, 0x59, 0xcc, 0xee, 0x51, 0xf2, - 0x9e, 0x0e, 0x35, 0x30, 0x76, 0x0c, 0x83, 0xd7, 0x28, 0xd2, 0xe2, 0x91, 0xf7, 0x75, 0xd4, 0x7a, - 0xd4, 0xf3, 0xc7, 0x32, 0x47, 0x3e, 0x30, 0x3d, 0xc9, 0x26, 0x6c, 0x9a, 0x7c, 0x44, 0xbe, 0x75, - 0xe6, 0x9d, 0x77, 0x43, 0x6d, 0x53, 0xfd, 0xb4, 0x10, 0xc5, 0x42, 0xf1, 0x6d, 0x53, 0x6f, 0x3c, - 0x76, 0x04, 0xfd, 0x3b, 0x25, 0x1e, 0x90, 0x0f, 0x35, 0x6c, 0x1c, 0xca, 0xbe, 0xc9, 0x62, 0x0c, - 0x62, 0x0e, 0x26, 0xdb, 0x78, 0x74, 0xf2, 0xad, 0x28, 0x1e, 0xf9, 0xc8, 0x74, 0x23, 0x9b, 0x9d, - 0xc2, 0xf0, 0x7a, 0x1e, 0xa5, 0x99, 0x5a, 0x48, 0xe4, 0x3b, 0x3a, 0x50, 0x03, 0x7a, 0x96, 0x34, - 0x2b, 0xf8, 0xae, 0xa9, 0x20, 0x9b, 0x18, 0xb8, 0x14, 0x25, 0xdf, 0x33, 0x0c, 0x5c, 0x8a, 0x92, - 0x9d, 0xc0, 0xf6, 0xcb, 0x44, 0xce, 0x3e, 0x08, 0x89, 0xfc, 0x89, 0x86, 0x2b, 0xdf, 0x9c, 0x1f, - 0x2f, 0xa4, 0x98, 0x47, 0xc8, 0xf7, 0xf5, 0x23, 0xd5, 0x00, 0x55, 0xbe, 0xb9, 0x9e, 0xd0, 0xc3, - 0x20, 0x3f, 0x30, 0x95, 0x2b, 0x9f, 0x62, 0x81, 0x9a, 0x96, 0xaa, 0xc0, 0x19, 0x67, 0x67, 0xde, - 0xf9, 0x76, 0x58, 0xf9, 0x8c, 0xc3, 0x56, 0xa0, 0xae, 0x52, 0x14, 0x73, 0x7e, 0xa8, 0x43, 0x2b, - 0x97, 0x7d, 0x01, 0x7b, 0x53, 0x8c, 0x16, 0x32, 0x29, 0x4a, 0xcb, 0xd8, 0x91, 0x3e, 0x77, 0x0d, - 0x65, 0x4f, 0xe1, 0xe0, 0x7a, 0x1e, 0xc9, 
0x32, 0x2f, 0x92, 0x6c, 0x7e, 0x25, 0x72, 0x71, 0x9f, - 0x22, 0xff, 0x44, 0xa7, 0x6e, 0x06, 0xd8, 0x18, 0x58, 0x0d, 0xde, 0x92, 0x7e, 0xa2, 0x2c, 0xe5, - 0xc7, 0x3a, 0xbd, 0x25, 0xe2, 0xff, 0xd9, 0x85, 0xc1, 0xfb, 0x2c, 0x5d, 0xcc, 0x90, 0xed, 0x41, - 0x27, 0x88, 0xad, 0xa8, 0x3a, 0x41, 0xac, 0x1f, 0x39, 0x8b, 0x04, 0xa5, 0x5b, 0x5d, 0x55, 0x3e, - 0x49, 0x69, 0x65, 0x6b, 0x59, 0x18, 0x95, 0x35, 0x30, 0x2d, 0xb7, 0x22, 0x93, 0xe2, 0x01, 0xaf, - 0x52, 0xa1, 0x54, 0x25, 0x37, 0x07, 0x73, 0x04, 0xd0, 0x6f, 0x08, 0xe0, 0x18, 0x06, 0xef, 0x3e, - 0xcc, 0x51, 0x2a, 0x3e, 0x38, 0xeb, 0x12, 0x6e, 0xbc, 0x56, 0xc9, 0x31, 0xe8, 0xbd, 0xcd, 0x62, - 0xb4, 0x82, 0xd3, 0x76, 0x25, 0xd7, 0xa1, 0x23, 0xd7, 0x5a, 0xda, 0xd0, 0x90, 0xf6, 0x53, 0x38, - 0x78, 0x97, 0xa3, 0xd4, 0x83, 0x8b, 0xd4, 0xee, 0xc2, 0x28, 0x6f, 0x33, 0x40, 0x32, 0xb9, 0x9a, - 0x06, 0x36, 0xcb, 0xca, 0xb0, 0x02, 0x6a, 0x99, 0xef, 0xba, 0x32, 0x27, 0x69, 0xe5, 0x8f, 0x38, - 0x43, 0x29, 0x52, 0x2d, 0xc7, 0xed, 0xb0, 0x06, 0x1c, 0x9e, 0x5e, 0xc9, 0x6c, 0x91, 0x5b, 0x61, - 0x36, 0x30, 0xff, 0x37, 0x38, 0x78, 0xb1, 0x14, 0x49, 0x4a, 0x3b, 0xa6, 0x55, 0x47, 0x49, 0x51, - 0x36, 0x16, 0xe4, 0xad, 0x2d, 0xa8, 0x26, 0xb6, 0xd3, 0x20, 0xd6, 0x87, 0x1d, 0xe5, 0x2e, 0xc5, - 0x2e, 0xce, 0xc5, 0x2a, 0x92, 0x7b, 0x35, 0xc9, 0xfe, 0xbf, 0x1e, 0x9c, 0x6e, 0x4c, 0x10, 0xa2, - 0x42, 0xb9, 0x34, 0x0d, 0x4f, 0x61, 0x78, 0x23, 0x66, 0xa8, 0x72, 0x11, 0xa1, 0x9d, 0xa6, 0x06, - 0x9c, 0x6b, 0xa1, 0xd3, 0xb8, 0x16, 0xbe, 0x81, 0x1d, 0x1a, 0x2c, 0xc4, 0x5f, 0x17, 0xa8, 0x0a, - 0x33, 0xce, 0xe8, 0xe2, 0x70, 0xac, 0xaf, 0xbc, 0xb1, 0x1b, 0x0a, 0x1b, 0x89, 0xec, 0x07, 0x38, - 0x74, 0xba, 0x57, 0xf5, 0xbd, 0xb3, 0xee, 0xf9, 0xe8, 0xe2, 0x53, 0x5b, 0xbf, 0x99, 0x11, 0xb6, - 0x55, 0xf9, 0xaf, 0x9b, 0x53, 0xd0, 0xb3, 0x58, 0x1b, 0xe9, 0x85, 0x20, 0x01, 0xd6, 0x00, 0xd1, - 0x6e, 0x0e, 0x41, 0x22, 0x97, 0x82, 0x95, 0xef, 0x7f, 0x04, 0xb6, 0xd9, 0x80, 0x7d, 0x0f, 0x4f, - 0x6a, 0xca, 0x34, 0xa4, 0x19, 0x1a, 0x5d, 0x1c, 0xdb, 0x41, 0xd7, 0xa2, 0xe1, 0x7a, 0x3a, 0xad, - 0xcd, 0x39, 0x57, 0xd9, 0xbe, 0x0d, 0xcc, 0xff, 0xdd, 0xdb, 0x68, 0x43, 0xab, 0xa4, 0x25, 0xac, - 0x3e, 0x15, 0x64, 0x6f, 0xbc, 0x97, 0x9d, 0x96, 0xf7, 0x72, 0x25, 0x81, 0xae, 0xf3, 0x9e, 0xad, - 0xeb, 0xb4, 0xd7, 0xa2, 0xd3, 0xbf, 0x3c, 0x60, 0x6f, 0xb2, 0x87, 0x24, 0x12, 0xa9, 0xb9, 0x55, - 0x34, 0xdc, 0x3a, 0x06, 0x61, 0xf4, 0xda, 0x76, 0x2c, 0x46, 0xaf, 0xed, 0x29, 0x0c, 0x57, 0x0a, - 0x26, 0x2d, 0x68, 0xe2, 0x2b, 0xa0, 0x4d, 0x97, 0xec, 0x33, 0x00, 0xd3, 0x28, 0xc4, 0x9f, 0x15, - 0xef, 0xeb, 0x12, 0x07, 0x71, 0x84, 0x37, 0x68, 0x08, 0xaf, 0xbe, 0x0c, 0xb6, 0xdc, 0xcb, 0xc0, - 0xff, 0xc3, 0x33, 0x63, 0xb5, 0x7e, 0x64, 0x9f, 0xc1, 0xf0, 0x45, 0x1c, 0x4b, 0x54, 0x0a, 0xcd, - 0x0a, 0x46, 0x17, 0x27, 0x8e, 0x54, 0xc7, 0x55, 0xf0, 0x7a, 0x5e, 0xc8, 0x32, 0xac, 0x93, 0x4f, - 0xbe, 0x85, 0xbd, 0x66, 0x90, 0x3e, 0x4e, 0xbf, 0x60, 0x69, 0x8f, 0x27, 0x93, 0xee, 0x8e, 0xa5, - 0x48, 0x17, 0x2b, 0x46, 0x8c, 0xf3, 0xbc, 0xf3, 0xcc, 0xf3, 0x6f, 0x60, 0xdf, 0x65, 0x79, 0x9a, - 0x63, 0xc4, 0x9e, 0xc3, 0x6e, 0x4c, 0x7f, 0x03, 0x53, 0x4c, 0x31, 0x2a, 0x32, 0x69, 0x15, 0x75, - 0x64, 0xe7, 0x99, 0xb8, 0xb1, 0xb0, 0x99, 0xea, 0xff, 0xed, 0xc1, 0x6e, 0x23, 0x81, 0x7d, 0x09, - 0x87, 0x73, 0xfd, 0x03, 0xa0, 0x61, 0x75, 0x8b, 0x52, 0xef, 0x86, 0xce, 0xec, 0x87, 0x6d, 0x21, - 0xf6, 0x0a, 0x46, 0x33, 0x51, 0x44, 0x8f, 0x2f, 0x13, 0x4c, 0xe3, 0x15, 0x1b, 0x9f, 0xb7, 0x75, - 0x1f, 0xbf, 0xad, 0xf3, 0x0c, 0x31, 0x6e, 0xe5, 0xc9, 0x77, 0xb0, 0xbf, 0x9e, 0xf0, 0x7f, 0xc8, - 0xb9, 0xdc, 0xfa, 0xc9, 0xfc, 0x20, 0xdd, 0x0f, 0xf4, 0xef, 0xd2, 
0xd7, 0xff, 0x05, 0x00, 0x00, - 0xff, 0xff, 0xbd, 0x19, 0x44, 0x54, 0x3d, 0x09, 0x00, 0x00, + // 968 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x56, 0xcd, 0x6e, 0xdb, 0x46, + 0x10, 0x06, 0xf5, 0x67, 0x6b, 0x64, 0x3b, 0xf6, 0xda, 0x35, 0xb6, 0x86, 0x51, 0x08, 0x04, 0x5a, + 0x18, 0x45, 0x20, 0xb4, 0xee, 0xa1, 0x41, 0x50, 0x14, 0x8d, 0x2d, 0x27, 0x21, 0x9a, 0x38, 0x02, + 0x55, 0xe7, 0xd0, 0xdb, 0x9a, 0x9c, 0x5a, 0x44, 0x29, 0x91, 0xdd, 0xa5, 0x14, 0xd0, 0x97, 0xa2, + 0xaf, 0xd0, 0x67, 0xe8, 0x73, 0xf4, 0x01, 0x7a, 0xed, 0xad, 0x4f, 0x53, 0xcc, 0x2e, 0x45, 0x2e, + 0x25, 0x5e, 0x72, 0x9b, 0xf9, 0x66, 0x66, 0x67, 0xf8, 0xcd, 0xc7, 0x25, 0x61, 0x90, 0xe5, 0x29, + 0xaa, 0x51, 0x2a, 0x93, 0x2c, 0x61, 0xdd, 0xd5, 0xd7, 0x22, 0x8d, 0xdc, 0x7f, 0x3b, 0xd0, 0x1d, + 0xcb, 0x68, 0x85, 0x8c, 0x41, 0xe7, 0xee, 0xce, 0x1b, 0x73, 0x67, 0xe8, 0x5c, 0xf4, 0x7d, 0x6d, + 0xb3, 0x43, 0x68, 0xbf, 0xf7, 0xc6, 0xbc, 0xa5, 0x21, 0x32, 0x09, 0x99, 0x78, 0x63, 0xde, 0x36, + 0xc8, 0xc4, 0x1b, 0x33, 0x17, 0xf6, 0xa6, 0x28, 0x23, 0x11, 0xdf, 0x2e, 0xe7, 0xf7, 0x28, 0x79, + 0x47, 0x87, 0x6a, 0x18, 0x3b, 0x85, 0xde, 0x6b, 0x14, 0x71, 0x36, 0xe3, 0x5d, 0x1d, 0x2d, 0x3c, + 0xea, 0xf9, 0x53, 0x9e, 0x22, 0xef, 0x99, 0x9e, 0x64, 0x13, 0x36, 0x8d, 0x1e, 0x91, 0xef, 0x0c, + 0x9d, 0x8b, 0xb6, 0xaf, 0x6d, 0xaa, 0x9f, 0x66, 0x22, 0x5b, 0x2a, 0xbe, 0x6b, 0xea, 0x8d, 0xc7, + 0x4e, 0xa0, 0x7b, 0xa7, 0xc4, 0x03, 0xf2, 0xbe, 0x86, 0x8d, 0x43, 0xd9, 0xb7, 0x49, 0x88, 0x5e, + 0xc8, 0xc1, 0x64, 0x1b, 0x8f, 0x4e, 0x9e, 0x88, 0x6c, 0xc6, 0x07, 0xa6, 0x1b, 0xd9, 0xec, 0x1c, + 0xfa, 0x37, 0x8b, 0x20, 0x4e, 0xd4, 0x52, 0x22, 0xdf, 0xd3, 0x81, 0x0a, 0xd0, 0xb3, 0xc4, 0x49, + 0xc6, 0xf7, 0x4d, 0x05, 0xd9, 0xc4, 0xc0, 0x95, 0xc8, 0xf9, 0x81, 0x61, 0xe0, 0x4a, 0xe4, 0xec, + 0x0c, 0x76, 0x5f, 0x46, 0x72, 0xfe, 0x41, 0x48, 0xe4, 0x4f, 0x34, 0x5c, 0xfa, 0xe6, 0xfc, 0x70, + 0x29, 0xc5, 0x22, 0x40, 0x7e, 0xa8, 0x1f, 0xa9, 0x02, 0xa8, 0xf2, 0xcd, 0xcd, 0x98, 0x1e, 0x06, + 0xf9, 0x91, 0xa9, 0x5c, 0xfb, 0x14, 0xf3, 0xd4, 0x34, 0x57, 0x19, 0xce, 0x39, 0x1b, 0x3a, 0x17, + 0xbb, 0x7e, 0xe9, 0x33, 0x0e, 0x3b, 0x9e, 0xba, 0x8e, 0x51, 0x2c, 0xf8, 0xb1, 0x0e, 0xad, 0x5d, + 0xf6, 0x05, 0x1c, 0x4c, 0x31, 0x58, 0xca, 0x28, 0xcb, 0x0b, 0xc6, 0x4e, 0xf4, 0xb9, 0x1b, 0x28, + 0x7b, 0x0a, 0x47, 0x37, 0x8b, 0x40, 0xe6, 0x69, 0x16, 0x25, 0x8b, 0x6b, 0x91, 0x8a, 0xfb, 0x18, + 0xf9, 0x27, 0x3a, 0x75, 0x3b, 0xc0, 0x46, 0xc0, 0x2a, 0x70, 0x42, 0xfa, 0x09, 0x92, 0x98, 0x9f, + 0xea, 0xf4, 0x86, 0x88, 0xfb, 0x57, 0x1b, 0x7a, 0xef, 0x93, 0x78, 0x39, 0x47, 0x76, 0x00, 0x2d, + 0x2f, 0x2c, 0x44, 0xd5, 0xf2, 0x42, 0xfd, 0xc8, 0x49, 0x20, 0x28, 0xbd, 0xd0, 0x55, 0xe9, 0x93, + 0x94, 0xd6, 0xb6, 0x96, 0x85, 0x51, 0x59, 0x0d, 0xd3, 0x72, 0xcb, 0x12, 0x29, 0x1e, 0xf0, 0x3a, + 0x16, 0x4a, 0x95, 0x72, 0xb3, 0x30, 0x4b, 0x00, 0xdd, 0x9a, 0x00, 0x4e, 0xa1, 0xf7, 0xee, 0xc3, + 0x02, 0xa5, 0xe2, 0xbd, 0x61, 0x9b, 0x70, 0xe3, 0x35, 0x4a, 0x8e, 0x41, 0xe7, 0x6d, 0x12, 0x62, + 0x21, 0x38, 0x6d, 0x97, 0x72, 0xed, 0x5b, 0x72, 0xad, 0xa4, 0x0d, 0x35, 0x69, 0x3f, 0x85, 0xa3, + 0x77, 0x29, 0x4a, 0x3d, 0xb8, 0x88, 0x8b, 0x5d, 0x18, 0xe5, 0x6d, 0x07, 0x48, 0x26, 0xd7, 0x53, + 0xaf, 0xc8, 0x2a, 0x64, 0x58, 0x02, 0x95, 0xcc, 0xf7, 0x6d, 0x99, 0x93, 0xb4, 0xd2, 0x19, 0xce, + 0x51, 0x8a, 0x58, 0xcb, 0x71, 0xd7, 0xaf, 0x00, 0x8b, 0xa7, 0x57, 0x32, 0x59, 0xa6, 0x85, 0x30, + 0x6b, 0x98, 0xfb, 0x3b, 0x1c, 0xbd, 0x58, 0x89, 0x28, 0xa6, 0x1d, 0xd3, 0xaa, 0x83, 0x28, 0xcb, + 0x6b, 0x0b, 0x72, 0x36, 0x16, 0x54, 0x11, 0xdb, 0xaa, 0x11, 0xeb, 0xc2, 0x9e, 
0xb2, 0x97, 0x52, + 0x2c, 0xce, 0xc6, 0x4a, 0x92, 0x3b, 0x15, 0xc9, 0xee, 0x7f, 0x0e, 0x9c, 0x6f, 0x4d, 0xe0, 0xa3, + 0x42, 0xb9, 0x32, 0x0d, 0xcf, 0xa1, 0x7f, 0x2b, 0xe6, 0xa8, 0x52, 0x11, 0x60, 0x31, 0x4d, 0x05, + 0x58, 0xd7, 0x42, 0xab, 0x76, 0x2d, 0x7c, 0x0b, 0x7b, 0x34, 0x98, 0x8f, 0xbf, 0x2d, 0x51, 0x65, + 0x66, 0x9c, 0xc1, 0xe5, 0xf1, 0x48, 0x5f, 0x79, 0x23, 0x3b, 0xe4, 0xd7, 0x12, 0xd9, 0x8f, 0x70, + 0x6c, 0x75, 0x2f, 0xeb, 0x3b, 0xc3, 0xf6, 0xc5, 0xe0, 0xf2, 0xd3, 0xa2, 0x7e, 0x3b, 0xc3, 0x6f, + 0xaa, 0x72, 0x5f, 0xd7, 0xa7, 0xa0, 0x67, 0x29, 0x6c, 0xa4, 0x17, 0x82, 0x04, 0x58, 0x01, 0x44, + 0xbb, 0x39, 0x04, 0x89, 0x5c, 0x0a, 0x96, 0xbe, 0xfb, 0x08, 0x6c, 0xbb, 0x01, 0xfb, 0x01, 0x9e, + 0x54, 0x94, 0x69, 0x48, 0x33, 0x34, 0xb8, 0x3c, 0x2d, 0x06, 0xdd, 0x88, 0xfa, 0x9b, 0xe9, 0xb4, + 0x36, 0xeb, 0x5c, 0x55, 0xf4, 0xad, 0x61, 0xee, 0x1f, 0xce, 0x56, 0x1b, 0x5a, 0x25, 0x2d, 0x61, + 0xfd, 0xa9, 0x20, 0x7b, 0xeb, 0xbd, 0x6c, 0x35, 0xbc, 0x97, 0x6b, 0x09, 0xb4, 0xad, 0xf7, 0x6c, + 0x53, 0xa7, 0x9d, 0x06, 0x9d, 0xfe, 0xed, 0x00, 0x7b, 0x93, 0x3c, 0x44, 0x81, 0x88, 0xcd, 0xad, + 0xa2, 0xe1, 0xc6, 0x31, 0x08, 0xa3, 0xd7, 0xb6, 0x55, 0x60, 0xf4, 0xda, 0x9e, 0x43, 0x7f, 0xad, + 0x60, 0xd2, 0x82, 0x26, 0xbe, 0x04, 0x9a, 0x74, 0xc9, 0x3e, 0x03, 0x30, 0x8d, 0x7c, 0xfc, 0x45, + 0xf1, 0xae, 0x2e, 0xb1, 0x10, 0x4b, 0x78, 0xbd, 0x9a, 0xf0, 0xaa, 0xcb, 0x60, 0xc7, 0xbe, 0x0c, + 0xdc, 0x3f, 0x1d, 0x33, 0x56, 0xe3, 0x47, 0xf6, 0x19, 0xf4, 0x5f, 0x84, 0xa1, 0x44, 0xa5, 0xd0, + 0xac, 0x60, 0x70, 0x79, 0x66, 0x49, 0x75, 0x54, 0x06, 0x6f, 0x16, 0x99, 0xcc, 0xfd, 0x2a, 0xf9, + 0xec, 0x3b, 0x38, 0xa8, 0x07, 0xe9, 0xe3, 0xf4, 0x2b, 0xe6, 0xc5, 0xf1, 0x64, 0xd2, 0xdd, 0xb1, + 0x12, 0xf1, 0x72, 0xcd, 0x88, 0x71, 0x9e, 0xb7, 0x9e, 0x39, 0xee, 0x2d, 0x1c, 0xda, 0x2c, 0x4f, + 0x53, 0x0c, 0xd8, 0x73, 0xd8, 0x0f, 0xe9, 0x6f, 0x60, 0x8a, 0x31, 0x06, 0x59, 0x22, 0x0b, 0x45, + 0x9d, 0x14, 0xf3, 0x8c, 0xed, 0x98, 0x5f, 0x4f, 0x75, 0xff, 0x71, 0x60, 0xbf, 0x96, 0xc0, 0xbe, + 0x82, 0xe3, 0x85, 0xfe, 0x01, 0xd0, 0xb0, 0x9a, 0xa0, 0xd4, 0xbb, 0xa1, 0x33, 0xbb, 0x7e, 0x53, + 0x88, 0xbd, 0x82, 0xc1, 0x5c, 0x64, 0xc1, 0xec, 0x65, 0x84, 0x71, 0xb8, 0x66, 0xe3, 0xf3, 0xa6, + 0xee, 0xa3, 0xb7, 0x55, 0x9e, 0x21, 0xc6, 0xae, 0x3c, 0xfb, 0x1e, 0x0e, 0x37, 0x13, 0x3e, 0x8a, + 0x9c, 0x2f, 0x81, 0xd5, 0xc8, 0x29, 0x2f, 0xe2, 0x74, 0x26, 0xd4, 0x5a, 0x72, 0xc6, 0xb9, 0xda, + 0xf9, 0xd9, 0xfc, 0x4c, 0xdd, 0xf7, 0xf4, 0xaf, 0xd5, 0x37, 0xff, 0x07, 0x00, 0x00, 0xff, 0xff, + 0x9b, 0x5f, 0xec, 0x44, 0x69, 0x09, 0x00, 0x00, } diff --git a/api/v1/constants.go b/api/v1/constants.go index 13d2a0375..4c98bc414 100644 --- a/api/v1/constants.go +++ b/api/v1/constants.go @@ -141,4 +141,14 @@ const ( // CSI StorageGroup label key StorageGroupLabelKey = "drive.csi-baremetal.dell.com/storage-group" + + // CSI StorageGroup Status + StorageGroupPhaseSyncing = "SYNCING" + StorageGroupPhaseSynced = "SYNCED" + StorageGroupPhaseRemoving = "REMOVING" + StorageGroupPhaseInvalid = "INVALID" + + // CSI StorageGroup annotations + StorageGroupAnnotationDriveRemovalPrefix = "drive-removal" + StorageGroupAnnotationDriveRemovalDone = "done" ) diff --git a/api/v1/storagegroupcrd/storagegroup_types.go b/api/v1/storagegroupcrd/storagegroup_types.go index 779e0c935..58b356984 100644 --- a/api/v1/storagegroupcrd/storagegroup_types.go +++ b/api/v1/storagegroupcrd/storagegroup_types.go @@ -26,20 +26,21 @@ import ( // StorageGroup is the Schema for the StorageGroups API // +kubebuilder:resource:scope=Cluster,shortName={sg,sgs} // 
+kubebuilder:printcolumn:name="DRIVES_PER_NODE",type="string",JSONPath=".spec.driveSelector.numberDrivesPerNode",description="numberDrivesPerNode of StorageGroup's DriveSelector" -// +kubebuilder:printcolumn:name="TYPE",type="string",JSONPath=".spec.driveSelector.matchFields.Type",description="Drive Type of StorageGroup's DriveSelector" -// +kubebuilder:printcolumn:name="SLOT",type="string",JSONPath=".spec.driveSelector.matchFields.Slot",description="Drive Slot of StorageGroup's DriveSelector" -// +kubebuilder:printcolumn:name="PATH",type="string",JSONPath=".spec.driveSelector.matchFields.Path",description="Drive Path of StorageGroup's DriveSelector" -// +kubebuilder:printcolumn:name="SYSTEM",type="string",JSONPath=".spec.driveSelector.matchFields.IsSystem",description="Whether StorageGroup's DriveSelector to Select System Drive" +// +kubebuilder:printcolumn:name="DRIVE_FIELDS",type="string",JSONPath=".spec.driveSelector.matchFields",description="Match Fields of StorageGroup's DriveSelector to Select Drives on Field Values" +// +kubebuilder:printcolumn:name="STATUS",type="string",JSONPath=".status.phase",description="status of StorageGroup" type StorageGroup struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` - Spec api.StorageGroupSpec `json:"spec,omitempty"` + + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="updates to storagegroup spec are forbidden" + Spec api.StorageGroupSpec `json:"spec,omitempty"` + Status api.StorageGroupStatus `json:"status,omitempty"` } // +kubebuilder:object:root=true // StorageGroupList contains a list of StorageGroup -//+kubebuilder:object:generate=true +// +kubebuilder:object:generate=true type StorageGroupList struct { metav1.TypeMeta `json:",inline"` metav1.ListMeta `json:"metadata,omitempty"` diff --git a/api/v1/types.proto b/api/v1/types.proto index 0ef31e1f0..5a51da747 100644 --- a/api/v1/types.proto +++ b/api/v1/types.proto @@ -108,3 +108,7 @@ message DriveSelector { int32 numberDrivesPerNode = 1; map matchFields = 2; } + +message StorageGroupStatus { + string phase = 1; +} diff --git a/docs/proposals/custom-storage-group.md b/docs/proposals/custom-storage-group.md new file mode 100644 index 000000000..0cd9b715c --- /dev/null +++ b/docs/proposals/custom-storage-group.md @@ -0,0 +1,30 @@ +# Proposal: Custom Storage Group + +Last updated: 15-05-2023 + + +## Abstract + +At present, for each PVC, CSI just chooses one of those physical drives whose available capacity meets the requested +storage size of PVC to provision corresponding volume and doesn't support the functionality to provision volume on the physical drive specified by user. +Here, we propose a custom-storage-group solution that user can select specific physical drives to provision their volumes. + +## Background + +To use CSI to create persistent volumes, you need to specify the storage classes managed by CSI in your k8s volume specifications. All the current CSI-managed storage classes are pre-defined storage classes created during CSI installation, based on local disk types including HDD, SSD, NVME and whether to use 1 entire local drive or create 1 logical volume for the k8s persistent volume. +In our current Bare-metal CSI, users can only have PVC satisfying their basic request based on storage type and size and cannot select the specific drive for their PVC. Instead, for each PVC, CSI will select 1 among those local drives satisfying the request of PVC. 
Furthermore, for LVG PVCs, the current CSI drive selection strategy tries to accumulate those PVCs on a drive that already holds LVG PVCs and has the least free capacity, where applicable. +This strategy aims to leave more disks entirely free, which is useful if non-LVG PVCs are requested later, but it cannot satisfy the requirement of selecting specific drives in volume provisioning. + +## Proposal + +The main idea of the custom storage group solution is that, to make PVCs land on specific drives, we first create a StorageGroup whose driveSelector selects those drives and then add the corresponding storage-group k8s label to the spec of the volume claims. + +Here, StorageGroup is a new custom resource in Bare-metal CSI. + +Users can create a custom storage group over specific drives by specifying drive-property criteria via the driveSelector field of StorageGroup. + +After a storage group is created in Kubernetes, the storage group controller immediately syncs the corresponding custom-storage-group k8s label **drive.csi-baremetal.dell.com/storage-group: ${storage-group-name}** to all the CSI Drives selected by this storage group. The same label is then also immediately synced to the drives' corresponding AC objects. + +In effect, we use the storage-group k8s label to represent the list of drives in the corresponding storage group, which can be conveniently listed via the k8s list API with a labelSelector. + +Then, to make PVCs land on the selected drives, add the corresponding custom storage group label **drive.csi-baremetal.dell.com/storage-group: ${storage-group-name}** to the volume claim spec, as illustrated in the sketch below. CSI then enforces custom-storage-group label matching when selecting drives for PVCs: a PVC with the k8s label **drive.csi-baremetal.dell.com/storage-group: ${storage-group-name}** will only reside on one of the drives carrying the same label.
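+
+## Example
+
+A minimal sketch of the intended usage, for illustration only. The driveSelector fields (numberDrivesPerNode, matchFields) and the storage-group label key come from this proposal; the apiVersion, object names, and storage class below are assumed placeholders, not fixed by this proposal.
+
+```yaml
+# Hypothetical StorageGroup selecting at most one HDD drive per node.
+# apiVersion is an assumption; use the API group/version of the CSI CRDs in your cluster.
+apiVersion: csi-baremetal.dell.com/v1
+kind: StorageGroup
+metadata:
+  name: hdd-group-1
+spec:
+  driveSelector:
+    numberDrivesPerNode: 1
+    matchFields:
+      Type: HDD
+```
+
+Once the storage group controller has synced the label to the matching Drive and AC objects, a volume claim opts into the group by carrying the same label:
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: my-app-data            # hypothetical PVC name
+  labels:
+    # must match the name of the StorageGroup above
+    drive.csi-baremetal.dell.com/storage-group: hdd-group-1
+spec:
+  accessModes: ["ReadWriteOnce"]
+  storageClassName: csi-baremetal-sc-hdd   # assumed pre-defined CSI storage class name
+  resources:
+    requests:
+      storage: 100Gi
+```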
diff --git a/pkg/crcontrollers/drive/drivecontroller.go b/pkg/crcontrollers/drive/drivecontroller.go index d95b39f53..fa49bfd7a 100644 --- a/pkg/crcontrollers/drive/drivecontroller.go +++ b/pkg/crcontrollers/drive/drivecontroller.go @@ -6,6 +6,7 @@ import ( "time" "github.com/sirupsen/logrus" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -15,6 +16,7 @@ import ( api "github.com/dell/csi-baremetal/api/generated/v1" apiV1 "github.com/dell/csi-baremetal/api/v1" "github.com/dell/csi-baremetal/api/v1/drivecrd" + sgcrd "github.com/dell/csi-baremetal/api/v1/storagegroupcrd" "github.com/dell/csi-baremetal/api/v1/volumecrd" "github.com/dell/csi-baremetal/pkg/base" errTypes "github.com/dell/csi-baremetal/pkg/base/error" @@ -396,6 +398,9 @@ func (c *Controller) handleDriveUsageRemoved(ctx context.Context, log *logrus.En if err := c.removeRelatedAC(ctx, log, drive); err != nil { return ignore, err } + if err := c.triggerStorageGroupResyncIfApplicable(ctx, log, drive); err != nil { + return ignore, err + } return remove, nil } @@ -460,6 +465,40 @@ func (c *Controller) removeRelatedAC(ctx context.Context, log *logrus.Entry, cur return nil } +func (c *Controller) triggerStorageGroupResyncIfApplicable(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive) error { + if drive.Labels[apiV1.StorageGroupLabelKey] != "" { + storageGroup := &sgcrd.StorageGroup{} + err := c.client.ReadCR(ctx, drive.Labels[apiV1.StorageGroupLabelKey], "", storageGroup) + if err != nil { + if k8serrors.IsNotFound(err) { + log.Warnf("no existing storage group %s", drive.Labels[apiV1.StorageGroupLabelKey]) + return nil + } + log.Errorf("error in reading storagegroup %s: %v", drive.Labels[apiV1.StorageGroupLabelKey], err) + return err + } + + if storageGroup.Status.Phase != apiV1.StorageGroupPhaseInvalid && + storageGroup.Status.Phase != apiV1.StorageGroupPhaseRemoving && + storageGroup.Spec.DriveSelector.NumberDrivesPerNode > 0 { + if storageGroup.Status.Phase == apiV1.StorageGroupPhaseSynced { + storageGroup.Status.Phase = apiV1.StorageGroupPhaseSyncing + } + if storageGroup.Annotations == nil { + storageGroup.Annotations = map[string]string{} + } + // also add annotation for specific drive removal to storage group + annotationKey := fmt.Sprintf("%s/%s", apiV1.StorageGroupAnnotationDriveRemovalPrefix, drive.Name) + storageGroup.Annotations[annotationKey] = apiV1.StorageGroupAnnotationDriveRemovalDone + if err := c.client.UpdateCR(ctx, storageGroup); err != nil { + log.Errorf("Unable to update StorageGroup with error: %v.", err) + return err + } + } + } + return nil +} + // handle drive lable update func (c *Controller) handleDriveLableUpdate(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive) error { var taintLabelExisted bool diff --git a/pkg/crcontrollers/drive/drivecontroller_test.go b/pkg/crcontrollers/drive/drivecontroller_test.go index fa40055a3..50e910683 100644 --- a/pkg/crcontrollers/drive/drivecontroller_test.go +++ b/pkg/crcontrollers/drive/drivecontroller_test.go @@ -3,6 +3,7 @@ package drive import ( "context" "errors" + "fmt" "log" "testing" "time" @@ -22,6 +23,7 @@ import ( accrd "github.com/dell/csi-baremetal/api/v1/availablecapacitycrd" dcrd "github.com/dell/csi-baremetal/api/v1/drivecrd" "github.com/dell/csi-baremetal/api/v1/lvgcrd" + sgcrd "github.com/dell/csi-baremetal/api/v1/storagegroupcrd" vcrd "github.com/dell/csi-baremetal/api/v1/volumecrd" 
"github.com/dell/csi-baremetal/pkg/base" errTypes "github.com/dell/csi-baremetal/pkg/base/error" @@ -46,6 +48,7 @@ var ( acCR2Name = uuid.New().String() aclvgCR2Name = uuid.New().String() lvgCRName = uuid.New().String() + testSG1Name = "test-hdd-1" drive1 = api.Drive{ UUID: driveUUID, @@ -137,6 +140,17 @@ var ( Location: lvgCRName, NodeId: nodeID}, } + + testSG1 = sgcrd.StorageGroup{ + TypeMeta: v1.TypeMeta{Kind: "StorageGroup", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: testSG1Name}, + Spec: api.StorageGroupSpec{ + DriveSelector: &api.DriveSelector{ + NumberDrivesPerNode: 1, + MatchFields: map[string]string{"Type": "HDD"}, + }, + }, + } ) func setup() *k8s.KubeClient { @@ -871,6 +885,97 @@ func TestDriveController_removeRelatedAC(t *testing.T) { }) } +func TestDriveController_triggerStorageGroupResyncIfApplicable(t *testing.T) { + dc := NewController(setup(), nodeID, &mocks.MockDriveMgrClient{}, new(events.Recorder), testLogger) + assert.NotNil(t, dc) + assert.NotNil(t, dc.crHelper) + t.Run("drive has no storage group label", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + + err := dc.triggerStorageGroupResyncIfApplicable(testCtx, dc.log, expectedD) + assert.Nil(t, err) + }) + t.Run("drive has sg label of non-existing sg", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + expectedD.Labels = map[string]string{apiV1.StorageGroupLabelKey: testSG1Name} + + err := dc.triggerStorageGroupResyncIfApplicable(testCtx, dc.log, expectedD) + assert.Nil(t, err) + }) + t.Run("drive has sg label but reading sg fails", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + expectedD.Labels = map[string]string{apiV1.StorageGroupLabelKey: testSG1Name} + + err := dc.triggerStorageGroupResyncIfApplicable(k8s.GetFailCtx, dc.log, expectedD) + assert.NotNil(t, err) + }) + t.Run("drive has sg label of existing sg without status", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + expectedD.Labels = map[string]string{apiV1.StorageGroupLabelKey: testSG1Name} + + expectedSG := testSG1.DeepCopy() + assert.NotNil(t, expectedSG) + err := dc.client.CreateCR(testCtx, expectedSG.Name, expectedSG) + assert.Nil(t, err) + + err = dc.triggerStorageGroupResyncIfApplicable(testCtx, dc.log, expectedD) + assert.Nil(t, err) + + resultSG := &sgcrd.StorageGroup{} + err = dc.client.ReadCR(testCtx, expectedSG.Name, "", resultSG) + assert.Nil(t, err) + assert.NotNil(t, resultSG) + assert.Equal(t, "", resultSG.Status.Phase) + annotationKey := fmt.Sprintf("%s/%s", apiV1.StorageGroupAnnotationDriveRemovalPrefix, expectedD.Name) + assert.Equal(t, apiV1.StorageGroupAnnotationDriveRemovalDone, resultSG.Annotations[annotationKey]) + + assert.Nil(t, dc.client.DeleteCR(testCtx, expectedSG)) + }) + t.Run("drive has sg label of existing sg with status", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + expectedD.Labels = map[string]string{apiV1.StorageGroupLabelKey: testSG1Name} + + expectedSG := testSG1.DeepCopy() + assert.NotNil(t, expectedSG) + expectedSG.Status.Phase = apiV1.StorageGroupPhaseSynced + err := dc.client.CreateCR(testCtx, expectedSG.Name, expectedSG) + assert.Nil(t, err) + + err = dc.triggerStorageGroupResyncIfApplicable(testCtx, dc.log, expectedD) + assert.Nil(t, err) + + resultSG := &sgcrd.StorageGroup{} + err = dc.client.ReadCR(testCtx, expectedSG.Name, "", resultSG) + assert.Nil(t, err) + 
assert.NotNil(t, resultSG) + assert.Equal(t, apiV1.StorageGroupPhaseSyncing, resultSG.Status.Phase) + annotationKey := fmt.Sprintf("%s/%s", apiV1.StorageGroupAnnotationDriveRemovalPrefix, expectedD.Name) + assert.Equal(t, apiV1.StorageGroupAnnotationDriveRemovalDone, resultSG.Annotations[annotationKey]) + + assert.Nil(t, dc.client.DeleteCR(testCtx, expectedSG)) + }) + t.Run("drive has sg label of existing sg but sg status update fails", func(t *testing.T) { + expectedD := testBadCRDrive.DeepCopy() + assert.NotNil(t, expectedD) + expectedD.Labels = map[string]string{apiV1.StorageGroupLabelKey: testSG1Name} + + expectedSG := testSG1.DeepCopy() + assert.NotNil(t, expectedSG) + err := dc.client.CreateCR(testCtx, expectedSG.Name, expectedSG) + assert.Nil(t, err) + + err = dc.triggerStorageGroupResyncIfApplicable(k8s.UpdateFailCtx, dc.log, expectedD) + assert.NotNil(t, err) + + assert.Nil(t, dc.client.DeleteCR(testCtx, expectedSG)) + }) +} + func TestDriveController_stopLocateNodeLED(t *testing.T) { mockK8sClient := &mocks.K8Client{} kubeClient := k8s.NewKubeClient(mockK8sClient, testLogger, objects.NewObjectLogger(), testNs) diff --git a/pkg/crcontrollers/storagegroup/storagegroupcontroller.go b/pkg/crcontrollers/storagegroup/storagegroupcontroller.go index 5492a80d8..98881eb55 100644 --- a/pkg/crcontrollers/storagegroup/storagegroupcontroller.go +++ b/pkg/crcontrollers/storagegroup/storagegroupcontroller.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/google/uuid" "github.com/sirupsen/logrus" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -30,14 +31,17 @@ import ( ) const ( - sgFinalizer = "dell.emc.csi/sg-cleanup" - sgTempStatusAnnotationKey = "storagegroup.csi-baremetal.dell.com/status" - contextTimeoutSeconds = 60 + sgFinalizer = "dell.emc.csi/sg-cleanup" + contextTimeoutSeconds = 60 + normalRequeueInterval = 1 * time.Second ) +var unsupportedDriveMatchFields = []string{"Health", "Status", "Usage", "IsClean"} + // Controller to reconcile storagegroup custom resource type Controller struct { client *k8s.KubeClient + k8sCache k8s.CRReader log *logrus.Entry crHelper k8s.CRHelper cachedCrHelper k8s.CRHelper @@ -49,6 +53,7 @@ type Controller struct { func NewController(client *k8s.KubeClient, k8sCache k8s.CRReader, log *logrus.Logger) *Controller { c := &Controller{ client: client, + k8sCache: k8sCache, crHelper: k8s.NewCRHelperImpl(client, log), cachedCrHelper: k8s.NewCRHelperImpl(client, log).SetReader(k8sCache), log: log.WithField("component", "StorageGroupController"), @@ -63,6 +68,9 @@ func (c *Controller) SetupWithManager(mgr ctrl.Manager) error { WithOptions(controller.Options{}). Watches(&source.Kind{Type: &drivecrd.Drive{}}, &handler.EnqueueRequestForObject{}). 
WithEventFilter(predicate.Funcs{ + DeleteFunc: func(e event.DeleteEvent) bool { + return c.filterDeleteEvent(e.Object) + }, UpdateFunc: func(e event.UpdateEvent) bool { return c.filterUpdateEvent(e.ObjectOld, e.ObjectNew) }, @@ -70,6 +78,11 @@ func (c *Controller) SetupWithManager(mgr ctrl.Manager) error { Complete(c) } +func (c *Controller) filterDeleteEvent(obj runtime.Object) bool { + _, isStorageGroup := obj.(*sgcrd.StorageGroup) + return isStorageGroup +} + func (c *Controller) filterUpdateEvent(old runtime.Object, new runtime.Object) bool { if newDrive, ok := new.(*drivecrd.Drive); ok { if oldDrive, ok := old.(*drivecrd.Drive); ok { @@ -80,7 +93,9 @@ func (c *Controller) filterUpdateEvent(old runtime.Object, new runtime.Object) b } func filterDriveUpdateEvent(old *drivecrd.Drive, new *drivecrd.Drive) bool { - return old.Labels[apiV1.StorageGroupLabelKey] != new.Labels[apiV1.StorageGroupLabelKey] + oldLabel := old.Labels[apiV1.StorageGroupLabelKey] + newLabel, newLabeled := new.Labels[apiV1.StorageGroupLabelKey] + return (oldLabel != newLabel) || (!old.Spec.IsClean && new.Spec.IsClean && !newLabeled) } // Reconcile reconciles StorageGroup custom resources @@ -89,69 +104,31 @@ func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu defer cancelFn() // read name - name := req.Name + reqName := req.Name // customize logging - log := c.log.WithFields(logrus.Fields{"method": "Reconcile", "name": name}) - - drive := &drivecrd.Drive{} - if err := c.client.ReadCR(ctx, name, "", drive); err == nil { - return c.syncDriveStorageGroupLabel(ctx, drive) - } else if !k8serrors.IsNotFound(err) { - log.Errorf("error in reading %s as drive object: %v", name, err) + log := c.log.WithFields(logrus.Fields{"method": "Reconcile", "name": reqName}) + + if _, err := uuid.Parse(reqName); err == nil { + drive := &drivecrd.Drive{} + // A drive just physically removed but not yet totally deleted yet, i.e. 
Usage == "REMOVED" && Status == "OFFLINE" + // will not be selected in any storage group and its existing sg label takes no effect + if err := c.client.ReadCR(ctx, reqName, "", drive); err == nil && + !(drive.Spec.Usage == apiV1.DriveUsageRemoved && drive.Spec.Status == apiV1.DriveStatusOffline) { + return c.reconcileDriveStorageGroupLabel(ctx, drive) + } else if err != nil && !k8serrors.IsNotFound(err) { + log.Errorf("error in reading %s as drive object: %v", reqName, err) + return ctrl.Result{}, err + } } storageGroup := &sgcrd.StorageGroup{} - if err := c.client.ReadCR(ctx, name, "", storageGroup); err != nil { + if err := c.client.ReadCR(ctx, reqName, "", storageGroup); err != nil { if !k8serrors.IsNotFound(err) { - log.Errorf("error in reading %s as drive or storagegroup object: %v", name, err) + log.Errorf("error in reading %s as drive or storagegroup object: %v", reqName, err) } return ctrl.Result{}, client.IgnoreNotFound(err) } - - log.Debugf("Reconcile StorageGroup: %v", storageGroup) - - // StorageGroup Deletion request - if !storageGroup.DeletionTimestamp.IsZero() { - return c.handleStorageGroupDeletion(ctx, log, storageGroup) - } - - if !util.ContainsString(storageGroup.Finalizers, sgFinalizer) { - // append finalizer - log.Debugf("Appending finalizer for StorageGroup") - storageGroup.Finalizers = append(storageGroup.Finalizers, sgFinalizer) - if err := c.client.UpdateCR(ctx, storageGroup); err != nil { - log.Errorf("Unable to append finalizer %s to StorageGroup with error: %v.", sgFinalizer, err) - return ctrl.Result{Requeue: true}, err - } - } - - if storageGroup.Annotations == nil { - storageGroup.Annotations = map[string]string{} - } - sgStatus, ok := storageGroup.Annotations[sgTempStatusAnnotationKey] - if !ok { - if !c.isStorageGroupValid(log, storageGroup) { - storageGroup.Annotations[sgTempStatusAnnotationKey] = apiV1.Failed - if err := c.client.UpdateCR(ctx, storageGroup); err != nil { - log.Errorf("Unable to update StorageGroup status with error: %v.", err) - return ctrl.Result{Requeue: true}, err - } - return ctrl.Result{}, nil - } - // Pass storage group valiation, change to CREATING status - sgStatus = apiV1.Creating - storageGroup.Annotations[sgTempStatusAnnotationKey] = apiV1.Creating - if err := c.client.UpdateCR(ctx, storageGroup); err != nil { - log.Errorf("Unable to update StorageGroup status with error: %v.", err) - return ctrl.Result{Requeue: true}, err - } - } - - if sgStatus == apiV1.Creating { - return c.handleStorageGroupCreationOrUpdate(ctx, log, storageGroup) - } - - return ctrl.Result{}, nil + return c.reconcileStorageGroup(ctx, storageGroup) } // combine the following similar funcs @@ -173,7 +150,7 @@ func (c *Controller) removeDriveStorageGroupLabel(ctx context.Context, log *logr return nil } -func (c *Controller) addDriveStorageGroupLabel(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive, +func (c *Controller) updateDriveStorageGroupLabel(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive, sgName string) error { if drive.Labels == nil { drive.Labels = map[string]string{} @@ -186,7 +163,7 @@ func (c *Controller) addDriveStorageGroupLabel(ctx context.Context, log *logrus. 
return nil } -func (c *Controller) addACStorageGroupLabel(ctx context.Context, log *logrus.Entry, ac *accrd.AvailableCapacity, +func (c *Controller) updateACStorageGroupLabel(ctx context.Context, log *logrus.Entry, ac *accrd.AvailableCapacity, sgName string) error { if ac.Labels == nil { ac.Labels = map[string]string{} @@ -199,53 +176,39 @@ func (c *Controller) addACStorageGroupLabel(ctx context.Context, log *logrus.Ent return nil } -func (c *Controller) syncDriveOnAllStorageGroups(ctx context.Context, drive *drivecrd.Drive, ac *accrd.AvailableCapacity) (ctrl.Result, error) { - log := c.log.WithFields(logrus.Fields{"method": "syncDriveOnAllStorageGroups", "name": drive.Name}) +func (c *Controller) findAndAddMatchedStorageGroupLabel(ctx context.Context, drive *drivecrd.Drive, + ac *accrd.AvailableCapacity) (ctrl.Result, error) { + log := c.log.WithFields(logrus.Fields{"method": "findAndAddMatchedStorageGroupLabel", "name": drive.Name}) sgList := &sgcrd.StorageGroupList{} if err := c.client.ReadList(ctx, sgList); err != nil { log.Errorf("failed to read storage group list: %v", err) return ctrl.Result{Requeue: true}, err } + for _, storageGroup := range sgList.Items { sg := storageGroup - sgStatus, ok := sg.Annotations[sgTempStatusAnnotationKey] - if !ok { - if !c.isStorageGroupValid(log, &sg) { - sg.Annotations[sgTempStatusAnnotationKey] = apiV1.Failed - if err := c.client.UpdateCR(ctx, &sg); err != nil { - log.Errorf("Unable to update StorageGroup status with error: %v", err) - return ctrl.Result{Requeue: true}, err - } - continue - } - // Pass storage group valiation, change to CREATING status - sgStatus = apiV1.Creating - sg.Annotations[sgTempStatusAnnotationKey] = apiV1.Creating - if err := c.client.UpdateCR(ctx, &sg); err != nil { - log.Errorf("Unable to update StorageGroup status with error: %v", err) - return ctrl.Result{Requeue: true}, err - } - } - if sgStatus != apiV1.Failed && c.isDriveSelectedByValidMatchFields(log, &drive.Spec, &sg.Spec.DriveSelector.MatchFields) { + if (sg.Status.Phase == apiV1.StorageGroupPhaseSynced || (sg.Status.Phase == apiV1.StorageGroupPhaseSyncing && + sg.Spec.DriveSelector.NumberDrivesPerNode == 0)) && c.isDriveSelectedByValidMatchFields(log, &drive.Spec, + &sg.Spec.DriveSelector.MatchFields) { if sg.Spec.DriveSelector.NumberDrivesPerNode == 0 { log.Infof("Expect to add label of storagegroup %s to drive %s", sg.Name, drive.Name) - if err := c.addDriveStorageGroupLabel(ctx, log, drive, sg.Name); err != nil { + if err := c.updateDriveStorageGroupLabel(ctx, log, drive, sg.Name); err != nil { return ctrl.Result{Requeue: true}, err } - if err := c.addACStorageGroupLabel(ctx, log, ac, sg.Name); err != nil { + log.Infof("Successfully add label of storagegroup %s to drive %s", sg.Name, drive.Name) + if err := c.updateACStorageGroupLabel(ctx, log, ac, sg.Name); err != nil { return ctrl.Result{Requeue: true}, err } - log.Infof("Successfully add label of storagegroup %s to drive %s and its corresponding AC", - sg.Name, drive.Name) + log.Infof("Successfully add label of storagegroup %s to drive %s's corresponding AC", sg.Name, drive.Name) return ctrl.Result{}, nil } log.Debugf("drive %s will probably be selected by storagegroup %s", drive.Name, sg.Name) - if sg.Annotations[sgTempStatusAnnotationKey] != apiV1.Creating { + if sg.Status.Phase == apiV1.StorageGroupPhaseSynced { // trigger the subsequent reconciliation of the potentially-matched storage group - sg.Annotations[sgTempStatusAnnotationKey] = apiV1.Creating + sg.Status.Phase = apiV1.StorageGroupPhaseSyncing 
if err := c.client.UpdateCR(ctx, &sg); err != nil { log.Errorf("Unable to update StorageGroup status with error: %v", err) return ctrl.Result{Requeue: true}, err @@ -256,39 +219,83 @@ return ctrl.Result{}, nil } -func (c *Controller) handleManualDriveStorageGroupLabelAddition(ctx context.Context, log *logrus.Entry, +func (c *Controller) handleSeparateDriveStorageGroupLabelAddition(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive, ac *accrd.AvailableCapacity, driveSGLabel string, lvgExists bool) (ctrl.Result, error) { - if lvgExists { - log.Warnf("We can't add storage group label to drive %s with existing LVG", drive.Name) + if !drive.Spec.IsClean || lvgExists || ac.Spec.Size == 0 { + log.Warnf("We shouldn't add storage group label to not clean drive %s", drive.Name) + + // if this drive has been really selected by this existing sg with NumberDrivesPerNode > 0 and SYNCED status, + // we need to also trigger the resync of this sg afterward + sg := &sgcrd.StorageGroup{} + if err := c.client.ReadCR(ctx, driveSGLabel, "", sg); err != nil && !k8serrors.IsNotFound(err) { + return ctrl.Result{Requeue: true}, err + } + if sg.Status.Phase == apiV1.StorageGroupPhaseSynced && sg.Spec.DriveSelector.NumberDrivesPerNode > 0 && + c.isDriveSelectedByValidMatchFields(log, &drive.Spec, &sg.Spec.DriveSelector.MatchFields) { + sg.Status.Phase = apiV1.StorageGroupPhaseSyncing + if err := c.client.UpdateCR(ctx, sg); err != nil { + return ctrl.Result{Requeue: true}, err + } + } + if err := c.removeDriveStorageGroupLabel(ctx, log, drive); err != nil { return ctrl.Result{Requeue: true}, err } return ctrl.Result{}, nil } + if err := c.updateACStorageGroupLabel(ctx, log, ac, driveSGLabel); err != nil { + return ctrl.Result{Requeue: true}, err + } + log.Infof("successfully add label of storage group %s to drive %s and its corresponding AC manually", driveSGLabel, drive.Name) + return ctrl.Result{}, nil +} + +func (c *Controller) handleManualDriveStorageGroupLabelRemoval(ctx context.Context, log *logrus.Entry, + drive *drivecrd.Drive, ac *accrd.AvailableCapacity, acSGLabel string) (ctrl.Result, error) { volumes, err := c.crHelper.GetVolumesByLocation(ctx, drive.Spec.UUID) if err != nil { log.Errorf("Error when getting volumes on drive %s: %v", drive.Name, err) return ctrl.Result{Requeue: true}, err } if len(volumes) > 0 { - log.Warnf("We can't add storage group label to drive %s with existing volumes", drive.Name) - if err = c.removeDriveStorageGroupLabel(ctx, log, drive); err != nil { + log.Warnf("We shouldn't remove label of storage group %s from drive %s with existing volumes", + acSGLabel, drive.Name) + if err := c.updateDriveStorageGroupLabel(ctx, log, drive, acSGLabel); err != nil { return ctrl.Result{Requeue: true}, err } return ctrl.Result{}, nil } - log.Debugf("Also add storage-group %s label to AC %s corresponding to drive %s", driveSGLabel, ac.Name, drive.Name) - if err = c.addACStorageGroupLabel(ctx, log, ac, driveSGLabel); err != nil { + sg := &sgcrd.StorageGroup{} + err = c.client.ReadCR(ctx, acSGLabel, "", sg) + switch { + case err == nil && (sg.Status.Phase == apiV1.StorageGroupPhaseSynced || sg.Status.Phase == apiV1.StorageGroupPhaseSyncing) && + c.isDriveSelectedByValidMatchFields(log, &drive.Spec, &sg.Spec.DriveSelector.MatchFields): + log.Warnf("We shouldn't remove label of storage group %s from drive %s still selected by this storage group", + acSGLabel, drive.Name) + if err :=
c.updateDriveStorageGroupLabel(ctx, log, drive, acSGLabel); err != nil { + return ctrl.Result{Requeue: true}, err + } + return ctrl.Result{}, nil + case err != nil && !k8serrors.IsNotFound(err): + log.Errorf("Failed to read StorageGroup %s with error: %v", acSGLabel, err) return ctrl.Result{Requeue: true}, err + + // the case that the drive's storage group label removal is valid and we should sync the removal to AC + default: + if err := c.removeACStorageGroupLabel(ctx, log, ac); err != nil { + return ctrl.Result{Requeue: true}, err + } + log.Infof("successfully remove label of storage group %s from drive %s and its corresponding AC", + acSGLabel, drive.Name) } return ctrl.Result{}, nil } -// Here, we will sync the storage-group label of the drive object if applicable -func (c *Controller) syncDriveStorageGroupLabel(ctx context.Context, drive *drivecrd.Drive) (ctrl.Result, error) { - log := c.log.WithFields(logrus.Fields{"method": "syncDriveStorageGroupLabel", "name": drive.Name}) +// Here, we will sync the storage-group label of single drive object if applicable +func (c *Controller) reconcileDriveStorageGroupLabel(ctx context.Context, drive *drivecrd.Drive) (ctrl.Result, error) { + log := c.log.WithFields(logrus.Fields{"method": "reconcileDriveStorageGroupLabel", "name": drive.Name}) location := drive.Name lvg, err := c.crHelper.GetLVGByDrive(ctx, drive.Spec.UUID) @@ -300,78 +307,93 @@ func (c *Controller) syncDriveStorageGroupLabel(ctx context.Context, drive *driv location = lvg.Name } - ac, err := c.cachedCrHelper.GetACByLocation(location) + ac, err := c.crHelper.GetACByLocation(location) if err != nil { + log.Errorf("Error when getting AC of drive %s: %v", drive.Name, err) if err != errTypes.ErrorNotFound { - log.Errorf("Error when getting AC by location %s: %v", location, err) + return ctrl.Result{Requeue: true}, err } - return ctrl.Result{Requeue: true}, err + return ctrl.Result{RequeueAfter: normalRequeueInterval}, nil } acSGLabel, acSGLabeled := ac.Labels[apiV1.StorageGroupLabelKey] driveSGLabel, driveSGLabeled := drive.Labels[apiV1.StorageGroupLabelKey] if acSGLabel == driveSGLabel { - if !acSGLabeled && !driveSGLabeled && lvg == nil { - volumes, err := c.crHelper.GetVolumesByLocation(ctx, drive.Spec.UUID) - if err != nil { - log.Errorf("Error when getting volumes on drive %s: %v", drive.Name, err) - return ctrl.Result{Requeue: true}, err - } - if len(volumes) == 0 { - return c.syncDriveOnAllStorageGroups(ctx, drive, ac) - } + if !acSGLabeled && !driveSGLabeled && drive.Spec.IsClean && lvg == nil && ac.Spec.Size > 0 { + return c.findAndAddMatchedStorageGroupLabel(ctx, drive, ac) } return ctrl.Result{}, nil } // Current manual sg labeling support - log.Debugf("Handle manual change of storage group label of drive %s", drive.Name) + log.Debugf("Handle separate change of storage group label of drive %s", drive.Name) switch { - // add new storagegroup label to drive + // add new storagegroup label to drive separately case !acSGLabeled && driveSGLabeled: - return c.handleManualDriveStorageGroupLabelAddition(ctx, log, drive, ac, driveSGLabel, lvg != nil) + return c.handleSeparateDriveStorageGroupLabelAddition(ctx, log, drive, ac, driveSGLabel, lvg != nil) - // remove storagegroup label from drive + // remove storagegroup label from drive manually case acSGLabeled && !driveSGLabeled: - volumes, err := c.crHelper.GetVolumesByLocation(ctx, drive.Spec.UUID) - if err != nil { - log.Errorf("Error when getting volumes on drive %s: %v", drive.Name, err) + return 
c.handleManualDriveStorageGroupLabelRemoval(ctx, log, drive, ac, acSGLabel) + + // actually just the case acSGLabeled && driveSGLabeled here, i.e. update drive's storage group label manually, + // need to restore the update + default: + log.Warnf("We cannot update the drive %s's storage group label from %s to %s", drive.Name, acSGLabel, driveSGLabel) + if err := c.updateDriveStorageGroupLabel(ctx, log, drive, acSGLabel); err != nil { return ctrl.Result{Requeue: true}, err } - if len(volumes) > 0 { - log.Warnf("We can't remove storage group %s label from drive %s with existing volumes", - acSGLabel, drive.Name) - if err := c.addDriveStorageGroupLabel(ctx, log, drive, acSGLabel); err != nil { + return ctrl.Result{}, nil + } +} + +func (c *Controller) reconcileStorageGroup(ctx context.Context, storageGroup *sgcrd.StorageGroup) (ctrl.Result, error) { + log := c.log.WithFields(logrus.Fields{"method": "reconcileStorageGroup", "name": storageGroup.Name}) + + log.Debugf("Reconcile StorageGroup: %+v", storageGroup) + + // StorageGroup Deletion request + if !storageGroup.DeletionTimestamp.IsZero() { + if storageGroup.Status.Phase != apiV1.StorageGroupPhaseRemoving { + storageGroup.Status.Phase = apiV1.StorageGroupPhaseRemoving + if err := c.client.UpdateCR(ctx, storageGroup); err != nil { + log.Errorf("Unable to update StorageGroup status with error: %v.", err) return ctrl.Result{Requeue: true}, err } - return ctrl.Result{}, nil } + return c.handleStorageGroupDeletion(ctx, log, storageGroup) + } - sg := &sgcrd.StorageGroup{} - err = c.client.ReadCR(ctx, acSGLabel, "", sg) - switch { - case err == nil && c.isDriveSelectedByValidMatchFields(log, &drive.Spec, &sg.Spec.DriveSelector.MatchFields): - log.Warnf("We can't remove storage group %s label from drive %s still selected by this storage group", - acSGLabel, drive.Name) - if err := c.addDriveStorageGroupLabel(ctx, log, drive, acSGLabel); err != nil { - return ctrl.Result{Requeue: true}, err - } - return ctrl.Result{}, nil - case err != nil && !k8serrors.IsNotFound(err): - log.Errorf("Failed to read StorageGroup %s with error: %v", acSGLabel, err) + if !util.ContainsString(storageGroup.Finalizers, sgFinalizer) { + // append finalizer + log.Debugf("Appending finalizer for StorageGroup") + storageGroup.Finalizers = append(storageGroup.Finalizers, sgFinalizer) + if err := c.client.UpdateCR(ctx, storageGroup); err != nil { + log.Errorf("Unable to append finalizer %s to StorageGroup with error: %v.", sgFinalizer, err) return ctrl.Result{Requeue: true}, err + } + } - // the case that the storage-group label removal is valid and we should sync the removal to AC - default: - log.Debugf("Also remove the storage-group %s label of AC %s corresponding to drive %s", acSGLabel, - ac.Name, drive.Name) - if err := c.removeACStorageGroupLabel(ctx, log, ac); err != nil { + if storageGroup.Status.Phase == "" { + if !c.isStorageGroupValid(log, storageGroup) { + storageGroup.Status.Phase = apiV1.StorageGroupPhaseInvalid + if err := c.client.UpdateCR(ctx, storageGroup); err != nil { + log.Errorf("Unable to update StorageGroup status with error: %v.", err) return ctrl.Result{Requeue: true}, err } + return ctrl.Result{}, nil + } + // Pass storage group validation, change to SYNCING status phase + storageGroup.Status.Phase = apiV1.StorageGroupPhaseSyncing + if err := c.client.UpdateCR(ctx, storageGroup); err != nil { + log.Errorf("Unable to update StorageGroup status with error: %v.", err) + return ctrl.Result{Requeue: true}, err } + } - // TODO restore the update of
storagegroup label of drive + if storageGroup.Status.Phase == apiV1.StorageGroupPhaseSyncing { + return c.handleStorageGroupCreationOrUpdate(ctx, log, storageGroup) } return ctrl.Result{}, nil @@ -379,6 +401,8 @@ func (c *Controller) syncDriveStorageGroupLabel(ctx context.Context, drive *driv func (c *Controller) handleStorageGroupDeletion(ctx context.Context, log *logrus.Entry, sg *sgcrd.StorageGroup) (ctrl.Result, error) { + log.Infof("handle deletion of storage group %s", sg.Name) + drivesList := &drivecrd.DriveList{} if err := c.client.ReadList(ctx, drivesList); err != nil { log.Errorf("failed to read drives list: %v", err) @@ -386,17 +410,28 @@ } var labelRemovalErrMsgs []string + + // whether there is some drive with existing volumes in this storage group + driveHasExistingVolumes := false for _, drive := range drivesList.Items { drive := drive if drive.Labels[apiV1.StorageGroupLabelKey] == sg.Name { - if err := c.removeDriveAndACStorageGroupLabel(ctx, log, &drive, sg); err != nil { + successful, err := c.removeDriveAndACStorageGroupLabel(ctx, log, &drive, sg.Name) + if err != nil { labelRemovalErrMsgs = append(labelRemovalErrMsgs, err.Error()) + } else if !successful { + driveHasExistingVolumes = true } } } if len(labelRemovalErrMsgs) > 0 { return ctrl.Result{Requeue: true}, fmt.Errorf(strings.Join(labelRemovalErrMsgs, "\n")) } + if driveHasExistingVolumes { + log.Warnf("Storage group %s has a drive with existing volumes. The deletion will be retried later.", sg.Name) + return ctrl.Result{RequeueAfter: normalRequeueInterval}, nil + } + log.Infof("deletion of storage group %s successfully completed", sg.Name) return c.removeFinalizer(ctx, log, sg) } @@ -414,6 +449,8 @@ func (c *Controller) removeFinalizer(ctx context.Context, log *logrus.Entry, func (c *Controller) handleStorageGroupCreationOrUpdate(ctx context.Context, log *logrus.Entry, sg *sgcrd.StorageGroup) (ctrl.Result, error) { + log.Infof("handle creation or update of storage group %s", sg.Name) + drivesList := &drivecrd.DriveList{} if err := c.client.ReadList(ctx, drivesList); err != nil { log.Errorf("failed to read drives list: %v", err) @@ -427,17 +464,25 @@ // used for candidate drives to be selected by storagegroup with numberDrivesPerNode > 0 var candidateDrives []*drivecrd.Drive + for _, d := range drivesList.Items { drive := d + + // A drive just physically removed but not yet totally deleted, i.e.
Usage == "REMOVED" && Status == "OFFLINE" + // will not be selected in any storage group and its existing sg label takes no effect + if drive.Spec.Usage == apiV1.DriveUsageRemoved && drive.Spec.Status == apiV1.DriveStatusOffline { + continue + } + existingStorageGroup, exists := drive.Labels[apiV1.StorageGroupLabelKey] if exists { if existingStorageGroup == sg.Name { - log.Debugf("Drive %s has already been selected by current storage group", drive.Name) noDriveSelected = false if driveSelector.NumberDrivesPerNode > 0 { drivesCount[drive.Spec.NodeId]++ } } + log.Debugf("Drive %s has already been selected by storage group %s", drive.Name, existingStorageGroup) continue } @@ -448,40 +493,57 @@ func (c *Controller) handleStorageGroupCreationOrUpdate(ctx context.Context, log continue } - if err := c.addDriveAndACStorageGroupLabel(ctx, log, &drive, sg); err != nil { + driveLabeled, err := c.addDriveAndACStorageGroupLabel(ctx, log, &drive, sg.Name) + if driveLabeled { + noDriveSelected = false + } else if err != nil { labelingErrMsgs = append(labelingErrMsgs, err.Error()) } - noDriveSelected = false } } for _, d := range candidateDrives { drive := d if drivesCount[drive.Spec.NodeId] < driveSelector.NumberDrivesPerNode { - if err := c.addDriveAndACStorageGroupLabel(ctx, log, drive, sg); err != nil { + driveLabeled, err := c.addDriveAndACStorageGroupLabel(ctx, log, drive, sg.Name) + if driveLabeled { + noDriveSelected = false + drivesCount[drive.Spec.NodeId]++ + } else if err != nil { labelingErrMsgs = append(labelingErrMsgs, err.Error()) } - noDriveSelected = false - drivesCount[drive.Spec.NodeId]++ } } if noDriveSelected { log.Warnf("No drive can be selected by current storage group %s", sg.Name) } - if len(labelingErrMsgs) == 0 { - sg.Annotations[sgTempStatusAnnotationKey] = apiV1.Created - if err := c.client.UpdateCR(ctx, sg); err != nil { - log.Errorf("Unable to update StorageGroup status with error: %v.", err) - return ctrl.Result{Requeue: true}, err - } - return ctrl.Result{}, nil + + if len(labelingErrMsgs) != 0 { + return ctrl.Result{Requeue: true}, fmt.Errorf(strings.Join(labelingErrMsgs, "\n")) } - return ctrl.Result{Requeue: true}, fmt.Errorf(strings.Join(labelingErrMsgs, "\n")) + sg.Status.Phase = apiV1.StorageGroupPhaseSynced + if err := c.client.UpdateCR(ctx, sg); err != nil { + log.Errorf("Unable to update StorageGroup status with error: %v.", err) + return ctrl.Result{Requeue: true}, err + } + log.Infof("creation or update of storage group %s completed", sg.Name) + return ctrl.Result{}, nil } func (c *Controller) isDriveSelectedByValidMatchFields(log *logrus.Entry, drive *api.Drive, matchFields *map[string]string) bool { for fieldName, fieldValue := range *matchFields { + isSupportedDriveMatchField := true + for _, unsupportedField := range unsupportedDriveMatchFields { + if fieldName == unsupportedField { + isSupportedDriveMatchField = false + break + } + } + if !isSupportedDriveMatchField { + continue + } + driveField := reflect.ValueOf(drive).Elem().FieldByName(fieldName) switch driveField.Type().String() { case "string": @@ -537,77 +599,93 @@ func (c *Controller) isMatchFieldsValid(log *logrus.Entry, matchFields *map[stri return true } -// TODO Need more check on whether storagegroup is valid func (c *Controller) isStorageGroupValid(log *logrus.Entry, sg *sgcrd.StorageGroup) bool { - return c.isMatchFieldsValid(log, &sg.Spec.DriveSelector.MatchFields) + return sg.Spec.DriveSelector != nil && c.isMatchFieldsValid(log, &sg.Spec.DriveSelector.MatchFields) } func (c *Controller) 
 func (c *Controller) removeDriveAndACStorageGroupLabel(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive,
-	sg *sgcrd.StorageGroup) error {
-	log.Debugf("try to remove storagegroup label of drive %s", drive.Name)
+	sgName string) (bool, error) {
+	log.Infof("try to remove storagegroup label of drive %s and its corresponding AC", drive.Name)
+
+	// check whether this drive has existing volumes
 	volumes, err := c.crHelper.GetVolumesByLocation(ctx, drive.Spec.UUID)
 	if err != nil {
-		return err
+		log.Errorf("failed to get volumes on drive %s: %v", drive.Name, err)
+		return false, err
 	}
 	if len(volumes) > 0 {
-		log.Errorf("Drive %s has existing volumes. Storage group label can't be removed.", drive.Name)
-		return fmt.Errorf("error in removing storage-group label on drive")
+		log.Warnf("Drive %s has existing volumes and its sg label can't be removed.", drive.Name)
+		return false, nil
 	}
 
-	ac, err := c.cachedCrHelper.GetACByLocation(drive.Spec.UUID)
+	location := drive.Name
+	lvg, err := c.crHelper.GetLVGByDrive(ctx, drive.Name)
 	if err != nil {
-		log.Errorf("Error when getting AC by drive %s: %v", drive.Spec.UUID, err)
-		return err
+		log.Errorf("error when getting LVG of drive %s: %v", drive.GetName(), err)
+		return false, err
+	} else if lvg != nil {
+		location = lvg.Name
 	}
 
-	if err = c.removeDriveStorageGroupLabel(ctx, log, drive); err != nil {
-		return err
+	ac, err := c.crHelper.GetACByLocation(location)
+	if err != nil && err != errTypes.ErrorNotFound {
+		log.Errorf("error when getting AC of drive %s: %v", drive.Name, err)
+		return false, err
 	}
-	if ac.Labels[apiV1.StorageGroupLabelKey] == sg.Name {
+	if ac != nil {
+		if ac.Labels[apiV1.StorageGroupLabelKey] != sgName {
+			log.Warnf("AC %s's storage group label is not %s", ac.Name, sgName)
+		}
 		if err = c.removeACStorageGroupLabel(ctx, log, ac); err != nil {
-			return err
+			return false, err
 		}
-	} else {
-		log.Warnf("we cannot remove storage-group label of ac %s not included in storage group %s", ac.Name, sg.Name)
+		log.Infof("successfully removed storagegroup label of drive %s's corresponding AC", drive.Name)
 	}
-	return nil
+
+	if err = c.removeDriveStorageGroupLabel(ctx, log, drive); err != nil {
+		return false, err
+	}
+	log.Infof("successfully removed storagegroup label of drive %s", drive.Name)
+	return true, nil
 }
 
 func (c *Controller) addDriveAndACStorageGroupLabel(ctx context.Context, log *logrus.Entry, drive *drivecrd.Drive,
-	sg *sgcrd.StorageGroup) error {
-	log.Infof("Expect to add label of storagegroup %s to drive %s", sg.Name, drive.Name)
+	sgName string) (bool, error) {
+	log.Infof("Expect to add label of storagegroup %s to drive %s and its corresponding AC", sgName, drive.Name)
 
-	if lvg, err := c.crHelper.GetLVGByDrive(ctx, drive.Spec.UUID); err != nil || lvg != nil {
-		if err != nil {
-			log.Errorf("Error when getting LVG by drive %s: %v", drive.Name, err)
-			return err
-		}
-		log.Warnf("Drive %s has existing LVG and can't be selected by current storage group.",
-			drive.Name)
-		return nil
+	if !drive.Spec.IsClean {
+		log.Warnf("drive %s is not clean and can't be selected by the current storage group.", drive.Name)
+		return false, nil
 	}
 
-	if volumes, err := c.crHelper.GetVolumesByLocation(ctx, drive.Spec.UUID); err != nil || len(volumes) > 0 {
-		if err != nil {
-			log.Errorf("Error when getting volumes on drive %s: %v", drive.Name, err)
-			return err
+	ac, err := c.crHelper.GetACByLocation(drive.Name)
+	if err != nil {
+		if err != errTypes.ErrorNotFound {
+			log.Errorf("Error when getting AC by the location of drive %s: %v", drive.Spec.UUID, err)
+			return false, err
 		}
-		log.Warnf("Drive %s has existing volumes and can't be selected by current storage group.",
-			drive.Name)
-		return nil
+		lvg, e := c.crHelper.GetLVGByDrive(ctx, drive.Name)
+		if e != nil || lvg != nil {
+			return false, e
+		}
+		// the AC doesn't exist yet; wait for it to be created and reconcile again
+		return false, err
 	}
-
-	ac, err := c.cachedCrHelper.GetACByLocation(drive.Spec.UUID)
-	if err != nil {
-		log.Errorf("Error when getting AC by drive %s: %v", drive.Spec.UUID, err)
-		return err
+	// an AC with zero Size means the drive is not clean
+	if ac != nil && ac.Spec.Size == 0 {
+		log.Warnf("drive %s is not clean and can't be selected by the current storage group.", drive.Name)
+		return false, nil
 	}
-	// the corresponding ac exists, add storage-group label to the drive and corresponding ac
-	if err = c.addDriveStorageGroupLabel(ctx, log, drive, sg.Name); err != nil {
-		return err
+
+	// the corresponding non-LVG AC exists and has free space; add the storage-group label to the drive and its AC
+	if err = c.updateDriveStorageGroupLabel(ctx, log, drive, sgName); err != nil {
+		return false, err
 	}
-	if err = c.addACStorageGroupLabel(ctx, log, ac, sg.Name); err != nil {
-		return err
+	log.Infof("Successfully added label of storagegroup %s to drive %s", sgName, drive.Name)
+
+	if err = c.updateACStorageGroupLabel(ctx, log, ac, sgName); err != nil {
+		return true, err
 	}
-	log.Infof("Successfully add label of storagegroup %s to drive %s and its corresponding AC", sg.Name, drive.Name)
-	return nil
+	log.Infof("Successfully added label of storagegroup %s to drive %s and its corresponding AC", sgName, drive.Name)
+
+	return true, nil
 }
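One subtlety in the two functions above: for an LVG-backed drive the AC is keyed by the LVG name rather than by the drive itself, which is why label removal first resolves a location before calling GetACByLocation. A self-contained sketch of that resolution logic follows, assuming a crHelper-like lookup; LVGFinder and resolveACLocation are illustrative names for this sketch, not part of the patch.

package main

import (
	"context"
	"fmt"
)

// LVG is a trimmed stand-in for the LogicalVolumeGroup CR.
type LVG struct{ Name string }

// LVGFinder is a hypothetical lookup mirroring crHelper.GetLVGByDrive.
type LVGFinder interface {
	GetLVGByDrive(ctx context.Context, driveName string) (*LVG, error)
}

// resolveACLocation returns the key under which the drive's AC is stored:
// the LVG name when the drive hosts an LVG, otherwise the drive name itself.
func resolveACLocation(ctx context.Context, f LVGFinder, driveName string) (string, error) {
	lvg, err := f.GetLVGByDrive(ctx, driveName)
	if err != nil {
		return "", err
	}
	if lvg != nil {
		return lvg.Name, nil
	}
	return driveName, nil
}

type fakeFinder struct{ lvg *LVG }

func (f fakeFinder) GetLVGByDrive(context.Context, string) (*LVG, error) { return f.lvg, nil }

func main() {
	loc, _ := resolveACLocation(context.Background(), fakeFinder{&LVG{Name: "lvg-1"}}, "drive-1")
	fmt.Println(loc) // prints "lvg-1": the AC of an LVG-backed drive is keyed by the LVG
}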
diff --git a/pkg/crcontrollers/storagegroup/storagegroupcontroller_test.go b/pkg/crcontrollers/storagegroup/storagegroupcontroller_test.go
new file mode 100644
index 000000000..7ed5c15f9
--- /dev/null
+++ b/pkg/crcontrollers/storagegroup/storagegroupcontroller_test.go
@@ -0,0 +1,714 @@
+package storagegroup
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+
+	api "github.com/dell/csi-baremetal/api/generated/v1"
+	apiV1 "github.com/dell/csi-baremetal/api/v1"
+	accrd "github.com/dell/csi-baremetal/api/v1/availablecapacitycrd"
+	dcrd "github.com/dell/csi-baremetal/api/v1/drivecrd"
+	"github.com/dell/csi-baremetal/api/v1/lvgcrd"
+	sgcrd "github.com/dell/csi-baremetal/api/v1/storagegroupcrd"
+	vcrd "github.com/dell/csi-baremetal/api/v1/volumecrd"
+	"github.com/dell/csi-baremetal/pkg/base/k8s"
+	"github.com/dell/csi-baremetal/pkg/base/logger/objects"
+	"github.com/dell/csi-baremetal/pkg/base/util"
+	"github.com/dell/csi-baremetal/pkg/mocks"
+)
+
+var (
+	testNs  = "default"
+	testErr = errors.New("error")
+
+	testLogger         = logrus.New()
+	testCtx            = context.Background()
+	nodeID             = uuid.New().String()
+	driveUUID1         = uuid.New().String()
+	driveUUID2         = uuid.New().String()
+	acUUID1            = uuid.New().String()
+	acUUID2            = uuid.New().String()
+	lvgUUID1           = uuid.New().String()
+	driveSerialNumber  = "VDH19UBD"
+	driveSerialNumber2 = "MDH16UAC"
+	vol1Name           = "pvc-" + uuid.New().String()
+	sg1Name            = "hdd-group-1"
+	sg2Name            = "hdd-group-r"
+	sg3Name            = "hdd-group-invalid"
+
+	drive1 = &dcrd.Drive{
+		TypeMeta:   v1.TypeMeta{Kind: "Drive", APIVersion: apiV1.APIV1Version},
+		ObjectMeta:
v1.ObjectMeta{Name: driveUUID1}, + Spec: api.Drive{ + UUID: driveUUID1, + Size: 1024 * 1024 * 1024 * 500, + NodeId: nodeID, + Type: apiV1.DriveTypeHDD, + Status: apiV1.DriveStatusOnline, + Health: apiV1.HealthGood, + Slot: "1", + SerialNumber: driveSerialNumber, + IsClean: true, + }, + } + + drive2 = &dcrd.Drive{ + TypeMeta: v1.TypeMeta{Kind: "Drive", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: driveUUID2}, + Spec: api.Drive{ + UUID: driveUUID2, + Size: 1024 * 1024 * 1024 * 500, + NodeId: nodeID, + Type: apiV1.DriveTypeHDD, + Status: apiV1.DriveStatusOnline, + Health: apiV1.HealthGood, + Slot: "2", + SerialNumber: driveSerialNumber2, + IsClean: true, + }, + } + + ac1 = &accrd.AvailableCapacity{ + TypeMeta: v1.TypeMeta{Kind: "AvailableCapacity", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: acUUID1}, + Spec: api.AvailableCapacity{ + Size: drive1.Spec.Size, + StorageClass: apiV1.StorageClassHDD, + Location: driveUUID1, + NodeId: nodeID}, + } + + ac2 = &accrd.AvailableCapacity{ + TypeMeta: v1.TypeMeta{Kind: "AvailableCapacity", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: acUUID2}, + Spec: api.AvailableCapacity{ + Size: drive2.Spec.Size, + StorageClass: apiV1.StorageClassHDD, + Location: driveUUID2, + NodeId: nodeID}, + } + + lvg1 = &lvgcrd.LogicalVolumeGroup{ + TypeMeta: v1.TypeMeta{Kind: "LogicalVolumeGroup", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: lvgUUID1}, + Spec: api.LogicalVolumeGroup{ + Name: lvgUUID1, + Node: nodeID, + Locations: []string{driveUUID1}, + Size: int64(1024 * 5 * util.GBYTE), + }, + } + + vol1 = &vcrd.Volume{ + TypeMeta: v1.TypeMeta{Kind: "Volume", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{ + Name: vol1Name, + Namespace: testNs, + }, + Spec: api.Volume{ + Id: vol1Name, + StorageClass: apiV1.StorageClassHDD, + Location: driveUUID1, + CSIStatus: apiV1.Created, + NodeId: nodeID, + }, + } + + sg1 = &sgcrd.StorageGroup{ + TypeMeta: v1.TypeMeta{Kind: "StorageGroup", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: sg1Name}, + Spec: api.StorageGroupSpec{ + DriveSelector: &api.DriveSelector{ + MatchFields: map[string]string{ + "Slot": "1", + "Type": apiV1.DriveTypeHDD, + }, + }, + }, + } + + sg2 = &sgcrd.StorageGroup{ + TypeMeta: v1.TypeMeta{Kind: "StorageGroup", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: sg2Name}, + Spec: api.StorageGroupSpec{ + DriveSelector: &api.DriveSelector{ + NumberDrivesPerNode: 1, + MatchFields: map[string]string{ + "Type": apiV1.DriveTypeHDD, + "Status": apiV1.DriveStatusOffline, + }, + }, + }, + } + + sg3 = &sgcrd.StorageGroup{ + TypeMeta: v1.TypeMeta{Kind: "StorageGroup", APIVersion: apiV1.APIV1Version}, + ObjectMeta: v1.ObjectMeta{Name: sg3Name}, + Spec: api.StorageGroupSpec{ + DriveSelector: &api.DriveSelector{ + MatchFields: map[string]string{ + "Type": apiV1.DriveTypeHDD, + "IsSSD": "no", + }, + }, + }, + } +) + +func TestStorageGroupController_filterDeleteEvent(t *testing.T) { + kubeClient, err := k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + c := NewController(kubeClient, kubeClient, testLogger) + + testDrive1 := drive1.DeepCopy() + assert.False(t, c.filterDeleteEvent(testDrive1)) +} + +func TestStorageGroupController_filterDriveUpdateEvent(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testDrive2 := drive2.DeepCopy() + assert.False(t, filterDriveUpdateEvent(testDrive1, testDrive2)) +} + +func TestStorageGroupController_Reconcile(t *testing.T) { + kubeClient, err := 
k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + storageGroupController := NewController(kubeClient, kubeClient, testLogger) + + t.Run("reconcile drive with read error", func(t *testing.T) { + req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: drive1.Name}} + assert.NotNil(t, req) + + _, err := uuid.Parse(drive1.Name) + assert.Nil(t, err) + + res, err := storageGroupController.Reconcile(k8s.GetFailCtx, req) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{}, res) + }) + + t.Run("reconcile sg with read error", func(t *testing.T) { + k8sErrNotFound := storageGroupController.client.ReadCR(testCtx, drive1.Name, "", &dcrd.Drive{}) + assert.NotNil(t, k8sErrNotFound) + + req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: sg1Name}} + assert.NotNil(t, req) + + mockK8sClient := &mocks.K8Client{} + kubeClient := k8s.NewKubeClient(mockK8sClient, testLogger, objects.NewObjectLogger(), testNs) + storageGroupController := NewController(kubeClient, kubeClient, testLogger) + + mockK8sClient.On("Get", mock.Anything, mock.Anything, &dcrd.Drive{}).Return(k8sErrNotFound) + mockK8sClient.On("Get", mock.Anything, mock.Anything, &sgcrd.StorageGroup{}).Return(testErr) + + res, err := storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{}, res) + }) + + t.Run("reconcile drive with sg label manual change", func(t *testing.T) { + newSGName := "hdd-group-new" + + testAC := ac1.DeepCopy() + testDrive := drive1.DeepCopy() + testDrive.Labels = map[string]string{apiV1.StorageGroupLabelKey: newSGName} + + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testAC.Name, testAC)) + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testDrive.Name, testDrive)) + + req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: testDrive.Name}} + assert.NotNil(t, req) + + res, err := storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC.Name, "", testAC)) + assert.Equal(t, newSGName, testAC.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive.Name, "", testDrive)) + assert.Equal(t, newSGName, testDrive.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testDrive)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testAC)) + }) + + t.Run("reconcile storage groups and drives", func(t *testing.T) { + // setup resources + testAC1 := ac1.DeepCopy() + testDrive1 := drive1.DeepCopy() + testAC2 := ac2.DeepCopy() + testDrive2 := drive2.DeepCopy() + + testDrive3 := drive2.DeepCopy() + testDrive3.Name = uuid.New().String() + testDrive3.Spec.Usage = apiV1.DriveUsageRemoved + testDrive3.Spec.Status = apiV1.DriveStatusOffline + + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testAC1.Name, testAC1)) + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testDrive1.Name, testDrive1)) + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testAC2.Name, testAC2)) + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testDrive2.Name, testDrive2)) + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testDrive3.Name, testDrive3)) + + testSG1 := sg1.DeepCopy() + + req := ctrl.Request{NamespacedName: 
types.NamespacedName{Namespace: testNs, Name: testSG1.Name}} + assert.NotNil(t, req) + + res, err := storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testSG1.Name, testSG1)) + testSG2 := sg2.DeepCopy() + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testSG2.Name, testSG2)) + + // reconcile creation of testSG1 and testSG2 + req = ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: testSG1.Name}} + assert.NotNil(t, req) + + res, err = storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + req = ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: testSG2.Name}} + assert.NotNil(t, req) + + res, err = storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testSG1.Name, "", testSG1)) + assert.Equal(t, 1, len(testSG1.Finalizers)) + assert.Equal(t, apiV1.StorageGroupPhaseSynced, testSG1.Status.Phase) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testSG2.Name, "", testSG2)) + assert.Equal(t, 1, len(testSG2.Finalizers)) + assert.Equal(t, apiV1.StorageGroupPhaseSynced, testSG2.Status.Phase) + + testAC1Result := &accrd.AvailableCapacity{} + testAC2Result := &accrd.AvailableCapacity{} + testDrive1Result := &dcrd.Drive{} + testDrive2Result := &dcrd.Drive{} + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC1.Name, "", testAC1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC2.Name, "", testAC2Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive2.Name, "", testDrive2Result)) + assert.Equal(t, testSG1.Name, testAC1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG1.Name, testDrive1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, testAC2Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, testDrive2Result.Labels[apiV1.StorageGroupLabelKey]) + + // reconcile deletion of testSG1 + testSG1.DeletionTimestamp = &v1.Time{Time: time.Now()} + assert.Nil(t, storageGroupController.client.UpdateCR(testCtx, testSG1)) + + req = ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: testSG1.Name}} + assert.NotNil(t, req) + + res, err = storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + err = storageGroupController.client.ReadCR(testCtx, testSG1.Name, "", testSG1) + assert.True(t, k8serrors.IsNotFound(err)) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC1.Name, "", testAC1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC2.Name, "", testAC2Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive2.Name, "", testDrive2Result)) + assert.Empty(t, testAC1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Empty(t, testDrive1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, testAC2Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, 
testDrive2Result.Labels[apiV1.StorageGroupLabelKey]) + + // reconcile testDrive1 without testSG1 + req = ctrl.Request{NamespacedName: types.NamespacedName{Namespace: testNs, Name: testDrive1.Name}} + assert.NotNil(t, req) + + res, err = storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testSG2.Name, "", testSG2)) + assert.Equal(t, apiV1.StorageGroupPhaseSyncing, testSG2.Status.Phase) + + // reconcile testDrive1 with testSG1 + testSG1 = sg1.DeepCopy() + testSG1.Status.Phase = apiV1.StorageGroupPhaseSynced + assert.Nil(t, storageGroupController.client.CreateCR(testCtx, testSG1.Name, testSG1)) + + res, err = storageGroupController.Reconcile(testCtx, req) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testSG1.Name, "", testSG1)) + assert.Equal(t, apiV1.StorageGroupPhaseSynced, testSG1.Status.Phase) + + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC1.Name, "", testAC1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testAC2.Name, "", testAC2Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1Result)) + assert.Nil(t, storageGroupController.client.ReadCR(testCtx, testDrive2.Name, "", testDrive2Result)) + assert.Equal(t, testSG1.Name, testAC1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG1.Name, testDrive1Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, testAC2Result.Labels[apiV1.StorageGroupLabelKey]) + assert.Equal(t, testSG2.Name, testDrive2Result.Labels[apiV1.StorageGroupLabelKey]) + + // delete resources + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testSG1)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testSG2)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testDrive1)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testAC1)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testDrive2)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testDrive3)) + assert.Nil(t, storageGroupController.client.DeleteCR(testCtx, testAC2)) + }) +} + +func TestStorageGroupController_reconcileStorageGroup(t *testing.T) { + kubeClient, err := k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + c := NewController(kubeClient, kubeClient, testLogger) + + t.Run("reconcile invalid storage group", func(t *testing.T) { + testSG3 := sg3.DeepCopy() + assert.Nil(t, c.client.CreateCR(testCtx, testSG3.Name, testSG3)) + + res, err := c.reconcileStorageGroup(testCtx, testSG3) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + testSG3Result := &sgcrd.StorageGroup{} + assert.Nil(t, c.client.ReadCR(testCtx, testSG3.Name, "", testSG3Result)) + assert.Equal(t, apiV1.StorageGroupPhaseInvalid, testSG3Result.Status.Phase) + + res, err = c.reconcileStorageGroup(testCtx, testSG3Result) + assert.NotNil(t, res) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testSG3.Name, "", testSG3Result)) + assert.Equal(t, apiV1.StorageGroupPhaseInvalid, testSG3Result.Status.Phase) + + assert.Nil(t, c.client.DeleteCR(testCtx, testSG3)) + }) + + t.Run("reconcile storage group with error", func(t *testing.T) { + testSG3 := sg3.DeepCopy() + + res, err := 
c.reconcileStorageGroup(testCtx, testSG3) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + testSG3.Finalizers = append(testSG3.Finalizers, sgFinalizer) + res, err = c.reconcileStorageGroup(testCtx, testSG3) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + testSG1 := sg1.DeepCopy() + testSG1.Finalizers = append(testSG1.Finalizers, sgFinalizer) + res, err = c.reconcileStorageGroup(testCtx, testSG1) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + testSG1.DeletionTimestamp = &v1.Time{Time: time.Now()} + testSG1.Finalizers = append(testSG1.Finalizers, sgFinalizer) + res, err = c.reconcileStorageGroup(testCtx, testSG1) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + }) +} + +func TestStorageGroupController_handleManualDriveStorageGroupLabelRemoval(t *testing.T) { + kubeClient, err := k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + c := NewController(kubeClient, kubeClient, testLogger) + + t.Run("get volumes with error", func(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testAC1 := ac1.DeepCopy() + + res, err := c.handleManualDriveStorageGroupLabelRemoval(k8s.ListFailCtx, c.log, testDrive1, testAC1, sg1Name) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + }) + + t.Run("restore sg label removal on drive with existing volumes", func(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testAC1 := ac1.DeepCopy() + testVol1 := vol1.DeepCopy() + + assert.Nil(t, c.client.CreateCR(testCtx, testDrive1.Name, testDrive1)) + assert.Nil(t, c.client.CreateCR(testCtx, testVol1.Name, testVol1)) + + // update fail + res, err := c.handleManualDriveStorageGroupLabelRemoval(k8s.UpdateFailCtx, c.log, testDrive1, testAC1, sg1Name) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // redo + assert.Nil(t, c.removeDriveStorageGroupLabel(testCtx, c.log, testDrive1)) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Empty(t, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.handleManualDriveStorageGroupLabelRemoval(testCtx, c.log, testDrive1, testAC1, sg1Name) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg1Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, c.client.DeleteCR(testCtx, testDrive1)) + assert.Nil(t, c.client.DeleteCR(testCtx, testVol1)) + }) + + t.Run("cases of handling drive's manual sg label removal", func(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testAC1 := ac1.DeepCopy() + testAC1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg1Name} + + testSG1 := sg1.DeepCopy() + testSG1.Status.Phase = apiV1.StorageGroupPhaseSynced + + // Fail to read testSG1 + res, err := c.handleManualDriveStorageGroupLabelRemoval(k8s.GetFailCtx, c.log, testDrive1, testAC1, sg1Name) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // testSG1 not found from fake k8s, fail to remove sg label from testAC1 + res, err = c.handleManualDriveStorageGroupLabelRemoval(testCtx, c.log, testDrive1, testAC1, sg1Name) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // successfully get testSG1, and need to restore the testDrive1's sg label removal + // update testDrive1's sg label with error + 
assert.Nil(t, c.client.CreateCR(testCtx, testSG1.Name, testSG1)) + + res, err = c.handleManualDriveStorageGroupLabelRemoval(testCtx, c.log, testDrive1, testAC1, sg1Name) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // redo the restore and succeed now + delete(testDrive1.Labels, apiV1.StorageGroupLabelKey) + + assert.Nil(t, c.client.CreateCR(testCtx, testDrive1.Name, testDrive1)) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Empty(t, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.handleManualDriveStorageGroupLabelRemoval(testCtx, c.log, testDrive1, testAC1, sg1Name) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg1Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, c.client.DeleteCR(testCtx, testDrive1)) + assert.Nil(t, c.client.DeleteCR(testCtx, testSG1)) + }) +} + +func TestStorageGroupController_reconcileDriveStorageGroupLabel(t *testing.T) { + kubeClient, err := k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + c := NewController(kubeClient, kubeClient, testLogger) + + t.Run("most cases of reconcileDriveStorageGroupLabel", func(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testDrive1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg2Name} + testDrive1.Spec.IsClean = true + + // error in getting LVG + res, err := c.reconcileDriveStorageGroupLabel(k8s.ListFailCtx, testDrive1) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // LVG not nil and ac really doesn't exist + testLVG1 := lvg1.DeepCopy() + assert.Nil(t, c.client.CreateCR(testCtx, testLVG1.Name, testLVG1)) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{RequeueAfter: normalRequeueInterval}, res) + + assert.Nil(t, c.client.DeleteCR(testCtx, testLVG1)) + + // ac with same label + testAC1 := ac1.DeepCopy() + testAC1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg2Name} + assert.Nil(t, c.client.CreateCR(testCtx, testAC1.Name, testAC1)) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + // clean ac without label, then redo ac sg labeling + assert.Nil(t, c.removeACStorageGroupLabel(testCtx, c.log, testAC1)) + + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Empty(t, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.reconcileDriveStorageGroupLabel(k8s.UpdateFailCtx, testDrive1) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Equal(t, sg2Name, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + // not clean drive, need to remove selection of drive from its sg when do drive's separate sg label addition + testDrive1.Spec.IsClean = false + + assert.Nil(t, c.removeACStorageGroupLabel(testCtx, c.log, testAC1)) + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Empty(t, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + testSG2 := sg2.DeepCopy() + testSG2.Status.Phase = apiV1.StorageGroupPhaseSynced + assert.Nil(t, c.client.CreateCR(testCtx, testSG2.Name, testSG2)) + + // get sg and update sg status 
failure + res, err = c.reconcileDriveStorageGroupLabel(k8s.GetFailCtx, testDrive1) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + res, err = c.reconcileDriveStorageGroupLabel(k8s.UpdateFailCtx, testDrive1) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // remove selection of drive from its sg + // failure in final step + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testSG2.Name, "", testSG2)) + assert.Equal(t, apiV1.StorageGroupPhaseSyncing, testSG2.Status.Phase) + + // redo + assert.Nil(t, c.client.CreateCR(testCtx, testDrive1.Name, testDrive1)) + assert.Nil(t, c.updateDriveStorageGroupLabel(testCtx, c.log, testDrive1, sg2Name)) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg2Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Empty(t, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + // manually remove drive's sg label + assert.Nil(t, c.updateACStorageGroupLabel(testCtx, c.log, testAC1, sg1Name)) + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Equal(t, sg1Name, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Empty(t, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Empty(t, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + // restore drive's manual sg label change with update failure + assert.Nil(t, c.updateDriveStorageGroupLabel(testCtx, c.log, testDrive1, sg2Name)) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg2Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, c.updateACStorageGroupLabel(testCtx, c.log, testAC1, sg1Name)) + assert.Nil(t, c.client.ReadCR(testCtx, testAC1.Name, "", testAC1)) + assert.Equal(t, sg1Name, testAC1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.reconcileDriveStorageGroupLabel(k8s.UpdateFailCtx, testDrive1) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + + // redo + assert.Nil(t, c.updateDriveStorageGroupLabel(testCtx, c.log, testDrive1, sg2Name)) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg2Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + res, err = c.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.Nil(t, err) + assert.Equal(t, ctrl.Result{}, res) + assert.Nil(t, c.client.ReadCR(testCtx, testDrive1.Name, "", testDrive1)) + assert.Equal(t, sg1Name, testDrive1.Labels[apiV1.StorageGroupLabelKey]) + + assert.Nil(t, c.client.DeleteCR(testCtx, testDrive1)) + assert.Nil(t, c.client.DeleteCR(testCtx, testAC1)) + assert.Nil(t, c.client.DeleteCR(testCtx, testSG2)) + }) + + t.Run("reconcileDriveStorageGroupLabel with getting ac error", func(t *testing.T) { + testDrive1 := drive1.DeepCopy() + testDrive1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg2Name} + + mockK8sClient := &mocks.K8Client{} + kubeClient := 
k8s.NewKubeClient(mockK8sClient, testLogger, objects.NewObjectLogger(), testNs) + storageGroupController := NewController(kubeClient, kubeClient, testLogger) + + mockK8sClient.On("List", mock.Anything, &lvgcrd.LogicalVolumeGroupList{}, mock.Anything).Return(nil) + mockK8sClient.On("List", mock.Anything, &accrd.AvailableCapacityList{}, mock.Anything).Return(testErr) + + res, err := storageGroupController.reconcileDriveStorageGroupLabel(testCtx, testDrive1) + assert.NotNil(t, res) + assert.NotNil(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, res) + }) +} + +func TestStorageGroupController_addRemoveStorageGroupLabel(t *testing.T) { + kubeClient, err := k8s.GetFakeKubeClient(testNs, testLogger) + assert.Nil(t, err) + + storageGroupController := NewController(kubeClient, kubeClient, testLogger) + + t.Run("add/remove ac/drive storage group label with error", func(t *testing.T) { + testAC1 := ac1.DeepCopy() + assert.NotNil(t, storageGroupController.updateACStorageGroupLabel(testCtx, storageGroupController.log, testAC1, sg1Name)) + + testAC1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg1Name} + assert.NotNil(t, storageGroupController.removeACStorageGroupLabel(testCtx, storageGroupController.log, testAC1)) + + testDrive1 := drive1.DeepCopy() + assert.NotNil(t, storageGroupController.updateDriveStorageGroupLabel(testCtx, storageGroupController.log, testDrive1, sg1Name)) + testDrive1.Labels = map[string]string{apiV1.StorageGroupLabelKey: sg1Name} + assert.NotNil(t, storageGroupController.removeDriveStorageGroupLabel(testCtx, storageGroupController.log, testDrive1)) + }) +} diff --git a/variables.mk b/variables.mk index bea783aa6..a39c4fae1 100644 --- a/variables.mk +++ b/variables.mk @@ -5,7 +5,7 @@ PROJECT_PATH := ${PWD} ### common path CSI_OPERATOR_PATH=../csi-baremetal-operator CSI_CHART_CRDS_PATH=$(CSI_OPERATOR_PATH)/charts/csi-baremetal-operator/crds -CRD_OPTIONS ?= "crd:trivialVersions=true" +CRD_OPTIONS ?= crd ### version MAJOR := 1 @@ -68,7 +68,7 @@ GOPRIVATE_PART := GOPROXY_PART := GOPROXY=https://proxy.golang.org,direct ### go dependencies -CONTROLLER_GEN_VER := v0.5.0 +CONTROLLER_GEN_VER := v0.9.2 MOCKERY_VER := v2.9.4 PROTOC_GEN_GO_VER := v1.3.2 CLIENT_GO_VER := v0.22.5
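A closing note on the variables.mk change: the crd:trivialVersions option was removed in newer controller-gen releases, so the option list shrinks to plain crd once CONTROLLER_GEN_VER moves to v0.9.2. That newer generator also understands CEL validation markers, which is what the spec-immutability rule mentioned in the commit history would rely on. Below is a hedged sketch of such a marker; the field names are illustrative rather than the exact contents of storagegroup_types.go, and the cluster must support CRD validation rules (beta since Kubernetes 1.25).

package storagegroupcrd

// DriveSelector is trimmed to the fields relevant to this sketch.
type DriveSelector struct {
	NumberDrivesPerNode int32             `json:"numberDrivesPerNode,omitempty"`
	MatchFields         map[string]string `json:"matchFields,omitempty"`
}

// StorageGroupSpec shows how a CEL rule can make the whole spec immutable:
// controller-gen v0.9+ turns the marker below into x-kubernetes-validations
// in the generated CRD, and the API server then rejects any spec update.
// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="StorageGroup spec is immutable"
type StorageGroupSpec struct {
	DriveSelector *DriveSelector `json:"driveSelector,omitempty"`
}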