From c851b551e8dcdd6fe9730d3a2fdf64db2bd07e4b Mon Sep 17 00:00:00 2001 From: Elena Gershkovich Date: Tue, 5 Mar 2024 13:40:46 +0200 Subject: [PATCH] Add VolumeGroupReplication support. Signed-off-by: Elena Gershkovich --- api/v1alpha1/drplacementcontrol_types.go | 8 + api/v1alpha1/volumereplicationgroup_types.go | 3 + api/v1alpha1/zz_generated.deepcopy.go | 34 ++ ...endr.openshift.io_drplacementcontrols.yaml | 10 + ..._protectedvolumereplicationgrouplists.yaml | 10 + ....openshift.io_volumereplicationgroups.yaml | 10 + config/dr-cluster/rbac/role.yaml | 2 + config/rbac/role.yaml | 2 + ...hift.io_volumegroupreplicationclasses.yaml | 76 +++ ...ift.io_volumegroupreplicationcontents.yaml | 176 +++++++ ....openshift.io_volumegroupreplications.yaml | 257 ++++++++++ .../drplacementcontrol_controller.go | 6 +- .../volumereplicationgroup_controller.go | 136 +++++- internal/controller/vrg_volrep.go | 460 +++++++++++++----- internal/controller/vrg_volrep_test.go | 423 +++++++++++++++- 15 files changed, 1485 insertions(+), 128 deletions(-) create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml create mode 100644 hack/test/replication.storage.openshift.io_volumegroupreplications.yaml diff --git a/api/v1alpha1/drplacementcontrol_types.go b/api/v1alpha1/drplacementcontrol_types.go index b533de1af..68cdc6334 100644 --- a/api/v1alpha1/drplacementcontrol_types.go +++ b/api/v1alpha1/drplacementcontrol_types.go @@ -167,6 +167,10 @@ type PlacementDecision struct { ClusterNamespace string `json:"clusterNamespace,omitempty"` } +type Groups struct { + Grouped []string `json:"grouped,omitempty"` +} + // VRGResourceMeta represents the VRG resource. type VRGResourceMeta struct { // Kind is the kind of the Kubernetes resource. @@ -185,6 +189,10 @@ type VRGResourceMeta struct { //+optional ProtectedPVCs []string `json:"protectedpvcs,omitempty"` + // List of CGs that are protected by the VRG resource + //+optional + PVCGroups []Groups `json:"pvcgroups,omitempty"` + // ResourceVersion is a value used to identify the version of the // VRG resource object //+optional diff --git a/api/v1alpha1/volumereplicationgroup_types.go b/api/v1alpha1/volumereplicationgroup_types.go index 0332a0275..fb4da6d87 100644 --- a/api/v1alpha1/volumereplicationgroup_types.go +++ b/api/v1alpha1/volumereplicationgroup_types.go @@ -327,6 +327,9 @@ type VolumeReplicationGroupStatus struct { // All the protected pvcs ProtectedPVCs []ProtectedPVC `json:"protectedPVCs,omitempty"` + // List of CGs that are protected by the VRG resource + //+optional + PVCGroups []Groups `json:"pvcgroups,omitempty"` // Conditions are the list of VRG's summary conditions and their status. Conditions []metav1.Condition `json:"conditions,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 411337c47..3fc1c0bd1 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -542,6 +542,26 @@ func (in *DRPolicyStatus) DeepCopy() *DRPolicyStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Groups) DeepCopyInto(out *Groups) { + *out = *in + if in.Grouped != nil { + in, out := &in.Grouped, &out.Grouped + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Groups. +func (in *Groups) DeepCopy() *Groups { + if in == nil { + return nil + } + out := new(Groups) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Identifier) DeepCopyInto(out *Identifier) { *out = *in @@ -1434,6 +1454,13 @@ func (in *VRGResourceMeta) DeepCopyInto(out *VRGResourceMeta) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.PVCGroups != nil { + in, out := &in.PVCGroups, &out.PVCGroups + *out = make([]Groups, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VRGResourceMeta. @@ -1637,6 +1664,13 @@ func (in *VolumeReplicationGroupStatus) DeepCopyInto(out *VolumeReplicationGroup (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.PVCGroups != nil { + in, out := &in.PVCGroups, &out.PVCGroups + *out = make([]Groups, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]v1.Condition, len(*in)) diff --git a/config/crd/bases/ramendr.openshift.io_drplacementcontrols.yaml b/config/crd/bases/ramendr.openshift.io_drplacementcontrols.yaml index f7c674476..0d39079e7 100644 --- a/config/crd/bases/ramendr.openshift.io_drplacementcontrols.yaml +++ b/config/crd/bases/ramendr.openshift.io_drplacementcontrols.yaml @@ -515,6 +515,16 @@ spec: items: type: string type: array + pvcgroups: + description: List of CGs that are protected by the VRG resource + items: + properties: + grouped: + items: + type: string + type: array + type: object + type: array resourceVersion: description: |- ResourceVersion is a value used to identify the version of the diff --git a/config/crd/bases/ramendr.openshift.io_protectedvolumereplicationgrouplists.yaml b/config/crd/bases/ramendr.openshift.io_protectedvolumereplicationgrouplists.yaml index 9756974cd..4c03ccedd 100644 --- a/config/crd/bases/ramendr.openshift.io_protectedvolumereplicationgrouplists.yaml +++ b/config/crd/bases/ramendr.openshift.io_protectedvolumereplicationgrouplists.yaml @@ -1057,6 +1057,16 @@ spec: type: string type: object type: array + pvcgroups: + description: List of CGs that are protected by the VRG resource + items: + properties: + grouped: + items: + type: string + type: array + type: object + type: array state: description: State captures the latest state of the replication operation diff --git a/config/crd/bases/ramendr.openshift.io_volumereplicationgroups.yaml b/config/crd/bases/ramendr.openshift.io_volumereplicationgroups.yaml index b3ae3c7b2..b1a7d0221 100644 --- a/config/crd/bases/ramendr.openshift.io_volumereplicationgroups.yaml +++ b/config/crd/bases/ramendr.openshift.io_volumereplicationgroups.yaml @@ -995,6 +995,16 @@ spec: type: string type: object type: array + pvcgroups: + description: List of CGs that are protected by the VRG resource + items: + properties: + grouped: + items: + type: string + type: array + type: object + type: array state: description: State captures the latest state of the replication operation type: string diff --git a/config/dr-cluster/rbac/role.yaml b/config/dr-cluster/rbac/role.yaml index 
807c0b850..8ac042feb 100644 --- a/config/dr-cluster/rbac/role.yaml +++ b/config/dr-cluster/rbac/role.yaml @@ -162,6 +162,7 @@ rules: - apiGroups: - replication.storage.openshift.io resources: + - volumegroupreplicationclasses - volumereplicationclasses verbs: - get @@ -170,6 +171,7 @@ rules: - apiGroups: - replication.storage.openshift.io resources: + - volumegroupreplications - volumereplications verbs: - create diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index b736dc5a1..2dd481958 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -286,6 +286,7 @@ rules: - apiGroups: - replication.storage.openshift.io resources: + - volumegroupreplicationclasses - volumereplicationclasses verbs: - get @@ -294,6 +295,7 @@ rules: - apiGroups: - replication.storage.openshift.io resources: + - volumegroupreplications - volumereplications verbs: - create diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml new file mode 100644 index 000000000..b7e6b1bf4 --- /dev/null +++ b/hack/test/replication.storage.openshift.io_volumegroupreplicationclasses.yaml @@ -0,0 +1,76 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.15.0 + name: volumegroupreplicationclasses.replication.storage.openshift.io +spec: + group: replication.storage.openshift.io + names: + kind: VolumeGroupReplicationClass + listKind: VolumeGroupReplicationClassList + plural: volumegroupreplicationclasses + singular: volumegroupreplicationclass + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: VolumeGroupReplicationClass is the Schema for the volumegroupreplicationclasses + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + VolumeGroupReplicationClassSpec specifies parameters that an underlying storage system uses + when creating a volumegroup replica. A specific VolumeGroupReplicationClass is used by specifying + its name in a VolumeGroupReplication object. 
+            properties:
+              parameters:
+                additionalProperties:
+                  type: string
+                description: |-
+                  Parameters is a key-value map with storage provisioner specific configurations for
+                  creating volume group replicas
+                type: object
+                x-kubernetes-validations:
+                - message: parameters are immutable
+                  rule: self == oldSelf
+              provisioner:
+                description: Provisioner is the name of the storage provisioner
+                type: string
+                x-kubernetes-validations:
+                - message: provisioner is immutable
+                  rule: self == oldSelf
+            required:
+            - provisioner
+            type: object
+            x-kubernetes-validations:
+            - message: parameters are immutable
+              rule: has(self.parameters) == has(oldSelf.parameters)
+          status:
+            description: VolumeGroupReplicationClassStatus defines the observed state
+              of VolumeGroupReplicationClass
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml
new file mode 100644
index 000000000..be67a9e33
--- /dev/null
+++ b/hack/test/replication.storage.openshift.io_volumegroupreplicationcontents.yaml
@@ -0,0 +1,176 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.15.0
+  name: volumegroupreplicationcontents.replication.storage.openshift.io
+spec:
+  group: replication.storage.openshift.io
+  names:
+    kind: VolumeGroupReplicationContent
+    listKind: VolumeGroupReplicationContentList
+    plural: volumegroupreplicationcontents
+    singular: volumegroupreplicationcontent
+  scope: Cluster
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VolumeGroupReplicationContent is the Schema for the volumegroupreplicationcontents
+          API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VolumeGroupReplicationContentSpec defines the desired state
+              of VolumeGroupReplicationContent
+            properties:
+              provisioner:
+                description: |-
+                  provisioner is the name of the CSI driver used to create the physical
+                  volume group on the underlying storage system.
+                  This MUST be the same as the name returned by the CSI GetPluginName() call for
+                  that driver.
+                  Required.
+                type: string
+              source:
+                description: |-
+                  Source specifies whether the volume group is (or should be) dynamically provisioned
+                  or already exists, and just requires a Kubernetes object representation.
+                  This field is immutable after creation.
+                  Required.
+                properties:
+                  volumeHandles:
+                    description: |-
+                      VolumeHandles is a list of volume handles on the backend to be grouped
+                      and replicated.
+                    items:
+                      type: string
+                    type: array
+                required:
+                - volumeHandles
+                type: object
+              volumeGroupReplicationClassName:
+                description: |-
+                  VolumeGroupReplicationClassName is the name of the VolumeGroupReplicationClass from
+                  which this group replication was (or will be) created.
+                type: string
+              volumeGroupReplicationHandle:
+                description: |-
+                  VolumeGroupReplicationHandle is a unique id returned by the CSI driver
+                  to identify the VolumeGroupReplication on the storage system.
+                type: string
+              volumeGroupReplicationRef:
+                description: |-
+                  VolumeGroupReplicationRef specifies the VolumeGroupReplication object to which this
+                  VolumeGroupReplicationContent object is bound.
+                  VolumeGroupReplication.Spec.VolumeGroupReplicationContentName field must reference
+                  this VolumeGroupReplicationContent's name for the bidirectional binding to be valid.
+                  For a pre-existing VolumeGroupReplicationContent object, name and namespace of the
+                  VolumeGroupReplication object MUST be provided for binding to happen.
+                  This field is immutable after creation.
+                  Required.
+                properties:
+                  apiVersion:
+                    description: API version of the referent.
+                    type: string
+                  fieldPath:
+                    description: |-
+                      If referring to a piece of an object instead of an entire object, this string
+                      should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+                      For example, if the object reference is to a container within a pod, this would take on a value like:
+                      "spec.containers{name}" (where "name" refers to the name of the container that triggered
+                      the event) or if no container name is specified "spec.containers[2]" (container with
+                      index 2 in this pod). This syntax is chosen only to have some well-defined way of
+                      referencing a part of an object.
+                      TODO: this design is not final and this field is subject to change in the future.
+                    type: string
+                  kind:
+                    description: |-
+                      Kind of the referent.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+                    type: string
+                  name:
+                    description: |-
+                      Name of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                    type: string
+                  namespace:
+                    description: |-
+                      Namespace of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+                    type: string
+                  resourceVersion:
+                    description: |-
+                      Specific resourceVersion to which this reference is made, if any.
+                      More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+                    type: string
+                  uid:
+                    description: |-
+                      UID of the referent.
+                      More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+                    type: string
+                type: object
+                x-kubernetes-map-type: atomic
+                x-kubernetes-validations:
+                - message: both volumeGroupReplicationRef.name and volumeGroupReplicationRef.namespace
+                    must be set
+                  rule: has(self.name) && has(self.__namespace__)
+                - message: volumeGroupReplicationRef is immutable
+                  rule: self == oldSelf
+            required:
+            - provisioner
+            - source
+            - volumeGroupReplicationHandle
+            - volumeGroupReplicationRef
+            type: object
+          status:
+            description: VolumeGroupReplicationContentStatus defines the status of
+              VolumeGroupReplicationContent
+            properties:
+              persistentVolumeRefList:
+                description: |-
+                  PersistentVolumeRefList is the list of PVs for the group replication.
+                  The maximum number of PVs allowed in the group is 100.
+                items:
+                  description: |-
+                    LocalObjectReference contains enough information to let you locate the
+                    referenced object inside the same namespace.
+                  properties:
+                    name:
+                      default: ""
+                      description: |-
+                        Name of the referent.
+                        This field is effectively required, but due to backwards compatibility is
+                        allowed to be empty. Instances of this type with an empty value here are
+                        almost certainly wrong.
+                        TODO: Add other useful fields. apiVersion, kind, uid?
+                        More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                        TODO: Drop `kubebuilder:default` when controller-gen doesn't need it https://github.com/kubernetes-sigs/kubebuilder/issues/3896.
+                      type: string
+                  type: object
+                  x-kubernetes-map-type: atomic
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml b/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml
new file mode 100644
index 000000000..d8f5757cb
--- /dev/null
+++ b/hack/test/replication.storage.openshift.io_volumegroupreplications.yaml
@@ -0,0 +1,257 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.15.0
+  name: volumegroupreplications.replication.storage.openshift.io
+spec:
+  group: replication.storage.openshift.io
+  names:
+    kind: VolumeGroupReplication
+    listKind: VolumeGroupReplicationList
+    plural: volumegroupreplications
+    singular: volumegroupreplication
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: VolumeGroupReplication is the Schema for the volumegroupreplications
+          API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: VolumeGroupReplicationSpec defines the desired state of VolumeGroupReplication
+            properties:
+              autoResync:
+                default: false
+                description: |-
+                  AutoResync represents the group to be auto resynced when
+                  ReplicationState is "secondary"
+                type: boolean
+              replicationState:
+                description: |-
+                  ReplicationState represents the replication operation to be performed on the group.
+                  Supported operations are "primary", "secondary" and "resync"
+                enum:
+                - primary
+                - secondary
+                - resync
+                type: string
+              source:
+                description: |-
+                  Source specifies where the group replication will be created from.
+                  This field is immutable after creation.
+                  Required.
+                properties:
+                  selector:
+                    description: |-
+                      Selector is a label query over persistent volume claims that are to be
+                      grouped together for replication.
+                    properties:
+                      matchExpressions:
+                        description: matchExpressions is a list of label selector
+                          requirements. The requirements are ANDed.
+                        items:
+                          description: |-
+                            A label selector requirement is a selector that contains values, a key, and an operator that
+                            relates the key and values.
+                          properties:
+                            key:
+                              description: key is the label key that the selector
+                                applies to.
+                              type: string
+                            operator:
+                              description: |-
+                                operator represents a key's relationship to a set of values.
+                                Valid operators are In, NotIn, Exists and DoesNotExist.
+                              type: string
+                            values:
+                              description: |-
+                                values is an array of string values. If the operator is In or NotIn,
+                                the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                                the values array must be empty. This array is replaced during a strategic
+                                merge patch.
+                              items:
+                                type: string
+                              type: array
+                              x-kubernetes-list-type: atomic
+                          required:
+                          - key
+                          - operator
+                          type: object
+                        type: array
+                        x-kubernetes-list-type: atomic
+                      matchLabels:
+                        additionalProperties:
+                          type: string
+                        description: |-
+                          matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                          map is equivalent to an element of matchExpressions, whose key field is "key", the
+                          operator is "In", and the values array contains only "value". The requirements are ANDed.
+                        type: object
+                    type: object
+                    x-kubernetes-map-type: atomic
+                    x-kubernetes-validations:
+                    - message: selector is immutable
+                      rule: self == oldSelf
+                type: object
+                x-kubernetes-validations:
+                - message: source is immutable
+                  rule: self == oldSelf
+              volumeGroupReplicationClassName:
+                description: volumeGroupReplicationClassName is the volumeGroupReplicationClass
+                  name for this VolumeGroupReplication resource
+                type: string
+                x-kubernetes-validations:
+                - message: volumeGroupReplicationClassName is immutable
+                  rule: self == oldSelf
+              volumeGroupReplicationContentName:
+                description: Name of the VolumeGroupReplicationContent object created
+                  for this volumeGroupReplication
+                type: string
+                x-kubernetes-validations:
+                - message: volumeGroupReplicationContentName is immutable
+                  rule: self == oldSelf
+              volumeReplicationClassName:
+                description: volumeReplicationClassName is the volumeReplicationClass
+                  name for the VolumeReplication object
+                type: string
+                x-kubernetes-validations:
+                - message: volumeReplicationClassName is immutable
+                  rule: self == oldSelf
+              volumeReplicationName:
+                description: Name of the VolumeReplication object created for this
+                  volumeGroupReplication
+                type: string
+                x-kubernetes-validations:
+                - message: volumeReplicationName is immutable
+                  rule: self == oldSelf
+            required:
+            - autoResync
+            - replicationState
+            - source
+            - volumeGroupReplicationClassName
+            - volumeReplicationClassName
+            type: object
+          status:
+            description: VolumeGroupReplicationStatus defines the observed state of
+              VolumeGroupReplication
+            properties:
+              conditions:
+                description: Conditions are the list of conditions and their status.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.
For + example,\n\n\n\ttype FooStatus struct{\n\t // Represents the + observations of a foo's current state.\n\t // Known .status.conditions.type + are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // + +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t + \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" + patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t + \ // other fields\n\t}" + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + --- + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be + useful (see .node.status.conditions), the ability to deconflict is important. + The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastCompletionTime: + format: date-time + type: string + lastStartTime: + format: date-time + type: string + lastSyncBytes: + format: int64 + type: integer + lastSyncDuration: + type: string + lastSyncTime: + format: date-time + type: string + message: + type: string + observedGeneration: + description: observedGeneration is the last generation change the + operator has dealt with + format: int64 + type: integer + state: + description: State captures the latest state of the replication operation. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go index f3399490a..0b4719627 100644 --- a/internal/controller/drplacementcontrol_controller.go +++ b/internal/controller/drplacementcontrol_controller.go @@ -1311,7 +1311,7 @@ func (r *DRPlacementControlReconciler) updateDRPCStatus( // - The status update is NOT intended for a VRG that should be cleaned up on a peer cluster // It also updates DRPC ConditionProtected based on current state of VRG. // -//nolint:funlen +//nolint:funlen,cyclop func (r *DRPlacementControlReconciler) updateResourceCondition( ctx context.Context, drpc *rmn.DRPlacementControl, userPlacement client.Object, ) { @@ -1374,6 +1374,10 @@ func (r *DRPlacementControlReconciler) updateResourceCondition( drpc.Status.ResourceConditions.ResourceMeta.ProtectedPVCs = protectedPVCs + if rmnutil.IsCGEnabled(vrg.GetAnnotations()) { + drpc.Status.ResourceConditions.ResourceMeta.PVCGroups = vrg.Status.PVCGroups + } + if vrg.Status.LastGroupSyncTime != nil || drpc.Spec.Action != rmn.ActionRelocate { drpc.Status.LastGroupSyncTime = vrg.Status.LastGroupSyncTime drpc.Status.LastGroupSyncDuration = vrg.Status.LastGroupSyncDuration diff --git a/internal/controller/volumereplicationgroup_controller.go b/internal/controller/volumereplicationgroup_controller.go index d96432b04..3331b4b55 100644 --- a/internal/controller/volumereplicationgroup_controller.go +++ b/internal/controller/volumereplicationgroup_controller.go @@ -60,6 +60,8 @@ type VolumeReplicationGroupReconciler struct { } // SetupWithManager sets up the controller with the Manager. +// +// nolint: funlen func (r *VolumeReplicationGroupReconciler) SetupWithManager( mgr ctrl.Manager, ramenConfig *ramendrv1alpha1.RamenConfig, ) error { @@ -99,8 +101,13 @@ func (r *VolumeReplicationGroupReconciler) SetupWithManager( handler.EnqueueRequestsFromMapFunc(r.VRMapFunc), builder.WithPredicates(util.CreateOrDeleteOrResourceVersionUpdatePredicate{}), ). + Watches(&volrep.VolumeGroupReplication{}, + handler.EnqueueRequestsFromMapFunc(r.VGRMapFunc), + builder.WithPredicates(util.CreateOrDeleteOrResourceVersionUpdatePredicate{}), + ). Watches(&corev1.ConfigMap{}, handler.EnqueueRequestsFromMapFunc(r.configMapFun)). - Owns(&volrep.VolumeReplication{}) + Owns(&volrep.VolumeReplication{}). 
+ Owns(&volrep.VolumeGroupReplication{}) if !ramenConfig.VolSync.Disabled { r.Log.Info("VolSync enabled; adding owns and watches") @@ -365,6 +372,8 @@ func filterPVC(reader client.Reader, pvc *corev1.PersistentVolumeClaim, log logr // +kubebuilder:rbac:groups=ramendr.openshift.io,resources=volumereplicationgroups/finalizers,verbs=update // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplications,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumereplicationclasses,verbs=get;list;watch +// +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumegroupreplications,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=replication.storage.openshift.io,resources=volumegroupreplicationclasses,verbs=get;list;watch // +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups=storage.k8s.io,resources=volumeattachments,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch @@ -409,6 +418,7 @@ func (r *VolumeReplicationGroupReconciler) Reconcile(ctx context.Context, req ct volRepPVCs: []corev1.PersistentVolumeClaim{}, volSyncPVCs: []corev1.PersistentVolumeClaim{}, replClassList: &volrep.VolumeReplicationClassList{}, + grpReplClassList: &volrep.VolumeGroupReplicationClassList{}, namespacedName: req.NamespacedName.String(), objectStorers: make(map[string]cachedObjectStorer), storageClassCache: make(map[string]*storagev1.StorageClass), @@ -479,6 +489,7 @@ type VRGInstance struct { volRepPVCs []corev1.PersistentVolumeClaim volSyncPVCs []corev1.PersistentVolumeClaim replClassList *volrep.VolumeReplicationClassList + grpReplClassList *volrep.VolumeGroupReplicationClassList storageClassCache map[string]*storagev1.StorageClass vrgObjectProtected *metav1.Condition kubeObjectsProtected *metav1.Condition @@ -784,10 +795,21 @@ func (v *VRGInstance) updateReplicationClassList() error { return fmt.Errorf("failed to list Replication Classes, %w", err) } - v.vrcUpdated = true - v.log.Info("Number of Replication Classes", "count", len(v.replClassList.Items)) + if util.IsCGEnabled(v.instance.GetAnnotations()) { + if err := v.reconciler.List(v.ctx, v.grpReplClassList, listOptions...); err != nil { + v.log.Error(err, "Failed to list Group Replication Classes", + "labeled", labels.Set(labelSelector.MatchLabels)) + + return fmt.Errorf("failed to list Group Replication Classes, %w", err) + } + + v.log.Info("Number of Group Replication Classes", "count", len(v.grpReplClassList.Items)) + } + + v.vrcUpdated = true + return nil } @@ -832,6 +854,7 @@ func (v *VRGInstance) validateSyncPVCs(pvcList *corev1.PersistentVolumeClaimList return nil } +// nolint:gocognit func (v *VRGInstance) separatePVCsUsingOnlySC(storageClass *storagev1.StorageClass, pvc *corev1.PersistentVolumeClaim) { v.log.Info("separating PVC using only sc provisioner") @@ -839,13 +862,30 @@ func (v *VRGInstance) separatePVCsUsingOnlySC(storageClass *storagev1.StorageCla pvcEnabledForVolSync := util.IsPVCMarkedForVolSync(v.instance.GetAnnotations()) + //nolint:nestif if !pvcEnabledForVolSync { - for _, replicationClass := range v.replClassList.Items { - if storageClass.Provisioner == replicationClass.Spec.Provisioner { + separatePVCs := func(provisioner string) { + if storageClass.Provisioner == provisioner { v.volRepPVCs = append(v.volRepPVCs, *pvc) replicationClassMatchFound = true + } + } + + if 
util.IsCGEnabled(v.instance.GetAnnotations()) { + for _, replicationClass := range v.grpReplClassList.Items { + separatePVCs(replicationClass.Spec.Provisioner) + + if replicationClassMatchFound { + break + } + } + } else { + for _, replicationClass := range v.replClassList.Items { + separatePVCs(replicationClass.Spec.Provisioner) - break + if replicationClassMatchFound { + break + } } } } @@ -942,20 +982,40 @@ func (v *VRGInstance) separateAsyncPVCs(pvcList *corev1.PersistentVolumeClaimLis func (v *VRGInstance) findReplicationClassUsingPeerClass( peerClass *ramendrv1alpha1.PeerClass, storageClass *storagev1.StorageClass, -) *volrep.VolumeReplicationClass { - for _, replicationClass := range v.replClassList.Items { +) client.Object { + findMatchingReplicationClass := func(replicationClass client.Object, provisioner string) client.Object { rIDFromReplicationClass := replicationClass.GetLabels()[VolumeReplicationIDLabel] sIDfromReplicationClass := replicationClass.GetLabels()[StorageIDLabel] matched := sIDfromReplicationClass == storageClass.GetLabels()[StorageIDLabel] && rIDFromReplicationClass == peerClass.ReplicationID && - replicationClass.Spec.Provisioner == storageClass.Provisioner + provisioner == storageClass.Provisioner if matched { - return &replicationClass + return replicationClass } - continue + return nil + } + + if !util.IsCGEnabled(v.instance.GetAnnotations()) { + for index := range v.replClassList.Items { + replicationClass := &v.replClassList.Items[index] + + provisioner := replicationClass.Spec.Provisioner + if result := findMatchingReplicationClass(replicationClass, provisioner); result != nil { + return result + } + } + } else { + for index := range v.grpReplClassList.Items { + replicationClass := &v.grpReplClassList.Items[index] + + provisioner := replicationClass.Spec.Provisioner + if result := findMatchingReplicationClass(replicationClass, provisioner); result != nil { + return result + } + } } return nil @@ -1393,6 +1453,17 @@ func (v *VRGInstance) updateVRGConditionsAndStatus(result ctrl.Result) ctrl.Resu func (v *VRGInstance) updateVRGStatus(result ctrl.Result) ctrl.Result { v.log.Info("Updating VRG status") + if util.IsCGEnabled(v.instance.GetAnnotations()) { + if err := v.updateProtectedCGs(); err != nil { + v.log.Info(fmt.Sprintf("Failed to update protected PVC groups (%v/%s)", + err, v.instance.Name)) + + result.Requeue = true + + return result + } + } + v.updateStatusState() v.instance.Status.ObservedGeneration = v.instance.Generation @@ -1500,6 +1571,35 @@ func getStatusStateFromSpecState(state ramendrv1alpha1.ReplicationState) ramendr } } +func (v *VRGInstance) updateProtectedCGs() error { + var vgrs volrep.VolumeGroupReplicationList + if err := v.reconciler.List(v.ctx, &vgrs); err != nil { + return fmt.Errorf("failed to list Volume Group Replications, %w", err) + } + + var pvcGroups []ramendrv1alpha1.Groups + + for idx := range vgrs.Items { + vgr := &vgrs.Items[idx] + + group := ramendrv1alpha1.Groups{Grouped: []string{}} + + for _, ref := range vgr.Status.PersistentVolumeClaimsRefList { + if ref.Name != "" { + group.Grouped = append(group.Grouped, ref.Name) + } + } + + if len(group.Grouped) > 0 { + pvcGroups = append(pvcGroups, group) + } + } + + v.instance.Status.PVCGroups = pvcGroups + + return nil +} + // updateVRGConditions updates three summary conditions VRGConditionTypeDataReady, // VRGConditionTypeClusterDataProtected and VRGConditionDataProtected at the VRG // level based on the corresponding PVC level conditions in the VRG: @@ -1722,6 +1822,20 @@ 
func filterVRGDependentObjects(reader client.Reader, obj client.Object, log logr return req } +func (r *VolumeReplicationGroupReconciler) VGRMapFunc(ctx context.Context, obj client.Object) []reconcile.Request { + log := ctrl.Log.WithName("vgrmap").WithName("VolumeReplicationGroup") + + vgr, ok := obj.(*volrep.VolumeGroupReplication) + if !ok { + log.Info("map function received non-vgr resource") + + return []reconcile.Request{} + } + + return filterVRGDependentObjects(r.Client, obj, + log.WithValues("vgr", types.NamespacedName{Name: vgr.Name, Namespace: vgr.Namespace})) +} + func (r *VolumeReplicationGroupReconciler) VRMapFunc(ctx context.Context, obj client.Object) []reconcile.Request { log := ctrl.Log.WithName("vrmap").WithName("VolumeReplicationGroup") diff --git a/internal/controller/vrg_volrep.go b/internal/controller/vrg_volrep.go index 1bdbdf236..994f50b3b 100644 --- a/internal/controller/vrg_volrep.go +++ b/internal/controller/vrg_volrep.go @@ -18,6 +18,7 @@ import ( storagev1 "k8s.io/api/storage/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -58,7 +59,11 @@ func logWithPvcName(log logr.Logger, pvc *corev1.PersistentVolumeClaim) logr.Log // reconcileVolRepsAsPrimary creates/updates VolumeReplication CR for each pvc // from pvcList. If it fails (even for one pvc), then requeue is set to true. +// +//nolint:funlen,gocognit func (v *VRGInstance) reconcileVolRepsAsPrimary() { + readyForVRProtectionPVCs := make([]corev1.PersistentVolumeClaim, 0) + for idx := range v.volRepPVCs { pvc := &v.volRepPVCs[idx] pvcNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} @@ -85,6 +90,14 @@ func (v *VRGInstance) reconcileVolRepsAsPrimary() { continue } + readyForVRProtectionPVCs = append(readyForVRProtectionPVCs, *pvc) + } + + for idx := range readyForVRProtectionPVCs { + pvc := &readyForVRProtectionPVCs[idx] + pvcNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} + log := v.log.WithValues("pvc", pvcNamespacedName.String()) + // If VR did not reach primary state, it is fine to still upload the PV and continue processing requeueResult, _, err := v.processVRAsPrimary(pvcNamespacedName, pvc, log) if requeueResult { @@ -275,7 +288,9 @@ func (v *VRGInstance) updateProtectedPVCs(pvc *corev1.PersistentVolumeClaim) err pvcNamespacedName, err) } - volumeReplicationClass, err := v.selectVolumeReplicationClass(pvcNamespacedName) + selectVolumeGroup := rmnutil.IsCGEnabled(v.instance.GetAnnotations()) + + volumeReplicationClass, err := v.selectVolumeReplicationClass(pvcNamespacedName, selectVolumeGroup) if err != nil { return fmt.Errorf("failed to find the appropriate VolumeReplicationClass (%s) %w", v.instance.Name, err) @@ -301,7 +316,7 @@ func (v *VRGInstance) updateProtectedPVCs(pvc *corev1.PersistentVolumeClaim) err func setPVCStorageIdentifiers( protectedPVC *ramendrv1alpha1.ProtectedPVC, storageClass *storagev1.StorageClass, - volumeReplicationClass *volrep.VolumeReplicationClass, + volumeReplicationClass client.Object, ) { protectedPVC.StorageIdentifiers.StorageProvisioner = storageClass.Provisioner @@ -369,6 +384,7 @@ func (v *VRGInstance) preparePVCForVRProtection(pvc *corev1.PersistentVolumeClai return v.protectPVC(pvc, log), !skip } +//nolint:funlen,cyclop func (v *VRGInstance) protectPVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) bool 
{ const requeue = true @@ -421,6 +437,17 @@ func (v *VRGInstance) protectPVC(pvc *corev1.PersistentVolumeClaim, log logr.Log return requeue } + + if rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + if err := v.addConsistencyGroupLabel(pvc); err != nil { + log.Info("Requeuing, as adding label for consistency group failed", "errorValue", err) + + msg := "Failed to add label for consistency group to PVC" + v.updatePVCDataReadyCondition(pvc.Namespace, pvc.Name, VRGConditionReasonError, msg) + + return requeue + } + } } return !requeue @@ -506,6 +533,8 @@ func (v *VRGInstance) preparePVCForVRDeletion(pvc *corev1.PersistentVolumeClaim, delete(pvc.Annotations, pvcVRAnnotationProtectedKey) delete(pvc.Annotations, pvcVRAnnotationArchivedKey) + delete(pvc.Labels, ConsistencyGroupLabel) + log1 := log.WithValues("owner removed", ownerRemoved, "finalizer removed", finalizerRemoved) if err := v.reconciler.Update(v.ctx, pvc); err != nil { @@ -914,7 +943,7 @@ func (v *VRGInstance) undoPVCFinalizersAndPVRetention(pvc *corev1.PersistentVolu pvcNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} - if err := v.deleteVR(pvcNamespacedName, log); err != nil { + if err := v.deleteVR(pvcNamespacedName, pvc, log); err != nil { log.Info("Requeuing due to failure in finalizing VolumeReplication resource for PersistentVolumeClaim", "errorValue", err) @@ -954,10 +983,11 @@ func (v *VRGInstance) reconcileMissingVR(pvc *corev1.PersistentVolumeClaim, log return !vrMissing, !requeue } - volRep := &volrep.VolumeReplication{} + var volRep client.Object + vrNamespacedName := types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace} - err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep) + err := v.getVolumeReplication(pvc, vrNamespacedName, &volRep) if err == nil { if rmnutil.ResourceIsDeleted(volRep) { log.Info("Requeuing due to processing a deleted VR") @@ -986,6 +1016,70 @@ func (v *VRGInstance) reconcileMissingVR(pvc *corev1.PersistentVolumeClaim, log return vrMissing, !requeue } +func (v *VRGInstance) getVolumeReplication(pvc *corev1.PersistentVolumeClaim, + vrNamespacedName types.NamespacedName, volRep *client.Object, +) error { + cg, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + vrNamespacedName.Name = rmnutil.TrimToK8sResourceNameLength(cg + v.instance.Name) + + *volRep = &volrep.VolumeGroupReplication{} + } else { + *volRep = &volrep.VolumeReplication{} + } + + return v.reconciler.Get(v.ctx, vrNamespacedName, *volRep) +} + +func (v *VRGInstance) createVolumeReplication(vrNamespacedName types.NamespacedName, + volumeReplicationClass client.Object, state volrep.ReplicationState, +) client.Object { + volRep := &volrep.VolumeReplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: vrNamespacedName.Name, + Namespace: vrNamespacedName.Namespace, + Labels: rmnutil.OwnerLabels(v.instance), + }, + Spec: volrep.VolumeReplicationSpec{ + DataSource: corev1.TypedLocalObjectReference{ + Kind: "PersistentVolumeClaim", + Name: vrNamespacedName.Name, + APIGroup: new(string), + }, + ReplicationState: state, + VolumeReplicationClass: volumeReplicationClass.GetName(), + AutoResync: v.autoResync(state), + }, + } + + return volRep +} + +func (v *VRGInstance) createVolumeGroupReplication(storageID string, vrNamespacedName types.NamespacedName, + volumeReplicationClass client.Object, volumeGroupReplicationClass client.Object, state volrep.ReplicationState, +) client.Object { + selector := 
metav1.AddLabelToSelector(&v.recipeElements.PvcSelector.LabelSelector, + ConsistencyGroupLabel, storageID) + + volRep := &volrep.VolumeGroupReplication{ + ObjectMeta: metav1.ObjectMeta{ + Name: vrNamespacedName.Name, + Namespace: vrNamespacedName.Namespace, + Labels: rmnutil.OwnerLabels(v.instance), + }, + Spec: volrep.VolumeGroupReplicationSpec{ + ReplicationState: state, + VolumeReplicationClassName: volumeReplicationClass.GetName(), + VolumeGroupReplicationClassName: volumeGroupReplicationClass.GetName(), + Source: volrep.VolumeGroupReplicationSource{ + Selector: selector, + }, + }, + } + + return volRep +} + func (v *VRGInstance) deleteClusterDataInS3Stores(log logr.Logger) error { log.Info("Delete cluster data in", "s3Profiles", v.instance.Spec.S3Profiles) @@ -1134,9 +1228,9 @@ func (v *VRGInstance) createOrUpdateVR(vrNamespacedName types.NamespacedName, ) (bool, bool, error) { const requeue = true - volRep := &volrep.VolumeReplication{} + var volRep client.Object - err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep) + err := v.getVolumeReplication(pvc, vrNamespacedName, &volRep) if err != nil { if !k8serrors.IsNotFound(err) { log.Error(err, "Failed to get VolumeReplication resource", "resource", vrNamespacedName) @@ -1154,7 +1248,7 @@ func (v *VRGInstance) createOrUpdateVR(vrNamespacedName types.NamespacedName, } // Create VR for PVC - if err = v.createVR(vrNamespacedName, state); err != nil { + if err = v.createVR(vrNamespacedName, pvc, state); err != nil { log.Error(err, "Failed to create VolumeReplication resource", "resource", vrNamespacedName) rmnutil.ReportIfNotPresent(v.reconciler.eventRecorder, v.instance, corev1.EventTypeWarning, rmnutil.EventReasonVRCreateFailed, err.Error()) @@ -1193,20 +1287,86 @@ func (v *VRGInstance) autoResync(state volrep.ReplicationState) bool { // - a boolean indicating if a reconcile requeue is required // - a boolean indicating if VR is already at the desired state // - any errors during the process of updating the resource -func (v *VRGInstance) updateVR(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, +func (v *VRGInstance) updateVR(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + state volrep.ReplicationState, log logr.Logger, +) (bool, bool, error) { + isCGEnabled := func() bool { + _, ok := pvc.GetLabels()[ConsistencyGroupLabel] + + return ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) + } + + if isCGEnabled() { + return v.updateVolumeGroupReplication(pvc, volRep, state, log) + } + + return v.updateVolumeReplication(pvc, volRep, state, log) +} + +func (v *VRGInstance) updateVolumeGroupReplication(pvc *corev1.PersistentVolumeClaim, volRep client.Object, state volrep.ReplicationState, log logr.Logger, ) (bool, bool, error) { const requeue = true - // If state is already as desired, check the status - if volRep.Spec.ReplicationState == state && volRep.Spec.AutoResync == v.autoResync(state) { + log.Info(fmt.Sprintf("Update VolumeGroupReplication for PVC %s/%s", pvc.Namespace, pvc.Name)) + + vgr, ok := volRep.(*volrep.VolumeGroupReplication) + if !ok { + return requeue, false, fmt.Errorf("failed to cast volRep to *volrep.VolumeGroupReplication") + } + + if vgr.Spec.ReplicationState == state && vgr.Spec.AutoResync == v.autoResync(state) { + log.Info("VolumeGroupReplication and VolumeReplicationGroup state match. 
Proceeding to status check") + + return !requeue, v.checkVRStatus(pvc, volRep, &vgr.Status.VolumeReplicationStatus), nil + } + + vgr.Spec.ReplicationState = state + vgr.Spec.AutoResync = v.autoResync(state) + + return v.performUpdate(volRep, pvc, state, log) +} + +func (v *VRGInstance) updateVolumeReplication(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + state volrep.ReplicationState, log logr.Logger, +) (bool, bool, error) { + const requeue = true + + log.Info(fmt.Sprintf("Update VolumeReplication for PVC %s/%s", pvc.Namespace, pvc.Name)) + + vr, ok := volRep.(*volrep.VolumeReplication) + if !ok { + return requeue, false, fmt.Errorf("failed to cast volRep to *volrep.VolumeReplication") + } + + if vr.Spec.ReplicationState == state && vr.Spec.AutoResync == v.autoResync(state) { log.Info("VolumeReplication and VolumeReplicationGroup state and autoresync match. Proceeding to status check") - return !requeue, v.checkVRStatus(pvc, volRep), nil + // When the generation in the status is updated, VRG would get a reconcile + // as it owns VolumeReplication resource. + if volRep.GetGeneration() != vr.Status.ObservedGeneration { + v.log.Info(fmt.Sprintf("Generation mismatch in status for VolumeReplication resource (%s/%s)", + volRep.GetName(), volRep.GetNamespace())) + + msg := "VolumeReplication generation not updated in status" + v.updatePVCDataReadyCondition(pvc.Namespace, pvc.Name, VRGConditionReasonProgressing, msg) + + return !requeue, false, nil + } + + return !requeue, v.checkVRStatus(pvc, volRep, &vr.Status), nil } - volRep.Spec.ReplicationState = state - volRep.Spec.AutoResync = v.autoResync(state) + vr.Spec.ReplicationState = state + vr.Spec.AutoResync = v.autoResync(state) + + return v.performUpdate(volRep, pvc, state, log) +} + +func (v *VRGInstance) performUpdate(volRep client.Object, pvc *corev1.PersistentVolumeClaim, + state volrep.ReplicationState, log logr.Logger, +) (bool, bool, error) { + const requeue = true if err := v.reconciler.Update(v.ctx, volRep); err != nil { log.Error(err, "Failed to update VolumeReplication resource", @@ -1234,29 +1394,35 @@ func (v *VRGInstance) updateVR(pvc *corev1.PersistentVolumeClaim, volRep *volrep } // createVR creates a VolumeReplication CR with a PVC as its data source. 
-func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, state volrep.ReplicationState) error { - volumeReplicationClass, err := v.selectVolumeReplicationClass(vrNamespacedName) +func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, + pvc *corev1.PersistentVolumeClaim, state volrep.ReplicationState, +) error { + volumeReplicationClass, err := v.selectVolumeReplicationClass(vrNamespacedName, false) if err != nil { return fmt.Errorf("failed to find the appropriate VolumeReplicationClass (%s) %w", v.instance.Name, err) } - volRep := &volrep.VolumeReplication{ - ObjectMeta: metav1.ObjectMeta{ - Name: vrNamespacedName.Name, - Namespace: vrNamespacedName.Namespace, - Labels: rmnutil.OwnerLabels(v.instance), - }, - Spec: volrep.VolumeReplicationSpec{ - DataSource: corev1.TypedLocalObjectReference{ - Kind: "PersistentVolumeClaim", - Name: vrNamespacedName.Name, - APIGroup: new(string), - }, - ReplicationState: state, - VolumeReplicationClass: volumeReplicationClass.GetName(), - AutoResync: v.autoResync(state), - }, + var volRep client.Object + + cg, ok := pvc.GetLabels()[ConsistencyGroupLabel] + if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) { + v.log.Info(fmt.Sprintf("Create VolumeGroupReplication for PVC %s/%s", pvc.Namespace, pvc.Name)) + + volumeGroupReplicationClass, err := v.selectVolumeReplicationClass(vrNamespacedName, true) + if err != nil { + return fmt.Errorf("failed to find the appropriate VolumeReplicationClass (%s) %w", + v.instance.Name, err) + } + + vrNamespacedName.Name = rmnutil.TrimToK8sResourceNameLength(cg + v.instance.Name) + + volRep = v.createVolumeGroupReplication(cg, vrNamespacedName, volumeReplicationClass, + volumeGroupReplicationClass, state) + } else { + v.log.Info(fmt.Sprintf("Create VolumeReplication for PVC %s/%s", pvc.Namespace, pvc.Name)) + + volRep = v.createVolumeReplication(vrNamespacedName, volumeReplicationClass, state) } if !vrgInAdminNamespace(v.instance, v.ramenConfig) { @@ -1284,18 +1450,18 @@ func (v *VRGInstance) createVR(vrNamespacedName types.NamespacedName, state volr // VolumeReplicationGroup has the same name as pvc. But in future if it changes // functions to be changed would be processVRAsPrimary(), processVRAsSecondary() // to either receive pvc NamespacedName or pvc itself as an additional argument. 
- -//nolint:funlen,cyclop,gocognit +// +//nolint:funlen,cyclop,gocognit,nestif,gocyclo func (v *VRGInstance) selectVolumeReplicationClass( - namespacedName types.NamespacedName, -) (*volrep.VolumeReplicationClass, error) { + namespacedName types.NamespacedName, selectVolumeGroup bool, +) (client.Object, error) { if err := v.updateReplicationClassList(); err != nil { v.log.Error(err, "Failed to get VolumeReplicationClass list") return nil, fmt.Errorf("failed to get VolumeReplicationClass list") } - if len(v.replClassList.Items) == 0 { + if len(v.replClassList.Items) == 0 && len(v.grpReplClassList.Items) == 0 { v.log.Info("No VolumeReplicationClass available") return nil, fmt.Errorf("no VolumeReplicationClass available") @@ -1310,20 +1476,21 @@ func (v *VRGInstance) selectVolumeReplicationClass( namespacedName, err) } - matchingReplicationClassList := []*volrep.VolumeReplicationClass{} + matchingReplicationClassList := []client.Object{} - for index := range v.replClassList.Items { - replicationClass := &v.replClassList.Items[index] - schedulingInterval, found := replicationClass.Spec.Parameters[VRClassScheduleKey] + filterMatchingReplicationClass := func(replicationClass client.Object, parameters map[string]string, + provisioner string, + ) { + schedulingInterval, found := parameters[VRClassScheduleKey] - if storageClass.Provisioner != replicationClass.Spec.Provisioner || !found { + if storageClass.Provisioner != provisioner || !found { // skip this replication class if provisioner does not match or if schedule not found - continue + return } // ReplicationClass that matches both VRG schedule and pvc provisioner if schedulingInterval != v.instance.Spec.Async.SchedulingInterval { - continue + return } // if peerClasses does not exist, replicationClasses would not have SID in @@ -1335,17 +1502,33 @@ func (v *VRGInstance) selectVolumeReplicationClass( if len(v.instance.Spec.Async.PeerClasses) != 0 { sIDFromReplicationClass, exists := replicationClass.GetLabels()[StorageIDLabel] if !exists { - continue + return } if sIDFromReplicationClass != storageClass.GetLabels()[StorageIDLabel] { - continue + return } } matchingReplicationClassList = append(matchingReplicationClassList, replicationClass) } + if !selectVolumeGroup { + for index := range v.replClassList.Items { + replicationClass := &v.replClassList.Items[index] + + filterMatchingReplicationClass(replicationClass, replicationClass.Spec.Parameters, + replicationClass.Spec.Provisioner) + } + } else { + for index := range v.grpReplClassList.Items { + replicationClass := &v.grpReplClassList.Items[index] + + filterMatchingReplicationClass(replicationClass, replicationClass.Spec.Parameters, + replicationClass.Spec.Provisioner) + } + } + switch len(matchingReplicationClassList) { case 0: v.log.Info(fmt.Sprintf("No VolumeReplicationClass found to match provisioner and schedule %s/%s", @@ -1365,11 +1548,11 @@ func (v *VRGInstance) selectVolumeReplicationClass( // filterDefaultVRC filters the VRC list to return VRCs with default annotation // if the list contains more than one VRC. 
func (v *VRGInstance) filterDefaultVRC( - replicationClassList []*volrep.VolumeReplicationClass, -) (*volrep.VolumeReplicationClass, error) { + replicationClassList []client.Object, +) (client.Object, error) { v.log.Info("Found multiple matching VolumeReplicationClasses, filtering with default annotation") - filteredVRCs := []*volrep.VolumeReplicationClass{} + filteredVRCs := []client.Object{} for index := range replicationClassList { if replicationClassList[index].GetAnnotations()[defaultVRCAnnotationKey] == "true" { @@ -1381,8 +1564,8 @@ func (v *VRGInstance) filterDefaultVRC( switch len(filteredVRCs) { case 0: - v.log.Info(fmt.Sprintf("Multiple VolumeReplicationClass found, with no default annotation (%s/%s)", - replicationClassList[0].Spec.Provisioner, v.instance.Spec.Async.SchedulingInterval)) + v.log.Info(fmt.Sprintf("Multiple VolumeReplicationClass found, with no default annotation (%s)", + defaultVRCAnnotationKey)) return nil, fmt.Errorf("multiple VolumeReplicationClass found, with no default annotation, %s", defaultVRCAnnotationKey) @@ -1448,24 +1631,14 @@ func (v *VRGInstance) getStorageClass(namespacedName types.NamespacedName) (*sto // checkVRStatus checks if the VolumeReplication resource has the desired status for the // current generation and returns true if so, false otherwise -func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication) bool { - // When the generation in the status is updated, VRG would get a reconcile - // as it owns VolumeReplication resource. - if volRep.GetGeneration() != volRep.Status.ObservedGeneration { - v.log.Info(fmt.Sprintf("Generation mismatch in status for VolumeReplication resource (%s/%s)", - volRep.GetName(), volRep.GetNamespace())) - - msg := "VolumeReplication generation not updated in status" - v.updatePVCDataReadyCondition(pvc.Namespace, pvc.Name, VRGConditionReasonProgressing, msg) - - return false - } - +func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, +) bool { switch { case v.instance.Spec.ReplicationState == ramendrv1alpha1.Primary: - return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Primary) + return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Primary, status) case v.instance.Spec.ReplicationState == ramendrv1alpha1.Secondary: - return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Secondary) + return v.validateVRStatus(pvc, volRep, ramendrv1alpha1.Secondary, status) default: v.log.Info(fmt.Sprintf("invalid Replication State %s for VolumeReplicationGroup (%s:%s)", string(v.instance.Spec.ReplicationState), v.instance.Name, v.instance.Namespace)) @@ -1485,12 +1658,12 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v // deleted safely. // - Primary VRG: Validated condition is checked, and if successful the Completed conditions is also checked. // - Secondary VRG: Completed, Degraded and Resyncing conditions are checked and ensured healthy. -func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, - state ramendrv1alpha1.ReplicationState, +func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + state ramendrv1alpha1.ReplicationState, status *volrep.VolumeReplicationStatus, ) bool { // If primary, check the validated condition. 
if state == ramendrv1alpha1.Primary {
-		validated, condState := v.validateVRValidatedStatus(pvc, volRep)
+		validated, condState := v.validateVRValidatedStatus(pvc, volRep, status)
 		if !validated && condState != conditionMissing {
 			// If the condition is known, this VR will never complete since it failed initial validation.
 			if condState == conditionKnown {
@@ -1507,19 +1680,19 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep
 	}
 
 	// Check completed for both primary and secondary.
-	if !v.validateVRCompletedStatus(pvc, volRep, state) {
+	if !v.validateVRCompletedStatus(pvc, volRep, state, status) {
 		return false
 	}
 
 	// if primary, all checks are completed at this point; only a secondary needs further checks.
 	if state == ramendrv1alpha1.Secondary {
-		return v.validateAdditionalVRStatusForSecondary(pvc, volRep)
+		return v.validateAdditionalVRStatusForSecondary(pvc, volRep, status)
 	}
 
 	msg := "PVC in the VolumeReplicationGroup is ready for use"
 	v.updatePVCDataReadyCondition(pvc.Namespace, pvc.Name, VRGConditionReasonReady, msg)
 	v.updatePVCDataProtectedCondition(pvc.Namespace, pvc.Name, VRGConditionReasonReady, msg)
-	v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, &volRep.Status)
+	v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, status)
 	v.log.Info(fmt.Sprintf("VolumeReplication resource %s/%s is ready for use", volRep.GetName(),
 		volRep.GetNamespace()))
 
@@ -1532,9 +1705,9 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep
 // - state: condition state
 func (v *VRGInstance) validateVRValidatedStatus(
 	pvc *corev1.PersistentVolumeClaim,
-	volRep *volrep.VolumeReplication,
+	volRep client.Object, status *volrep.VolumeReplicationStatus,
 ) (bool, conditionState) {
-	conditionMet, condState, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue)
+	conditionMet, condState, errorMsg := isVRConditionMet(status, volrep.ConditionValidated, metav1.ConditionTrue)
 	if !conditionMet {
 		if errorMsg == "" {
 			errorMsg = "VolumeReplication resource not validated"
@@ -1554,10 +1727,10 @@ func (v *VRGInstance) validateVRValidatedStatus(
 // validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and updates
 // the PVC DataReady and Protected conditions.
 // Returns true if the condition is true; false if the condition is missing, unknown, or false.
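 // For example (an illustrative sketch, not behavior added by this change), a
 // caller can tell a missing condition apart from a known-false one:
 //
 //	met, condState, msg := isVRConditionMet(status, volrep.ConditionCompleted, metav1.ConditionTrue)
 //	if !met && condState == conditionKnown {
 //		// Completed is present and False; msg explains why
 //	}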
-func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, - state ramendrv1alpha1.ReplicationState, +func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + state ramendrv1alpha1.ReplicationState, status *volrep.VolumeReplicationStatus, ) bool { - conditionMet, _, errorMsg := isVRConditionMet(volRep, volrep.ConditionCompleted, metav1.ConditionTrue) + conditionMet, _, errorMsg := isVRConditionMet(status, volrep.ConditionCompleted, metav1.ConditionTrue) if !conditionMet { if errorMsg == "" { var ( @@ -1605,17 +1778,17 @@ func (v *VRGInstance) validateVRCompletedStatus(pvc *corev1.PersistentVolumeClai // With 2nd condition being met, // ProtectedPVC.Conditions[DataReady] = True // ProtectedPVC.Conditions[DataProtected] = True -func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.PersistentVolumeClaim, - volRep *volrep.VolumeReplication, +func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, ) bool { v.updatePVCLastSyncCounters(pvc.Namespace, pvc.Name, nil) - conditionMet, _, _ := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionTrue) + conditionMet, _, _ := isVRConditionMet(status, volrep.ConditionResyncing, metav1.ConditionTrue) if !conditionMet { - return v.checkResyncCompletionAsSecondary(pvc, volRep) + return v.checkResyncCompletionAsSecondary(pvc, volRep, status) } - conditionMet, _, errorMsg := isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionTrue) + conditionMet, _, errorMsg := isVRConditionMet(status, volrep.ConditionDegraded, metav1.ConditionTrue) if !conditionMet { if errorMsg == "" { errorMsg = "VolumeReplication resource for pvc is not in Degraded condition while resyncing" @@ -1639,10 +1812,10 @@ func (v *VRGInstance) validateAdditionalVRStatusForSecondary(pvc *corev1.Persist } // checkResyncCompletionAsSecondary returns true if resync status is complete as secondary, false otherwise -func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim, - volRep *volrep.VolumeReplication, +func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVolumeClaim, volRep client.Object, + status *volrep.VolumeReplicationStatus, ) bool { - conditionMet, _, errorMsg := isVRConditionMet(volRep, volrep.ConditionResyncing, metav1.ConditionFalse) + conditionMet, _, errorMsg := isVRConditionMet(status, volrep.ConditionResyncing, metav1.ConditionFalse) if !conditionMet { if errorMsg == "" { errorMsg = "VolumeReplication resource for pvc not syncing as Secondary" @@ -1656,7 +1829,7 @@ func (v *VRGInstance) checkResyncCompletionAsSecondary(pvc *corev1.PersistentVol return false } - conditionMet, _, errorMsg = isVRConditionMet(volRep, volrep.ConditionDegraded, metav1.ConditionFalse) + conditionMet, _, errorMsg = isVRConditionMet(status, volrep.ConditionDegraded, metav1.ConditionFalse) if !conditionMet { if errorMsg == "" { errorMsg = "VolumeReplication resource for pvc is not syncing and is degraded as Secondary" @@ -1684,26 +1857,24 @@ type conditionState string const ( // Not found. conditionMissing = conditionState("missing") - // Found but its observed generation does not match the object generation. - conditionStale = conditionState("stale") - // Found, not stale, but its value is "Unknown". + // Found, but its value is "Unknown". 
conditionUnknown = conditionState("unknown")
-	// Found, not stale, and the value is "True" or "False"
+	// Found and the value is "True" or "False"
 	conditionKnown = conditionState("known")
 )
 
 // isVRConditionMet checks if a condition is met.
 // Returns 3 values:
 // - met: true if the condition status matches the desired status, otherwise false
-// - state: one of (conditionMissing, conditionStale, conditionUnknown, conditionKnown)
+// - state: one of (conditionMissing, conditionUnknown, conditionKnown)
 // - errorMsg: error message describing why the condition is not met
-func isVRConditionMet(volRep *volrep.VolumeReplication,
+func isVRConditionMet(status *volrep.VolumeReplicationStatus,
 	conditionType string,
 	desiredStatus metav1.ConditionStatus,
 ) (bool, conditionState, string) {
 	met := true
 
-	volRepCondition := findCondition(volRep.Status.Conditions, conditionType)
+	volRepCondition := findCondition(status.Conditions, conditionType)
 	if volRepCondition == nil {
 		errorMsg := fmt.Sprintf("Failed to get the %s condition from status of VolumeReplication resource.",
 			conditionType)
@@ -1711,13 +1882,6 @@ func isVRConditionMet(volRep *volrep.VolumeReplication,
 		return !met, conditionMissing, errorMsg
 	}
 
-	if volRep.GetGeneration() != volRepCondition.ObservedGeneration {
-		errorMsg := fmt.Sprintf("Stale generation for condition %s from status of VolumeReplication resource.",
-			conditionType)
-
-		return !met, conditionStale, errorMsg
-	}
-
 	if volRepCondition.Status == metav1.ConditionUnknown {
 		errorMsg := fmt.Sprintf("Unknown status for condition %s from status of VolumeReplication resource.",
 			conditionType)
@@ -1858,9 +2022,9 @@ func (v *VRGInstance) updatePVCLastSyncCounters(pvcNamespace, pvcName string, st
 
 // ensureVRDeletedFromAPIServer adds an additional step to ensure that we wait for volumereplication deletion
 // from API server before moving ahead with vrg finalizer removal.
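 // The typed object (a VolumeReplication or a VolumeGroupReplication) is
 // supplied by the caller, so the same wait loop can poll either kind through
 // the non-caching APIReader.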
-func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.NamespacedName, log logr.Logger) error {
-	volRep := &volrep.VolumeReplication{}
-
+func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.NamespacedName,
+	volRep client.Object, log logr.Logger,
+) error {
 	err := v.reconciler.APIReader.Get(v.ctx, vrNamespacedName, volRep)
 	if err == nil {
 		log.Info("Found VolumeReplication resource pending delete", "vr", volRep)
@@ -1880,15 +2044,39 @@ func (v *VRGInstance) ensureVRDeletedFromAPIServer(vrNamespacedName types.Namesp
 }
 
-// deleteVR deletes a VolumeReplication instance if found
-func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName, log logr.Logger) error {
-	cr := &volrep.VolumeReplication{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      vrNamespacedName.Name,
-			Namespace: vrNamespacedName.Namespace,
-		},
+// deleteVR deletes a VolumeReplication or VolumeGroupReplication instance if found
+func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName,
+	pvc *corev1.PersistentVolumeClaim, log logr.Logger,
+) error {
+	var cr client.Object
+
+	var err error
+
+	cg, ok := pvc.GetLabels()[ConsistencyGroupLabel]
+	if ok && rmnutil.IsCGEnabled(v.instance.GetAnnotations()) {
+		log.Info(fmt.Sprintf("Delete VolumeGroupReplication for PVC %s/%s", pvc.Namespace, pvc.Name))
+
+		vrNamespacedName.Name = rmnutil.TrimToK8sResourceNameLength(cg + v.instance.Name)
+
+		cr, err = v.reconcileVolumeGroupReplicationForDeletion(vrNamespacedName, pvc, log)
+		if err != nil {
+			return err
+		}
+
+		if cr == nil {
+			return nil
+		}
+	} else {
+		log.Info(fmt.Sprintf("Delete VolumeReplication for PVC %s/%s", pvc.Namespace, pvc.Name))
+
+		cr = &volrep.VolumeReplication{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      vrNamespacedName.Name,
+				Namespace: vrNamespacedName.Namespace,
+			},
+		}
 	}
 
-	err := v.reconciler.Delete(v.ctx, cr)
+	err = v.reconciler.Delete(v.ctx, cr)
 	if err != nil {
 		if !k8serrors.IsNotFound(err) {
 			log.Error(err, "Failed to delete VolumeReplication resource")
@@ -1902,7 +2090,57 @@ func (v *VRGInstance) deleteVR(vrNamespacedName types.NamespacedName, log logr.L
-	v.log.Info("Deleted VolumeReplication resource %s/%s", vrNamespacedName.Namespace, vrNamespacedName.Name)
+	v.log.Info(fmt.Sprintf("Deleted VolumeReplication resource %s/%s",
+		vrNamespacedName.Namespace, vrNamespacedName.Name))
 
-	return v.ensureVRDeletedFromAPIServer(vrNamespacedName, log)
+	return v.ensureVRDeletedFromAPIServer(vrNamespacedName, cr, log)
+}
+
+func (v *VRGInstance) reconcileVolumeGroupReplicationForDeletion(vrNamespacedName types.NamespacedName,
+	pvc *corev1.PersistentVolumeClaim, log logr.Logger,
+) (client.Object, error) {
+	volRep := &volrep.VolumeGroupReplication{}
+
+	err := v.reconciler.Get(v.ctx, vrNamespacedName, volRep)
+	if err != nil {
+		if !k8serrors.IsNotFound(err) {
+			log.Error(err, "Failed to get VolumeGroupReplication resource")
+
+			return nil, fmt.Errorf("failed to get VolumeGroupReplication resource (%s/%s), %w",
+				vrNamespacedName.Namespace, vrNamespacedName.Name, err)
+		}
+
+		return nil, nil
+	}
+
+	pvcLabelSelector := volRep.Spec.Source.Selector
+
+	// Found the VGR; it can be deleted only if this PVC is the last one it protects
+	pvcList, err := rmnutil.ListPVCsByPVCSelector(v.ctx, v.reconciler.Client, v.log,
+		*pvcLabelSelector,
+		[]string{vrNamespacedName.Namespace},
+		v.instance.Spec.VolSync.Disabled,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	if len(pvcList.Items) > 1 {
+		log.Info("VolumeGroupReplication resource is in use and cannot be deleted yet")
+
+		return nil, nil
+	}
+
+	selector, err := metav1.LabelSelectorAsSelector(pvcLabelSelector)
+	if err != nil {
+		return nil, err
+	}
+
+	labelMatch := selector.Matches(labels.Set(pvc.GetLabels()))
+	if !labelMatch {
+		
log.Info(fmt.Sprintf("PVC %s does not match VolumeGroupReplication label selector %v", pvc.Name, selector)) + + return nil, fmt.Errorf("PVC %s does not match VolumeGroupReplication label selector %v", pvc.Name, selector) + } + + return volRep, nil } func (v *VRGInstance) addProtectedAnnotationForPVC(pvc *corev1.PersistentVolumeClaim, log logr.Logger) error { diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index f544c7495..c62df5341 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -643,6 +643,7 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { vrgVRDeleteEnsureTestCase.promoteVolReps() vrgVRDeleteEnsureTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) }) + //nolint:dupl It("ensures orderly cleanup post VolumeReplication deletion", func() { By("Protecting the VolumeReplication resources from deletion") vrgVRDeleteEnsureTestCase.protectDeletionOfVolReps() @@ -848,6 +849,161 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { vrgDeleteCompletedVR.cleanupVRC() }) }) + // Test VRG finalizer removal during deletion is deferred till VGR is deleted + var vrgVGRDeleteEnsureTestCase *vrgTest + Context("in primary state", func() { + storageIDLabel := genStorageIDLabel(storageIDs[0]) + storageID := storageIDLabel[vrgController.StorageIDLabel] + vrcLabels := genVRCLabels(replicationIDs[0], storageID, "ramen") + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + storageIDLabels: storageIDLabel, + replicationClassLabels: vrcLabels, + } + It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgVGRDeleteEnsureTestCase = newVRGTestCaseCreate(1, createTestTemplate, true, false) + vrgVGRDeleteEnsureTestCase.repGroup = true + vrgVGRDeleteEnsureTestCase.VRGTestCaseStart() + }) + It("waits for VRG to create a VGR for all PVCs", func() { + expectedVRCount := 1 + vrgVGRDeleteEnsureTestCase.waitForVGRCountToMatch(expectedVRCount) + }) + It("waits for VRG status to match", func() { + vrgVGRDeleteEnsureTestCase.promoteVolGroupReps() + vrgVGRDeleteEnsureTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady) + }) + //nolint:dupl + It("ensures orderly cleanup post VolumeGroupReplication deletion", func() { + By("Protecting the VolumeGroupReplication resources from deletion") + vrgVGRDeleteEnsureTestCase.protectDeletionOfVolGroupReps() + + By("Starting the VRG deletion process") + vrgVGRDeleteEnsureTestCase.cleanupPVCs(pvcProtectedVerify, vrAndPvcDeletionTimestampsRecentVerify) + vrg := vrgVGRDeleteEnsureTestCase.getVRG() + Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed()) + + By("Ensuring VRG is not deleted till VGR is present") + Consistently(apiReader.Get, vrgtimeout, vrginterval). + WithArguments(context.TODO(), vrgVGRDeleteEnsureTestCase.vrgNamespacedName(), vrg). 
+			Should(Succeed(), "while waiting for VRG %v to remain undeleted",
+				vrgVGRDeleteEnsureTestCase.vrgNamespacedName())
+
+		By("Un-protecting the VolumeGroupReplication resources to ensure their deletion")
+		vrgVGRDeleteEnsureTestCase.unprotectDeletionOfVolGroupReps()
+
+		By("Ensuring VRG is deleted eventually as a result")
+		var i int
+		Eventually(func() error {
+			i++
+
+			return apiReader.Get(context.TODO(), vrgVGRDeleteEnsureTestCase.vrgNamespacedName(), vrg)
+		}, vrgtimeout*2, vrginterval).
+			Should(MatchError(errors.NewNotFound(schema.GroupResource{
+				Group:    ramendrv1alpha1.GroupVersion.Group,
+				Resource: "volumereplicationgroups",
+			}, vrgVGRDeleteEnsureTestCase.vrgName)),
+				"polled %d times for VRG to be garbage collected\n"+format.Object(*vrg, 1), i)
+
+		vrgVGRDeleteEnsureTestCase.cleanupNamespace()
+		vrgVGRDeleteEnsureTestCase.cleanupSC()
+		vrgVGRDeleteEnsureTestCase.cleanupVGRC()
+		vrgVGRDeleteEnsureTestCase.cleanupVRC()
+	})
+})
+
+	// Try the simple case of creating VRG, PVC, PV and
+	// check whether VolGroupRep resources are created or not
+	var vrgCreateVGRTestCase *vrgTest
+	Context("in primary state", func() {
+		storageIDLabel := genStorageIDLabel(storageIDs[0])
+		storageID := storageIDLabel[vrgController.StorageIDLabel]
+		vrcLabels := genVRCLabels(replicationIDs[0], storageID, "ramen")
+		createTestTemplate := &template{
+			ClaimBindInfo:          corev1.ClaimBound,
+			VolumeBindInfo:         corev1.VolumeBound,
+			schedulingInterval:     "1h",
+			storageClassName:       "manual",
+			replicationClassName:   "test-replicationclass",
+			vrcProvisioner:         "manual.storage.com",
+			scProvisioner:          "manual.storage.com",
+			storageIDLabels:        storageIDLabel,
+			replicationClassLabels: vrcLabels,
+		}
+		It("sets up PVCs, PVs and VRGs", func() {
+			createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName}
+			vrgCreateVGRTestCase = newVRGTestCaseCreate(3, createTestTemplate, true, false)
+			vrgCreateVGRTestCase.repGroup = true
+			vrgCreateVGRTestCase.VRGTestCaseStart()
+		})
+		It("waits for VRG to create a VGR for all PVCs", func() {
+			expectedVGRCount := 1
+			vrgCreateVGRTestCase.waitForVGRCountToMatch(expectedVGRCount)
+		})
+		It("waits for VRG status to match", func() {
+			vrgCreateVGRTestCase.promoteVolGroupReps()
+			vrgCreateVGRTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady)
+		})
+		It("cleans up after testing", func() {
+			vrgCreateVGRTestCase.cleanupProtected()
+		})
+	})
+
+	// Creates a VRG. PVCs and PVs are created with Status.Phase
+	// set to Pending, and a VolGroupRep should not be created until
+	// all the PVCs and PVs are bound. So, these tests then
+	// change the Status.Phase of the PVCs and PVs to the Bound state,
+	// and then check whether the VolGroupRep
+	// resource has been created or not.
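+	// ("Bound" here means the claim/volume Status.Phase is corev1.ClaimBound /
+	// corev1.VolumeBound; bindPVAndPVC below flips them from the Pending phases
+	// that the template sets up.)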
+	var vrgPVCnotBoundVGRTestCase *vrgTest
+	Context("in primary state", func() {
+		storageIDLabel := genStorageIDLabel(storageIDs[0])
+		storageID := storageIDLabel[vrgController.StorageIDLabel]
+		vrcLabels := genVRCLabels(replicationIDs[0], storageID, "ramen")
+		createTestTemplate := &template{
+			ClaimBindInfo:          corev1.ClaimPending,
+			VolumeBindInfo:         corev1.VolumePending,
+			schedulingInterval:     "1h",
+			storageClassName:       "manual",
+			replicationClassName:   "test-replicationclass",
+			vrcProvisioner:         "manual.storage.com",
+			scProvisioner:          "manual.storage.com",
+			storageIDLabels:        storageIDLabel,
+			replicationClassLabels: vrcLabels,
+		}
+		It("sets up PVCs, PVs and VRGs", func() {
+			createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName}
+			vrgPVCnotBoundVGRTestCase = newVRGTestCaseCreate(3, createTestTemplate, false, false)
+			vrgPVCnotBoundVGRTestCase.repGroup = true
+			vrgPVCnotBoundVGRTestCase.VRGTestCaseStart()
+		})
+		It("expects no VGR to be created as PVCs are not bound", func() {
+			expectedVGRCount := 0
+			vrgPVCnotBoundVGRTestCase.waitForVGRCountToMatch(expectedVGRCount)
+		})
+		It("bind each pv to corresponding pvc", func() {
+			vrgPVCnotBoundVGRTestCase.bindPVAndPVC()
+			vrgPVCnotBoundVGRTestCase.verifyPVCBindingToPV(true)
+		})
+		It("waits for VRG to create one VGR resource for all PVCs", func() {
+			expectedVGRCount := 1
+			vrgPVCnotBoundVGRTestCase.waitForVGRCountToMatch(expectedVGRCount)
+		})
+		It("waits for VRG status to match", func() {
+			vrgPVCnotBoundVGRTestCase.promoteVolGroupReps()
+			vrgPVCnotBoundVGRTestCase.verifyVRGStatusExpectation(true, vrgController.VRGConditionReasonReady)
+		})
+		It("cleans up after testing", func() {
+			vrgPVCnotBoundVGRTestCase.cleanupProtected()
+		})
+	})
 
 	// Try the simple case of creating VRG, PVC, PV and
 	// check whether VolRep resources are created or not
@@ -1500,6 +1656,7 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() {
 	// TODO: Add tests to ensure delete as Secondary (check if delete as Primary is tested above)
 })
 
+//nolint:maligned
 type vrgTest struct {
 	uniqueID         string
 	namespace        string
@@ -1515,6 +1672,7 @@ type vrgTest struct {
 	checkBind        bool
 	vrgFirst         bool
 	asyncPeerClasses []ramendrv1alpha1.PeerClass
+	repGroup         bool
 	template         *template
 }
@@ -1592,6 +1750,10 @@ func (v *vrgTest) VRGTestCaseStart() {
 	v.createSC(v.template)
 	v.createVRC(v.template)
 
+	if v.repGroup {
+		v.createVGRC(v.template)
+	}
+
 	if v.vrgFirst {
 		v.createVRG()
@@ -1882,11 +2044,14 @@ func (v *vrgTest) bindPVAndPVC() {
 		i := i // capture i for use in closure
 
 		// Bind PVC
-		pvc := getPVC(v.pvcNames[i])
-		pvc.Status.Phase = corev1.ClaimBound
-		err = k8sClient.Status().Update(context.TODO(), pvc)
-		Expect(err).To(BeNil(),
-			"failed to update status of PVC %s", v.pvcNames[i])
+		retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
+			pvc := getPVC(v.pvcNames[i])
+			pvc.Status.Phase = corev1.ClaimBound
+
+			return k8sClient.Status().Update(context.TODO(), pvc)
+		})
+
+		Expect(retryErr).NotTo(HaveOccurred())
 	}
 }
@@ -1919,6 +2084,15 @@ func (v *vrgTest) createVRG() {
 			S3Profiles:       v.template.s3Profiles,
 		},
 	}
+
+	if v.repGroup {
+		if vrg.ObjectMeta.Annotations == nil {
+			vrg.ObjectMeta.Annotations = map[string]string{}
+		}
+
+		vrg.ObjectMeta.Annotations[util.IsCGEnabledAnnotation] = "true"
+	}
+
 	err := k8sClient.Create(context.TODO(), vrg)
 	expectedErr := k8serrors.NewAlreadyExists(
 		schema.GroupResource{
@@ -2006,6 +2180,45 @@ func createVolumeReplicationClass(testTemplate *template) {
 	}
 }
 
+func (v *vrgTest) createVGRC(testTemplate *template) {
+	
defaultAnnotations := map[string]string{}
+	defaultAnnotations["replication.storage.openshift.io/is-default-class"] = "true"
+
+	By("creating VGRC " + testTemplate.replicationClassName)
+
+	parameters := make(map[string]string)
+
+	if testTemplate.schedulingInterval != "" {
+		parameters["schedulingInterval"] = testTemplate.schedulingInterval
+	}
+
+	vgrc := &volrep.VolumeGroupReplicationClass{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        testTemplate.replicationClassName,
+			Namespace:   v.namespace,
+			Annotations: defaultAnnotations,
+		},
+		Spec: volrep.VolumeGroupReplicationClassSpec{
+			Provisioner: testTemplate.vrcProvisioner,
+			Parameters:  parameters,
+		},
+	}
+
+	if len(testTemplate.replicationClassLabels) > 0 {
+		vgrc.ObjectMeta.Labels = testTemplate.replicationClassLabels
+	}
+
+	err := k8sClient.Create(context.TODO(), vgrc)
+	if err != nil {
+		if errors.IsAlreadyExists(err) {
+			err = k8sClient.Get(context.TODO(), types.NamespacedName{Name: testTemplate.replicationClassName}, vgrc)
+		}
+	}
+
+	Expect(err).NotTo(HaveOccurred(),
+		"failed to create/get VolumeGroupReplicationClass %s", testTemplate.replicationClassName)
+}
+
 func createStorageClass(testTemplate *template) {
 	By("creating StorageClass " + testTemplate.storageClassName)
@@ -2286,6 +2499,7 @@ func (v *vrgTest) cleanup(
 	v.cleanupNamespace()
 	v.cleanupSC()
 	v.cleanupVRC()
+	v.cleanupVGRC()
 }
 
 func (v *vrgTest) cleanupPVCs(
@@ -2541,6 +2755,26 @@ func cleanupVolumeReplicationClass(testTemplate *template) {
 		"failed to delete replicationClass %s", testTemplate.replicationClassName)
 }
 
+func (v *vrgTest) cleanupVGRC() {
+	key := types.NamespacedName{
+		Name:      v.replicationClass,
+		Namespace: v.namespace,
+	}
+
+	vgrc := &volrep.VolumeGroupReplicationClass{}
+
+	err := k8sClient.Get(context.TODO(), key, vgrc)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			return
+		}
+	}
+
+	err = k8sClient.Delete(context.TODO(), vgrc)
+	Expect(err).To(BeNil(),
+		"failed to delete VolumeGroupReplicationClass %s", v.replicationClass)
+}
+
 func (v *vrgTest) cleanupNamespace() {
 	By("deleting namespace " + v.namespace)
@@ -2573,6 +2807,24 @@ func (v *vrgTest) waitForVRCountToMatch(vrCount int) {
 		vrCount, v.vrgName, v.namespace)
 }
 
+func (v *vrgTest) waitForVGRCountToMatch(vgrCount int) {
+	By("Waiting for VGR count to match " + v.namespace)
+
+	Eventually(func() int {
+		listOptions := &client.ListOptions{
+			Namespace: v.namespace,
+		}
+		volGroupRepList := &volrep.VolumeGroupReplicationList{}
+		err := k8sClient.List(context.TODO(), volGroupRepList, listOptions)
+		Expect(err).NotTo(HaveOccurred(),
+			"failed to get a list of VGRs in namespace %s", v.namespace)
+
+		return len(volGroupRepList.Items)
+	}, timeout, interval).Should(BeNumerically("==", vgrCount),
+		"while waiting for VGR count of %d in VRG %s of namespace %s",
+		vgrCount, v.vrgName, v.namespace)
+}
+
 func (v *vrgTest) promoteVolReps() {
 	v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {
 		// VRG should not be ready until last VolRep is ready.
@@ -2580,6 +2832,13 @@ func (v *vrgTest) promoteVolReps() {
 	})
 }
 
+func (v *vrgTest) promoteVolGroupReps() {
+	v.promoteVolGroupRepsAndDo(promoteOptions{}, func(index, count int) {
+		// VRG should not be ready until the last VolGroupRep is ready.
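+		// (With a single VGR protecting all the PVCs in the namespace, this
+		// callback fires once with index == count-1, so readiness is only
+		// asserted after the final VolumeGroupReplication is promoted.)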
+		v.verifyVRGStatusExpectation(index == count-1, vrgController.VRGConditionReasonReady)
+	})
+}
+
 func (v *vrgTest) promoteVolRepsWithoutVrgStatusCheck() {
 	v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {})
 }
@@ -2593,6 +2852,7 @@ type promoteOptions struct {
 	ValidatedFailed  bool
 }
 
+//nolint:dupl
 func (v *vrgTest) promoteVolRepsAndDo(options promoteOptions, do func(int, int)) {
 	By("Promoting VolumeReplication resources " + v.namespace)
@@ -2719,6 +2979,59 @@ func (v *vrgTest) deleteVolReps() {
 	}
 }
 
+//nolint:dupl
+func (v *vrgTest) promoteVolGroupRepsAndDo(options promoteOptions, do func(int, int)) {
+	By("Promoting VolumeGroupReplication resources " + v.namespace)
+
+	volGroupRepList := &volrep.VolumeGroupReplicationList{}
+	listOptions := &client.ListOptions{
+		Namespace: v.namespace,
+	}
+	err := k8sClient.List(context.TODO(), volGroupRepList, listOptions)
+	Expect(err).NotTo(HaveOccurred(), "failed to get a list of VGRs in namespace %s", v.namespace)
+
+	for index := range volGroupRepList.Items {
+		volGroup := volGroupRepList.Items[index]
+
+		volGroupRepStatus := volrep.VolumeGroupReplicationStatus{
+			VolumeReplicationStatus: volrep.VolumeReplicationStatus{
+				Conditions:         v.generateVRConditions(volGroup.Generation, options),
+				ObservedGeneration: volGroup.Generation,
+				State:              volrep.PrimaryState,
+				Message:            "volume is marked primary",
+			},
+		}
+
+		if options.ValidatedFailed {
+			volGroupRepStatus.State = volrep.UnknownState
+			volGroupRepStatus.Message = "precondition failed ..."
+		}
+
+		volGroup.Status = volGroupRepStatus
+
+		err = k8sClient.Status().Update(context.TODO(), &volGroup)
+		Expect(err).NotTo(HaveOccurred(), "failed to update the status of VolGroupRep %s", volGroup.Name)
+
+		volrepKey := types.NamespacedName{
+			Name:      volGroup.Name,
+			Namespace: volGroup.Namespace,
+		}
+
+		if options.ValidatedFailed {
+			if options.ValidatedMissing {
+				v.waitForVolGroupRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionFalse)
+			} else {
+				v.waitForVolGroupRepCondition(volrepKey, volrep.ConditionValidated, metav1.ConditionFalse)
+			}
+		} else {
+			v.waitForVolGroupRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue)
+			v.waitForVGRProtectedPVCs(volrepKey, volGroup.Spec.Source.Selector)
+		}
+
+		do(index, len(volGroupRepList.Items))
+	}
+}
+
 func (v *vrgTest) protectDeletionOfVolReps() {
 	By("Adding a finalizer to protect VolumeReplication resources being deleted " + v.namespace)
@@ -2738,6 +3051,25 @@ func (v *vrgTest) protectDeletionOfVolReps() {
 	}
 }
 
+func (v *vrgTest) protectDeletionOfVolGroupReps() {
+	By("Adding a finalizer to protect VolumeGroupReplication resources being deleted " + v.namespace)
+
+	volGroupRepList := &volrep.VolumeGroupReplicationList{}
+	listOptions := &client.ListOptions{
+		Namespace: v.namespace,
+	}
+	err := apiReader.List(context.TODO(), volGroupRepList, listOptions)
+	Expect(err).NotTo(HaveOccurred(), "failed to get a list of VGRs in namespace %s", v.namespace)
+
+	for index := range volGroupRepList.Items {
+		volGroupRep := volGroupRepList.Items[index]
+		if controllerutil.AddFinalizer(client.Object(&volGroupRep), "testDeleteProtected") {
+			err = k8sClient.Update(context.TODO(), &volGroupRep)
+			Expect(err).NotTo(HaveOccurred(), "failed to add finalizer to VolGroupRep %s", volGroupRep.Name)
+		}
+	}
+}
+
 func (v *vrgTest) unprotectDeletionOfVolReps() {
 	By("Removing finalizer that protects VolumeReplication resources from being deleted " + v.namespace)
@@ -2757,6 +3089,25 @@ func (v *vrgTest) unprotectDeletionOfVolReps() {
 	}
 }
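+// unprotectDeletionOfVolGroupReps mirrors unprotectDeletionOfVolReps above for
+// VolumeGroupReplication resources: it lists the VGRs in the test namespace and
+// removes the "testDeleteProtected" finalizer so their deletion can complete.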
+func (v *vrgTest) unprotectDeletionOfVolGroupReps() {
+	By("Removing finalizer that protects VolumeGroupReplication resources from being deleted " + v.namespace)
+
+	volGroupRepList := &volrep.VolumeGroupReplicationList{}
+	listOptions := &client.ListOptions{
+		Namespace: v.namespace,
+	}
+	err := apiReader.List(context.TODO(), volGroupRepList, listOptions)
+	Expect(err).NotTo(HaveOccurred(), "failed to get a list of VGRs in namespace %s", v.namespace)
+
+	for index := range volGroupRepList.Items {
+		volGroupRep := volGroupRepList.Items[index]
+		if controllerutil.RemoveFinalizer(client.Object(&volGroupRep), "testDeleteProtected") {
+			err = k8sClient.Update(context.TODO(), &volGroupRep)
+			Expect(err).NotTo(HaveOccurred(), "failed to remove finalizer from VolGroupRep %s", volGroupRep.Name)
+		}
+	}
+}
+
 func (v *vrgTest) waitForVolRepCondition(
 	vrNamespacedName types.NamespacedName,
 	conditionType string,
@@ -2822,6 +3173,68 @@ func (v *vrgTest) waitForProtectedPVCs(vrNamespacedName types.NamespacedName) {
 		"while waiting for protected pvc condition %s/%s", vrNamespacedName.Namespace, vrNamespacedName.Name)
 }
 
+func (v *vrgTest) waitForVolGroupRepCondition(
+	vrNamespacedName types.NamespacedName,
+	conditionType string,
+	conditionStatus metav1.ConditionStatus,
+) {
+	updatedVolGroupRep := volrep.VolumeGroupReplication{}
+
+	Eventually(func() bool {
+		err := k8sClient.Get(context.TODO(), vrNamespacedName, &updatedVolGroupRep)
+		if err != nil {
+			return false
+		}
+
+		condition := meta.FindStatusCondition(updatedVolGroupRep.Status.Conditions, conditionType)
+		if condition == nil {
+			return false
+		}
+
+		return condition.Status == conditionStatus
+	}, vrgtimeout, vrginterval).Should(BeTrue(),
+		"failed to wait for VolGroupRep condition %q to become %q", conditionType, conditionStatus)
+}
+
+func (v *vrgTest) waitForVGRProtectedPVCs(vrNamespacedName types.NamespacedName,
+	pvcLabelSelector *metav1.LabelSelector,
+) {
+	Eventually(func() bool {
+		vrg := v.getVRG()
+
+		pvcSelector, err := metav1.LabelSelectorAsSelector(pvcLabelSelector)
+		if err != nil {
+			return false
+		}
+		listOptions := []client.ListOption{
+			client.MatchingLabelsSelector{
+				Selector: pvcSelector,
+			},
+		}
+
+		pvcList := &corev1.PersistentVolumeClaimList{}
+		if err := k8sClient.List(context.TODO(), pvcList, listOptions...); err != nil {
+			return false
+		}
+
+		protected := false
+		for idx := range pvcList.Items {
+			pvc := pvcList.Items[idx]
+			protectedPVC := vrgController.FindProtectedPVC(vrg, pvc.Namespace, pvc.Name)
+			if protectedPVC == nil {
+				continue
+			}
+			protected = v.checkProtectedPVCSuccess(vrg, protectedPVC)
+			if !protected {
+				return false
+			}
+		}
+
+		return protected
+	}, vrgtimeout, vrginterval).Should(BeTrue(),
+		"while waiting for protected pvc condition %s/%s", vrNamespacedName.Namespace, vrNamespacedName.Name)
+}
+
 func (v *vrgTest) checkProtectedPVCSuccess(vrg *ramendrv1alpha1.VolumeReplicationGroup,
 	protectedPVC *ramendrv1alpha1.ProtectedPVC,
 ) bool {