From d1dd8628a862c3cbaefe9ffaf89ec98dffa08041 Mon Sep 17 00:00:00 2001 From: akutz Date: Tue, 24 Sep 2024 09:49:15 -0500 Subject: [PATCH] Restrict cache for all ConfigMap/Secret objects This patch updates the manager cache for VM Operator so that ConfigMap/Secret resources from the kube-system and VM Op pod namespaces are cached, but ConfigMap/Secret resources in any other namespace are *not* cached. This patch means controllers that access ConfigMap/Secret resources in these namespaces no longer need to create separate caches. Instead, all controllers may use the manager client unless for some other reason. --- .gitignore | 1 + .../capability/infra_capability_controller.go | 63 +++++-------------- .../configmap/infra_configmap_controller.go | 44 ++----------- .../infra/secret/infra_secret_controller.go | 44 ++----------- .../infra_secret_controller_intg_test.go | 17 ++--- pkg/manager/cache.go | 4 +- pkg/manager/manager.go | 30 +++++---- pkg/util/kube/predicates.go | 28 +++++++++ 8 files changed, 84 insertions(+), 147 deletions(-) diff --git a/.gitignore b/.gitignore index 9b7a12a36..20d333d3b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ config/crd/external-crds/cns.vmware.com_* .DS_Store .cache +ginkgo.report # Mkdocs /.site/ diff --git a/controllers/infra/capability/infra_capability_controller.go b/controllers/infra/capability/infra_capability_controller.go index 35cc9ab15..4847c5efe 100644 --- a/controllers/infra/capability/infra_capability_controller.go +++ b/controllers/infra/capability/infra_capability_controller.go @@ -12,10 +12,8 @@ import ( corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -23,7 +21,6 @@ import ( pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" "github.com/vmware-tanzu/vm-operator/pkg/config/capabilities" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" - pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/pkg/record" kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube" "github.com/vmware-tanzu/vm-operator/pkg/util/ptr" @@ -38,72 +35,42 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err controllerNameLong = fmt.Sprintf("%s/%s/%s", ctx.Namespace, ctx.Name, controllerNameShort) ) - cache, err := pkgmgr.NewNamespacedCacheForObject( - mgr, - &ctx.SyncPeriod, - controlledType, - capabilities.WCPClusterCapabilitiesConfigMapObjKey.Namespace) - if err != nil { - return err - } - r := NewReconciler( ctx, - cache, + mgr.GetClient(), ctrl.Log.WithName("controllers").WithName(controllerName), record.New(mgr.GetEventRecorderFor(controllerNameLong)), ) - // This controller is also run on the non-leaders (webhooks) pods too - // so capabilities updates are reflected there. - c, err := controller.New(controllerName, mgr, controller.Options{ - Reconciler: r, - MaxConcurrentReconciles: 1, - NeedLeaderElection: ptr.To(false), - }) - if err != nil { - return err - } + return ctrl.NewControllerManagedBy(mgr). + For(controlledType). + WithEventFilter(kubeutil.MatchNamePredicate(capabilities.WCPClusterCapabilitiesConfigMapName)). + WithEventFilter(predicate.ResourceVersionChangedPredicate{}). + WithOptions(controller.TypedOptions[reconcile.Request]{ + MaxConcurrentReconciles: 1, + NeedLeaderElection: ptr.To(false), + }). + Complete(r) - return c.Watch(source.Kind( - cache, - controlledType, - &handler.TypedEnqueueRequestForObject[*corev1.ConfigMap]{}, - predicate.TypedFuncs[*corev1.ConfigMap]{ - CreateFunc: func(e event.TypedCreateEvent[*corev1.ConfigMap]) bool { - return e.Object.Name == capabilities.WCPClusterCapabilitiesConfigMapObjKey.Name - }, - UpdateFunc: func(e event.TypedUpdateEvent[*corev1.ConfigMap]) bool { - return e.ObjectOld.Name == capabilities.WCPClusterCapabilitiesConfigMapObjKey.Name - }, - DeleteFunc: func(e event.TypedDeleteEvent[*corev1.ConfigMap]) bool { - return false - }, - GenericFunc: func(e event.TypedGenericEvent[*corev1.ConfigMap]) bool { - return false - }, - }, - kubeutil.TypedResourceVersionChangedPredicate[*corev1.ConfigMap]{}, - )) } func NewReconciler( ctx context.Context, - cache client.Reader, + client client.Client, logger logr.Logger, recorder record.Recorder) *Reconciler { return &Reconciler{ + Client: client, Context: ctx, - Cache: cache, Logger: logger, Recorder: recorder, } } type Reconciler struct { + client.Client Context context.Context - Cache client.Reader Logger logr.Logger Recorder record.Recorder } @@ -127,7 +94,7 @@ func (r *Reconciler) reconcileWcpClusterCapabilitiesConfig(ctx context.Context, "isAsyncSVUpgrade", oldFeatures.SVAsyncUpgrade) cm := &corev1.ConfigMap{} - if err := r.Cache.Get(ctx, req.NamespacedName, cm); err != nil { + if err := r.Client.Get(ctx, req.NamespacedName, cm); err != nil { return client.IgnoreNotFound(err) } diff --git a/controllers/infra/configmap/infra_configmap_controller.go b/controllers/infra/configmap/infra_configmap_controller.go index e85982e10..c3173d6a6 100644 --- a/controllers/infra/configmap/infra_configmap_controller.go +++ b/controllers/infra/configmap/infra_configmap_controller.go @@ -11,18 +11,12 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" - pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/pkg/record" kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube" ) @@ -45,40 +39,10 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err ctx.VMProvider, ) - c, err := controller.New(controllerName, mgr, controller.Options{Reconciler: r}) - if err != nil { - return err - } - - cache, err := pkgmgr.NewNamespacedCacheForObject( - mgr, - &ctx.SyncPeriod, - controlledType, - WcpClusterConfigMapNamespace) - if err != nil { - return err - } - - return c.Watch(source.Kind( - cache, - controlledType, - &handler.TypedEnqueueRequestForObject[*corev1.ConfigMap]{}, - predicate.TypedFuncs[*corev1.ConfigMap]{ - CreateFunc: func(e event.TypedCreateEvent[*corev1.ConfigMap]) bool { - return e.Object.GetName() == WcpClusterConfigMapName - }, - UpdateFunc: func(e event.TypedUpdateEvent[*corev1.ConfigMap]) bool { - return e.ObjectOld.GetName() == WcpClusterConfigMapName - }, - DeleteFunc: func(e event.TypedDeleteEvent[*corev1.ConfigMap]) bool { - return false - }, - GenericFunc: func(e event.TypedGenericEvent[*corev1.ConfigMap]) bool { - return false - }, - }, - kubeutil.TypedResourceVersionChangedPredicate[*corev1.ConfigMap]{}, - )) + return ctrl.NewControllerManagedBy(mgr). + For(controlledType). + WithEventFilter(kubeutil.MatchNamePredicate(WcpClusterConfigMapName)). + Complete(r) } type provider interface { diff --git a/controllers/infra/secret/infra_secret_controller.go b/controllers/infra/secret/infra_secret_controller.go index c06a65bf9..9c4775239 100644 --- a/controllers/infra/secret/infra_secret_controller.go +++ b/controllers/infra/secret/infra_secret_controller.go @@ -10,18 +10,13 @@ import ( "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" - "sigs.k8s.io/controller-runtime/pkg/source" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/manager" pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" - pkgmgr "github.com/vmware-tanzu/vm-operator/pkg/manager" "github.com/vmware-tanzu/vm-operator/pkg/record" kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube" ) @@ -53,40 +48,11 @@ func AddToManager(ctx *pkgctx.ControllerManagerContext, mgr manager.Manager) err ctx.VMProvider, ) - c, err := controller.New(controllerName, mgr, controller.Options{Reconciler: r}) - if err != nil { - return err - } - - cache, err := pkgmgr.NewNamespacedCacheForObject( - mgr, - &ctx.SyncPeriod, - controlledType, - r.vmOpNamespace) - if err != nil { - return err - } - - return c.Watch(source.Kind( - cache, - controlledType, - &handler.TypedEnqueueRequestForObject[*corev1.Secret]{}, - predicate.TypedFuncs[*corev1.Secret]{ - CreateFunc: func(e event.TypedCreateEvent[*corev1.Secret]) bool { - return e.Object.GetName() == VcCredsSecretName - }, - UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Secret]) bool { - return e.ObjectOld.GetName() == VcCredsSecretName - }, - DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Secret]) bool { - return false - }, - GenericFunc: func(e event.TypedGenericEvent[*corev1.Secret]) bool { - return false - }, - }, - kubeutil.TypedResourceVersionChangedPredicate[*corev1.Secret]{}, - )) + return ctrl.NewControllerManagedBy(mgr). + For(controlledType). + WithEventFilter(kubeutil.MatchNamePredicate(VcCredsSecretName)). + WithEventFilter(predicate.ResourceVersionChangedPredicate{}). + Complete(r) } diff --git a/controllers/infra/secret/infra_secret_controller_intg_test.go b/controllers/infra/secret/infra_secret_controller_intg_test.go index 1f715ab08..95bc3c5f7 100644 --- a/controllers/infra/secret/infra_secret_controller_intg_test.go +++ b/controllers/infra/secret/infra_secret_controller_intg_test.go @@ -53,6 +53,8 @@ func intgTestsReconcile() { ) BeforeEach(func() { + atomic.StoreInt32(&called, 0) + obj = &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Namespace: ctx.PodNamespace, @@ -73,7 +75,6 @@ func intgTestsReconcile() { AfterEach(func() { err := ctx.Client.Delete(ctx, obj) Expect(err == nil || apierrors.IsNotFound(err)).To(BeTrue()) - atomic.StoreInt32(&called, 0) }) When("created", func() { @@ -91,9 +92,10 @@ func intgTestsReconcile() { }) It("should not be reconciled", func() { Consistently(func() int32 { - // NOTE: ResetVcClient() won't be called during the reconcile because the - // obj namespace won't match the pod's namespace. It is bad news if you see - // "Reconciling unexpected object" in the logs. + // NOTE: ResetVcClient() will not be called during the + // reconcile because the object's namespace will not + // match the pod's namespace. It is bad news if + // "Reconciling unexpected object" is in the logs. return atomic.LoadInt32(&called) }).Should(Equal(int32(0))) }) @@ -120,9 +122,10 @@ func intgTestsReconcile() { }) It("should not be reconciled", func() { Consistently(func() int32 { - // NOTE: ResetVcClient() won't be called during the reconcile because the - // obj namespace won't match the pod's namespace. It is bad news if you see - // "Reconciling unexpected object" in the logs. + // NOTE: ResetVcClient() will not be called during the + // reconcile because the object's namespace will not + // match the pod's namespace. It is bad news if + // "Reconciling unexpected object" is in the logs. return atomic.LoadInt32(&called) }).Should(Equal(int32(0))) }) diff --git a/pkg/manager/cache.go b/pkg/manager/cache.go index 0c60f5a2c..a438bbf91 100644 --- a/pkg/manager/cache.go +++ b/pkg/manager/cache.go @@ -112,8 +112,8 @@ func GetNamespaceCacheConfigs(namespaces ...string) map[string]ctrlcache.Config } nsc := make(map[string]ctrlcache.Config, len(namespaces)) for i := range namespaces { - if v := namespaces[i]; v != "" { - nsc[v] = ctrlcache.Config{} + if namespaces[i] != "" { + nsc[namespaces[i]] = ctrlcache.Config{} } } return nsc diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index a0cf1d5bf..c9e99bb82 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -67,6 +67,17 @@ func New(ctx context.Context, opts Options) (Manager, error) { _ = vpcv1alpha1.AddToScheme(opts.Scheme) } + // An informer is created for each watched resource. Due to the number of + // ConfigMap and Secret resources that may exist in a cluster, caching all + // of these resources can result in VM Operator being terminated due to an + // out-of-memory error, i.e. OOMKill. To avoid this outcome, ConfigMap and + // Secret resources are only cached for the following namespaces: + // - kube-system + // - the namespace in which the VM Operator pod is running + nsCacheForConfigMapsAndSecrets := GetNamespaceCacheConfigs( + "kube-system", + opts.PodNamespace) + // Build the controller manager. mgr, err := ctrlmgr.New(opts.KubeConfig, ctrlmgr.Options{ Scheme: opts.Scheme, @@ -74,17 +85,14 @@ func New(ctx context.Context, opts Options) (Manager, error) { DefaultNamespaces: GetNamespaceCacheConfigs(opts.WatchNamespace), DefaultTransform: cache.TransformStripManagedFields(), SyncPeriod: &opts.SyncPeriod, - }, - Client: client.Options{ - Cache: &client.CacheOptions{ - // An informer is created for each watched resource. Due to the - // number of ConfigMap and Secret resources that may exist, - // watching each one can result in VM Operator being terminated - // due to an out-of-memory error, i.e. OOMKill. To avoid this - // outcome, ConfigMap and Secret resources are not cached. - DisableFor: []client.Object{ - &corev1.ConfigMap{}, - &corev1.Secret{}, + ByObject: map[client.Object]cache.ByObject{ + // See the docs for nsCacheForConfigMapsAndSecrets + &corev1.ConfigMap{}: { + Namespaces: nsCacheForConfigMapsAndSecrets, + }, + // See the docs for nsCacheForConfigMapsAndSecrets + &corev1.Secret{}: { + Namespaces: nsCacheForConfigMapsAndSecrets, }, }, }, diff --git a/pkg/util/kube/predicates.go b/pkg/util/kube/predicates.go index 6f08f3833..46e30adaf 100644 --- a/pkg/util/kube/predicates.go +++ b/pkg/util/kube/predicates.go @@ -266,3 +266,31 @@ func isNil(arg any) bool { } return false } + +type matchNamePredicate[T client.Object] struct { + name string +} + +// MatchNamePredicate returns a predicate that only allows objects with names +// that match the provided value. +func MatchNamePredicate( + name string) predicate.TypedPredicate[client.Object] { + + return matchNamePredicate[client.Object]{name: name} +} + +func (p matchNamePredicate[T]) Create(e event.TypedCreateEvent[T]) bool { + return e.Object.GetName() == p.name +} + +func (p matchNamePredicate[T]) Delete(e event.TypedDeleteEvent[T]) bool { + return e.Object.GetName() == p.name +} + +func (p matchNamePredicate[T]) Update(e event.TypedUpdateEvent[T]) bool { + return e.ObjectOld.GetName() == p.name +} + +func (p matchNamePredicate[T]) Generic(e event.TypedGenericEvent[T]) bool { + return e.Object.GetName() == p.name +}