Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-33958: secret re-creation scenario for externalCertificate with active informer #614

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pkg/cmd/infra/router/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -793,14 +793,14 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error {

var plugin router.Plugin = templatePlugin
var recorder controller.RouteStatusRecorder = controller.LogRejections
informer := factory.CreateRoutesSharedInformer()
routeLister := routelisters.NewRouteLister(informer.GetIndexer())
if o.UpdateStatus {
lease := writerlease.New(time.Minute, 3*time.Second)
go lease.Run(stopCh)
informer := factory.CreateRoutesSharedInformer()
tracker := controller.NewSimpleContentionTracker(informer, o.RouterName, o.ResyncInterval/10)
tracker.SetConflictMessage(fmt.Sprintf("The router detected another process is writing conflicting updates to route status with name %q. Please ensure that the configuration of all routers is consistent. Route status will not be updated as long as conflicts are detected.", o.RouterName))
go tracker.Run(stopCh)
routeLister := routelisters.NewRouteLister(informer.GetIndexer())
status := controller.NewStatusAdmitter(plugin, routeclient.RouteV1(), routeLister, o.RouterName, o.RouterCanonicalHostname, lease, tracker)
recorder = status
plugin = status
Expand All @@ -812,7 +812,7 @@ func (o *TemplateRouterOptions) Run(stopCh <-chan struct{}) error {
plugin = controller.NewExtendedValidator(plugin, recorder)
}
if o.AllowExternalCertificates {
plugin = controller.NewRouteSecretManager(plugin, recorder, secretManager, kc.CoreV1(), authorizationClient.SubjectAccessReviews())
plugin = controller.NewRouteSecretManager(plugin, recorder, secretManager, kc.CoreV1(), routeLister, authorizationClient.SubjectAccessReviews())
}
plugin = controller.NewUniqueHost(plugin, o.RouterSelection.DisableNamespaceOwnershipCheck, recorder)
plugin = controller.NewHostAdmitter(plugin, o.RouteAdmissionFunc(), o.AllowWildcardRoutes, o.RouterSelection.DisableNamespaceOwnershipCheck, recorder)
Expand Down
159 changes: 106 additions & 53 deletions pkg/router/controller/route_secret_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"sync"

routev1 "github.com/openshift/api/route/v1"
routelisters "github.com/openshift/client-go/route/listers/route/v1"
"github.com/openshift/library-go/pkg/route/secretmanager"
"github.com/openshift/router/pkg/router"
"github.com/openshift/router/pkg/router/routeapihelpers"
Expand All @@ -29,6 +30,7 @@ type RouteSecretManager struct {

secretManager secretmanager.SecretManager
secretsGetter corev1client.SecretsGetter
routelister routelisters.RouteLister
sarClient authorizationclient.SubjectAccessReviewInterface
// deletedSecrets tracks routes for which the associated secret was deleted after intial creation of the secret monitor.
// This helps to differentiate between a new secret creation and a recreation of a previously deleted secret.
Expand All @@ -39,12 +41,20 @@ type RouteSecretManager struct {

// NewRouteSecretManager creates a new instance of RouteSecretManager.
// It wraps the provided plugin and adds secret management capabilities.
func NewRouteSecretManager(plugin router.Plugin, recorder RouteStatusRecorder, secretManager secretmanager.SecretManager, secretsGetter corev1client.SecretsGetter, sarClient authorizationclient.SubjectAccessReviewInterface) *RouteSecretManager {
func NewRouteSecretManager(
plugin router.Plugin,
recorder RouteStatusRecorder,
secretManager secretmanager.SecretManager,
secretsGetter corev1client.SecretsGetter,
routelister routelisters.RouteLister,
sarClient authorizationclient.SubjectAccessReviewInterface,
) *RouteSecretManager {
return &RouteSecretManager{
plugin: plugin,
recorder: recorder,
secretManager: secretManager,
secretsGetter: secretsGetter,
routelister: routelister,
sarClient: sarClient,
deletedSecrets: sync.Map{},
}
Expand Down Expand Up @@ -95,10 +105,6 @@ func (p *RouteSecretManager) HandleRoute(eventType watch.EventType, route *route
}
}

// TODO always creating a new `secretHandler` ensures that there are no stale route specs
// in the next plugin chain, especially when the referenced secret is updated or deleted.
// This prevents sending outdated routes to subsequent plugins, preserving expected functionality.
// TODO : we might need to add RouteLister()
case watch.Modified:
// Determine if the route's external certificate configuration has changed
newHasExt := hasExternalCertificate(route)
Expand All @@ -120,13 +126,23 @@ func (p *RouteSecretManager) HandleRoute(eventType watch.EventType, route *route
}
} else {
// ExternalCertificate is not updated
// Revalidate and update the in-memory TLS certificate and key (even if ExternalCertificate remains unchanged)
// It is the responsibility of this plugin to ensure everything is synced properly.

// One Scenario: The user deletes the secret, causing the route's status to be updated to reject.
// This triggers the entire plugin chain again. Without re-validating the external certificate
// and re-syncing the secret here, the route could become active again and start serving
// the default certificate, even though its spec has an external certificate.
// Re-validate and update the in-memory TLS certificate and key (even if ExternalCertificate remains unchanged)
// It is the responsibility of this plugin to ensure everything is synced properly, because there might
// have been updates to the secret data or other events that require re-evaluation.
//
// 1. Secret update and re-create: Even if the externalCertificate name remains
// the same, the router needs to be aware of the events when the content of
// the secret is updated or the secret is recreated, to re-validate and
// fetch the new TLS data. Note: These events won't trigger the apiserver
// route admission, hence we need to rely on the router controller for this validation.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean we rely on the controller to trigger re-validation, right? Or are you saying that this scenario requires the controller to perform some validation steps that would otherwise be handled by the admission plugin?

Suggested change
// route admission, hence we need to rely on the router controller for this validation.
// route admission, hence we need to rely on the router controller to trigger this validation.

//
// 2. Consider a case where a user deletes the secret, causing the route's
// status to be updated to "reject". This status update triggers the
// entire plugin chain again. Without re-validating the external certificate
// and re-syncing the secret here, the route could incorrectly transition
// back to an "active" state and start serving the default certificate
// even though its spec still references an external certificate.
//
Comment on lines +141 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why would the router use the default certificate for the route? Why wouldn't it continue to use the route's old external certificate?

This point is central to my concern about the secret manager's setting Admitted=False. I'm fine with setting Admitted=False in these two cases:

  • The secret was deleted.
  • A new secret was created after the old one had been deleted (which means that the route should already have Admitted=False).

In other words, the secret manager can set Admitted=False in any sequence of events that includes deleting the secret that the route references. However, I still don't understand the necessity of setting Admitted=False in the case where the secret was updated. We do need to force the controller to revalidate the route, but is it really necessary to mark the route as rejected in the meantime?

Does setting Admitted=False when the secret is updated imply that rotating certificates necessarily causes downtime when using an external certificate? ...Hm, maybe not, as you only call p.plugin.HandleRoute(watch.Deleted, route) in the delete case, and not in the update case; does that mean we report that the route is rejected but continue serving it unless revalidation re-rejects it?

In comparison, if the route has an inline certificate, the route can be updated with a new certificate, and the router continues using the old certificate until it observes the update; once the router does observe the update, it switches to the new certificate without rejecting the route and without any interruption to service. I don't understand why the experience should or must be different when using an external certificate.

// Therefore, it is essential to re-sync the secret to ensure the plugin chain correctly handles the route.

log.V(4).Info("ExternalCertificate is not updated", "namespace", route.Namespace, "route", route.Name)
Expand Down Expand Up @@ -181,7 +197,7 @@ func (p *RouteSecretManager) validateAndRegister(route *routev1.Route) error {
return err
}
// register route with secretManager
handler := p.generateSecretHandler(route)
handler := p.generateSecretHandler(route.Namespace, route.Name)
if err := p.secretManager.RegisterRoute(context.TODO(), route.Namespace, route.Name, route.Spec.TLS.ExternalCertificate.Name, handler); err != nil {
log.Error(err, "failed to register route")
return err
Expand All @@ -196,78 +212,102 @@ func (p *RouteSecretManager) validateAndRegister(route *routev1.Route) error {

// generateSecretHandler creates ResourceEventHandlerFuncs to handle Add, Update, and Delete events on secrets.
//
// AddFunc: Invoked when a new secret is added. It logs the addition of the secret. This function also handles the
// re-creation scenario where a previously deleted secret is added again. In such cases, it revalidates the route's
// external certificate configuration, updates the route's TLS certificate and key, and calls the next plugin's HandleRoute method with a watch.Modified event, and then commits the changes by calling the next plugin's Commit() method.
// To ensure that the handlers always operate on the most up-to-date route object,
// it fetches the latest route from the informer and then updates the route's status
// with specific reasons related to the secret event. This status update is crucial
// because it serves as a signal to trigger the entire route plugin chain to re-evaluate
// the route from the beginning.
//
// Triggering a re-evaluation ensures that both:
// - Changes made by `RouteModifierFn` registered with the `RouterControllerFactory`
// are propagated correctly to all plugins.
// - All plugins get a chance to react to the changes and make necessary
// in-memory modifications to the route object, ensuring consistent behavior.
//
// - AddFunc:
// - Invoked when a new secret is added.
// - Handles secret recreation: If a secret is recreated (created after being
// deleted), it fetches the latest route object associated with it using the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if multiple routes reference the same secret?

// RouteLister and updates the route's status to trigger a full
// re-evaluation of the route by the entire plugin chain.
//
// UpdateFunc: Invoked when an existing secret is updated. It performs validation of the route's external certificate configuration.
// If the validation succeeds, it updates the route's TLS certificate and key with the new secret data and calls the next plugin's HandleRoute method with a watch.Modified event, and then commits the changes by calling the next plugin's Commit() method.
// - UpdateFunc:
// - Invoked when an existing secret is updated.
// - Fetches the latest associated route object and
// updates the route's status to trigger a re-evaluation by the entire plugin chain.
//
// DeleteFunc: Invoked when the secret is deleted. It logs the deletion, marks the secret as deleted for this route in the deletedSecrets map,
// records the route rejection, and triggers the deletion of the route by calling the HandleRoute method with a watch.Deleted event. NOTE: It doesn't unregister the route.
func (p *RouteSecretManager) generateSecretHandler(route *routev1.Route) cache.ResourceEventHandlerFuncs {
// - DeleteFunc:
// - Invoked when the secret is deleted.
// - Marks the secret as deleted for the associated route in the `deletedSecrets` map.
// - Fetches the latest associated route object and records a route rejection event.
// - Triggers the deletion of the route by calling the HandleRoute method with a watch.Deleted event.
// - NOTE: It doesn't unregister the route.
func (p *RouteSecretManager) generateSecretHandler(namespace, routeName string) cache.ResourceEventHandlerFuncs {
// secret handler
return cache.ResourceEventHandlerFuncs{

AddFunc: func(obj interface{}) {
secret := obj.(*kapi.Secret)
log.V(4).Info("secret added for route", "namespace", route.Namespace, "secret", secret.Name, "route", route.Name)
log.V(4).Info("secret added for route", "namespace", namespace, "secret", secret.Name, "route", routeName)

// Secret re-creation scenario
// Check if the route key exists in the deletedSecrets map, indicating that the secret was previously deleted for this route.
// If it exists, it means the secret is being recreated. Remove the key from the map and proceed with re-registration.
// If it exists, it means the secret is being recreated. Remove the key from the map and proceed with handling the route.
// Otherwise, no-op (new secret creation scenario and no race condition with that flow)
// This helps to differentiate between a new secret creation and a re-creation of a previously deleted secret.
key := generateKey(route)
key := generateKey(namespace, routeName)
if _, deleted := p.deletedSecrets.LoadAndDelete(key); deleted {
log.V(4).Info("secret recreated for route", "namespace", route.Namespace, "secret", secret.Name, "route", route.Name)
msg := fmt.Sprintf("secret %q recreated for route %q", secret.Name, key)
log.V(4).Info(msg)

// re-validate
// since secret re-creation will not trigger the apiserver route admission,
// we need to rely on the router controller for this validation.
if err := p.validate(route); err != nil {
// Ensure fetching the updated route
route, err := p.getUpdatedRoute(namespace, routeName)
if err != nil {
return
Copy link
Contributor

@alebedev87 alebedev87 Oct 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we fail here, will the fact that the deletedSecrets has been cleaned already cause any problem?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we fail here, then there are two choices

  • Delete that secret which will re-populate deletedSecrets ; and then re-create it
  • Update that secret

Both choices will only work after fixing the issue which had caused the validation to fail.

Of we fail here, will the fact that the deletedSecrets has been cleaned already cause any problem?

So, it should not cause any problem.

}
// read the re-created secret and update TLS certificate and key
if err := p.populateRouteTLSFromSecret(route); err != nil {
return
}
// call the next plugin with watch.Modified
p.plugin.HandleRoute(watch.Modified, route)
// commit the changes
p.plugin.Commit()

// The route should *remain* rejected until it's re-evaluated
// by all the plugins (including this plugin). Once passes, the route will become active again.
p.recorder.RecordRouteRejection(route, "ExternalCertificateSecretReCreated", msg)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
p.recorder.RecordRouteRejection(route, "ExternalCertificateSecretReCreated", msg)
p.recorder.RecordRouteRejection(route, "ExternalCertificateSecretRecreated", msg)

}
},

UpdateFunc: func(old interface{}, new interface{}) {
secretOld := old.(*kapi.Secret)
secretNew := new.(*kapi.Secret)
log.V(4).Info("secret updated for route", "namespace", route.Namespace, "secret", secretNew.Name, "old-version", secretOld.ResourceVersion, "new-version", secretNew.ResourceVersion, "route", route.Name)
key := generateKey(namespace, routeName)
msg := fmt.Sprintf("secret %q updated for route %q : old-version %q new-version %q", secretNew.Name, key, secretOld.ResourceVersion, secretNew.ResourceVersion)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
msg := fmt.Sprintf("secret %q updated for route %q : old-version %q new-version %q", secretNew.Name, key, secretOld.ResourceVersion, secretNew.ResourceVersion)
msg := fmt.Sprintf("secret %q updated for route %q (old resourceVersion=%v, new resourceVersion=%v)", secretNew.Name, key, secretOld.ResourceVersion, secretNew.ResourceVersion)

log.V(4).Info(msg)

// re-validate
if err := p.validate(route); err != nil {
// Ensure fetching the updated route
route, err := p.getUpdatedRoute(namespace, routeName)
if err != nil {
return
}
// read referenced secret (updated data) and update TLS certificate and key
if err := p.populateRouteTLSFromSecret(route); err != nil {
return
}
// call the next plugin with watch.Modified
p.plugin.HandleRoute(watch.Modified, route)
// commit the changes
p.plugin.Commit()

// Until the route is re-evaluated by all the plugins (including this plugin),
// it should be marked as rejected. Once passes, the route will become active again.
p.recorder.RecordRouteRejection(route, "ExternalCertificateSecretUpdated", msg)
},

DeleteFunc: func(obj interface{}) {
secret := obj.(*kapi.Secret)
key := generateKey(route)
msg := fmt.Sprintf("secret %s deleted for route %s", secret.Name, key)
key := generateKey(namespace, routeName)
msg := fmt.Sprintf("secret %q deleted for route %q", secret.Name, key)
log.V(4).Info(msg)

// keep the secret monitor active and mark the secret as deleted for this route.
p.deletedSecrets.Store(key, true)

// Ensure fetching the updated route
route, err := p.getUpdatedRoute(namespace, routeName)
if err != nil {
return
}

// Reject this route
p.recorder.RecordRouteRejection(route, "ExternalCertificateSecretDeleted", msg)
// Stop serving this route
p.plugin.HandleRoute(watch.Deleted, route)
},
}
Expand All @@ -276,6 +316,9 @@ func (p *RouteSecretManager) generateSecretHandler(route *routev1.Route) cache.R
// validate checks the route's external certificate configuration.
// If the validation fails, it records the route rejection and triggers
// the deletion of the route by calling the HandleRoute method with a watch.Deleted event.
//
// NOTE: TLS data validation and sanitization are handled by the next plugin `ExtendedValidator`,
// by reading the "tls.crt" and "tls.key" added by populateRouteTLSFromSecret.
func (p *RouteSecretManager) validate(route *routev1.Route) error {
fldPath := field.NewPath("spec").Child("tls").Child("externalCertificate")
if err := routeapihelpers.ValidateTLSExternalCertificate(route, fldPath, p.sarClient, p.secretsGetter).ToAggregate(); err != nil {
Expand Down Expand Up @@ -321,7 +364,7 @@ func (p *RouteSecretManager) unregister(route *routev1.Route) error {
}
// clean the route if present inside deletedSecrets
// this is required for the scenario when the associated secret is deleted, before unregistering with secretManager
p.deletedSecrets.Delete(generateKey(route))
p.deletedSecrets.Delete(generateKey(route.Namespace, route.Name))
return nil
}

Expand All @@ -332,6 +375,16 @@ func hasExternalCertificate(route *routev1.Route) bool {
}

// generateKey creates a unique identifier for a route
func generateKey(route *routev1.Route) string {
return fmt.Sprintf("%s/%s", route.Namespace, route.Name)
func generateKey(namespace, routeName string) string {
return fmt.Sprintf("%s/%s", namespace, routeName)
}

// getUpdatedRoute fetches the latest version of the route using the RouteLister.
func (p *RouteSecretManager) getUpdatedRoute(namespace, routeName string) (*routev1.Route, error) {
updatedRoute, err := p.routelister.Routes(namespace).Get(routeName)
if err != nil {
log.Error(err, "failed to get route", "namespace", namespace, "route", routeName)
return nil, err
}
return updatedRoute, nil
}
Loading