diff --git a/CHANGELOG.md b/CHANGELOG.md index 998df90d568..b6dac9f6a1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ * Add support for Kafka 3.8.1 * Ability to move data between JBOD disks using Cruise Control. +* Only roll pods once for ClientsCa cert renewal. + This change also means the restart reason ClientCaCertKeyReplaced is removed and either CaCertRenewed or CaCertHasOldGeneration will be used. ## 0.44.0 diff --git a/cluster-operator/src/main/java/io/strimzi/operator/cluster/model/RestartReason.java b/cluster-operator/src/main/java/io/strimzi/operator/cluster/model/RestartReason.java index 06bc6725c6d..12c1655f85d 100644 --- a/cluster-operator/src/main/java/io/strimzi/operator/cluster/model/RestartReason.java +++ b/cluster-operator/src/main/java/io/strimzi/operator/cluster/model/RestartReason.java @@ -28,11 +28,6 @@ public enum RestartReason { */ CA_CERT_RENEWED("CA certificate renewed"), - /** - * Clients CA private key was replaced - */ - CLIENT_CA_CERT_KEY_REPLACED("Trust new clients CA certificate signed by new key"), - /** * Cluster CA private key was replaced */ diff --git a/cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/assembly/CaReconciler.java b/cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/assembly/CaReconciler.java index 0d072614461..df4cc541f6c 100644 --- a/cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/assembly/CaReconciler.java +++ b/cluster-operator/src/main/java/io/strimzi/operator/cluster/operator/assembly/CaReconciler.java @@ -375,38 +375,29 @@ Future reconcileClusterOperatorSecret(Clock clock) { /** * Perform a rolling update of the cluster so that CA certificates get added to their truststores, or expired CA - * certificates get removed from their truststores. Note this is only necessary when the CA certificate has changed - * due to a new CA key. It is not necessary when the CA certificate is replace while retaining the existing key. + * certificates get removed from their truststores. Note this is only necessary when the Cluster CA certificate has changed + * due to a new CA key. It is not necessary when the CA certificate is renewed while retaining the existing key. */ Future rollingUpdateForNewCaKey() { - RestartReasons podRollReasons = RestartReasons.empty(); - // cluster CA needs to be fully trusted // it is coming from a cluster CA key replacement which didn't end successfully (i.e. CO stopped) and we need to continue from there if (clusterCa.keyReplaced() || isClusterCaNeedFullTrust) { - podRollReasons.add(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED); - } - - if (clientsCa.keyReplaced()) { - podRollReasons.add(RestartReason.CLIENT_CA_CERT_KEY_REPLACED); - } - - if (podRollReasons.shouldRestart()) { + String restartReason = RestartReason.CLUSTER_CA_CERT_KEY_REPLACED.getDefaultNote(); TlsPemIdentity coTlsPemIdentity = new TlsPemIdentity(new PemTrustSet(clusterCa.caCertSecret()), PemAuthIdentity.clusterOperator(coSecret)); return getZooKeeperReplicas() - .compose(replicas -> maybeRollZookeeper(replicas, podRollReasons, coTlsPemIdentity)) + .compose(replicas -> rollZookeeper(replicas, restartReason, coTlsPemIdentity)) .compose(i -> getKafkaReplicas()) - .compose(nodes -> rollKafkaBrokers(nodes, podRollReasons, coTlsPemIdentity)) - .compose(i -> maybeRollDeploymentIfExists(KafkaResources.entityOperatorDeploymentName(reconciliation.name()), podRollReasons)) - .compose(i -> maybeRollDeploymentIfExists(KafkaExporterResources.componentName(reconciliation.name()), podRollReasons)) - .compose(i -> maybeRollDeploymentIfExists(CruiseControlResources.componentName(reconciliation.name()), podRollReasons)); + .compose(nodes -> rollKafkaBrokers(nodes, RestartReasons.of(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED), coTlsPemIdentity)) + .compose(i -> rollDeploymentIfExists(KafkaResources.entityOperatorDeploymentName(reconciliation.name()), restartReason)) + .compose(i -> rollDeploymentIfExists(KafkaExporterResources.componentName(reconciliation.name()), restartReason)) + .compose(i -> rollDeploymentIfExists(CruiseControlResources.componentName(reconciliation.name()), restartReason)); } else { return Future.succeededFuture(); } } /** - * Gather the Kafka related components pods for checking CA key trust and CA certificate usage to sign servers certificate. + * Gather the Kafka related components pods for checking Cluster CA key trust and Cluster CA certificate usage to sign servers certificate. * * Verify that all the pods are already trusting the new CA certificate signed by a new CA key. * It checks each pod's CA key generation, compared with the new CA key generation. @@ -498,33 +489,27 @@ Future rollingUpdateForNewCaKey() { } /** - * Checks whether the ZooKeeper cluster needs ot be rolled to trust the new CA private key. ZooKeeper uses only the - * Cluster CA and not the Clients CA. So the rolling happens only when Cluster CA private key changed. + * Rolls the ZooKeeper cluster to trust the new Cluster CA private key. * * @param replicas Current number of ZooKeeper replicas - * @param podRestartReasons List of reasons to restart the pods + * @param podRestartReason Reason to restart the pods * @param coTlsPemIdentity Trust set and identity for TLS client authentication for connecting to ZooKeeper * - * @return Future which completes when this step is done either by rolling the ZooKeeper cluster or by deciding - * that no rolling is needed. + * @return Future which completes when the ZooKeeper cluster has been rolled. */ - /* test */ Future maybeRollZookeeper(int replicas, RestartReasons podRestartReasons, TlsPemIdentity coTlsPemIdentity) { - if (podRestartReasons.contains(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED)) { - Labels zkSelectorLabels = Labels.EMPTY - .withStrimziKind(reconciliation.kind()) - .withStrimziCluster(reconciliation.name()) - .withStrimziName(KafkaResources.zookeeperComponentName(reconciliation.name())); - - Function> rollZkPodAndLogReason = pod -> { - List reason = List.of(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED.getDefaultNote()); - LOGGER.debugCr(reconciliation, "Rolling Pod {} to {}", pod.getMetadata().getName(), reason); - return reason; - }; - return new ZooKeeperRoller(podOperator, zookeeperLeaderFinder, operationTimeoutMs) - .maybeRollingUpdate(reconciliation, replicas, zkSelectorLabels, rollZkPodAndLogReason, coTlsPemIdentity); - } else { - return Future.succeededFuture(); - } + /* test */ Future rollZookeeper(int replicas, String podRestartReason, TlsPemIdentity coTlsPemIdentity) { + Labels zkSelectorLabels = Labels.EMPTY + .withStrimziKind(reconciliation.kind()) + .withStrimziCluster(reconciliation.name()) + .withStrimziName(KafkaResources.zookeeperComponentName(reconciliation.name())); + + Function> rollZkPodAndLogReason = pod -> { + List reason = List.of(podRestartReason); + LOGGER.debugCr(reconciliation, "Rolling Pod {} to {}", pod.getMetadata().getName(), reason); + return reason; + }; + return new ZooKeeperRoller(podOperator, zookeeperLeaderFinder, operationTimeoutMs) + .maybeRollingUpdate(reconciliation, replicas, zkSelectorLabels, rollZkPodAndLogReason, coTlsPemIdentity); } /* test */ Future> getKafkaReplicas() { @@ -572,15 +557,6 @@ Future rollingUpdateForNewCaKey() { eventPublisher); } - // Entity Operator, Kafka Exporter, and Cruise Control are only rolled when the cluster CA cert key is replaced - private Future maybeRollDeploymentIfExists(String deploymentName, RestartReasons podRollReasons) { - if (podRollReasons.contains(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED)) { - return rollDeploymentIfExists(deploymentName, RestartReason.CLUSTER_CA_CERT_KEY_REPLACED.getDefaultNote()); - } else { - return Future.succeededFuture(); - } - } - /** * Rolls deployments when they exist. This method is used by the CA renewal to roll deployments. * diff --git a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerTest.java b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerTest.java index 7ef5ba5a2fb..b642afe0056 100644 --- a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerTest.java +++ b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerTest.java @@ -1788,11 +1788,8 @@ public void testStrimziManagedClientsCaKeyReplaced(Vertx vertx, VertxTestContext mockCaReconciler .reconcile(Clock.systemUTC()) .onComplete(context.succeeding(c -> context.verify(() -> { - assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, aMapWithSize(6)); - mockCaReconciler.kafkaRestartReasons.forEach((podName, restartReasons) -> { - assertThat("Restart reasons for pod " + podName, restartReasons.getReasons(), hasSize(1)); - assertThat("Restart reasons for pod " + podName, restartReasons.contains(RestartReason.CLIENT_CA_CERT_KEY_REPLACED), is(true)); - }); + // We rely on KafkaReconciler to roll pods for ClientsCa renewal + assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, anEmptyMap()); assertThat("Deployment restart reasons", mockCaReconciler.deploymentRestartReasons, anEmptyMap()); async.flag(); }))); @@ -1971,11 +1968,8 @@ public void testUserManagedClientsCaKeyReplaced(Vertx vertx, VertxTestContext co mockCaReconciler .reconcile(Clock.systemUTC()) .onComplete(context.succeeding(c -> context.verify(() -> { - assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, aMapWithSize(6)); - mockCaReconciler.kafkaRestartReasons.forEach((podName, restartReasons) -> { - assertThat("Restart reasons for pod " + podName, restartReasons.getReasons(), hasSize(1)); - assertThat("Restart reasons for pod " + podName, restartReasons.contains(RestartReason.CLIENT_CA_CERT_KEY_REPLACED), is(true)); - }); + // We rely on KafkaReconciler to roll pods for ClientsCa renewal + assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, anEmptyMap()); assertThat("Deployment restart reasons", mockCaReconciler.deploymentRestartReasons, anEmptyMap()); async.flag(); }))); @@ -2040,12 +2034,7 @@ public void testUserManagedClientsCaCertRenewed(Vertx vertx, VertxTestContext co mockCaReconciler .reconcile(Clock.systemUTC()) .onComplete(context.succeeding(c -> context.verify(() -> { - // When user is managing CA a cert renewal implies a key replacement - assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, aMapWithSize(6)); - mockCaReconciler.kafkaRestartReasons.forEach((podName, restartReasons) -> { - assertThat("Restart reasons for pod " + podName, restartReasons.getReasons(), hasSize(1)); - assertThat("Restart reasons for pod " + podName, restartReasons.contains(RestartReason.CLIENT_CA_CERT_KEY_REPLACED), is(true)); - }); + assertThat("Kafka restart reasons", mockCaReconciler.kafkaRestartReasons, anEmptyMap()); assertThat("Deployment restart reasons", mockCaReconciler.deploymentRestartReasons, anEmptyMap()); async.flag(); }))); diff --git a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerZooBasedTest.java b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerZooBasedTest.java index 44de51b1394..3b31bc77611 100644 --- a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerZooBasedTest.java +++ b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/assembly/CaReconcilerZooBasedTest.java @@ -225,7 +225,7 @@ public void testRollingReasonsWithClusterCAKeyNotTrusted(Vertx vertx, VertxTestC .reconcile(Clock.systemUTC()) .onComplete(context.succeeding(c -> context.verify(() -> { assertThat(mockCaReconciler.isClusterCaNeedFullTrust, is(true)); - assertThat(mockCaReconciler.zkPodRestartReasons.contains(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED), is(true)); + assertThat(mockCaReconciler.zkPodRestartReason, is(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED.getDefaultNote())); assertThat(mockCaReconciler.kPodRollReasons.contains(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED), is(true)); assertThat(mockCaReconciler.deploymentRollReason.size() == 3, is(true)); for (String reason: mockCaReconciler.deploymentRollReason) { @@ -237,7 +237,7 @@ public void testRollingReasonsWithClusterCAKeyNotTrusted(Vertx vertx, VertxTestC static class MockCaReconciler extends CaReconciler { - RestartReasons zkPodRestartReasons; + String zkPodRestartReason; RestartReasons kPodRollReasons; List deploymentRollReason = new ArrayList<>(); @@ -259,8 +259,8 @@ Future getZooKeeperReplicas() { } @Override - Future maybeRollZookeeper(int replicas, RestartReasons podRestartReasons, TlsPemIdentity coTlsPemIdentity) { - this.zkPodRestartReasons = podRestartReasons; + Future rollZookeeper(int replicas, String restartReason, TlsPemIdentity coTlsPemIdentity) { + this.zkPodRestartReason = restartReason; return Future.succeededFuture(); } diff --git a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherTest.java b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherTest.java index 11c5c074983..85142e19029 100644 --- a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherTest.java +++ b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventPublisherTest.java @@ -112,9 +112,9 @@ protected void publishEvent(MicroTime eventTime, ObjectReference podReference, S } }; - Set expectedReasons = Set.of("ClientCaCertKeyReplaced", "ClusterCaCertKeyReplaced"); + Set expectedReasons = Set.of("FileSystemResizeNeeded", "ClusterCaCertKeyReplaced"); - RestartReasons reasons = new RestartReasons().add(RestartReason.CLIENT_CA_CERT_KEY_REPLACED) + RestartReasons reasons = new RestartReasons().add(RestartReason.FILE_SYSTEM_RESIZE_NEEDED) .add(RestartReason.CLUSTER_CA_CERT_KEY_REPLACED); capturingPublisher.publishRestartEvents(mockPod, reasons); diff --git a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventsMockTest.java b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventsMockTest.java index 01b8938f826..8bdb02f176f 100644 --- a/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventsMockTest.java +++ b/cluster-operator/src/test/java/io/strimzi/operator/cluster/operator/resource/events/KubernetesRestartEventsMockTest.java @@ -105,7 +105,6 @@ import static io.strimzi.operator.cluster.model.RestartReason.CA_CERT_HAS_OLD_GENERATION; import static io.strimzi.operator.cluster.model.RestartReason.CA_CERT_REMOVED; import static io.strimzi.operator.cluster.model.RestartReason.CA_CERT_RENEWED; -import static io.strimzi.operator.cluster.model.RestartReason.CLIENT_CA_CERT_KEY_REPLACED; import static io.strimzi.operator.cluster.model.RestartReason.CLUSTER_CA_CERT_KEY_REPLACED; import static io.strimzi.operator.cluster.model.RestartReason.CONFIG_CHANGE_REQUIRES_RESTART; import static io.strimzi.operator.cluster.model.RestartReason.FILE_SYSTEM_RESIZE_NEEDED; @@ -396,24 +395,6 @@ public boolean certRenewed() { reconciler.reconcile(new KafkaStatus(), Clock.systemUTC()).onComplete(verifyEventPublished(CA_CERT_RENEWED, context)); } - @Test - void testEventEmittedWhenClientCaCertKeyReplaced(Vertx vertx, VertxTestContext context) { - // Turn off cert authority generation to cause reconciliation to roll pods - Kafka kafkaWithoutClientCaGen = new KafkaBuilder(kafka) - .editSpec() - .editOrNewClientsCa() - .withGenerateCertificateAuthority(false) - .endClientsCa() - .endSpec() - .build(); - - // Bump ca cert generation to make it look newer than pod knows of - patchClusterSecretWithAnnotation(Ca.ANNO_STRIMZI_IO_CLIENTS_CA_CERT_GENERATION, "100000"); - - CaReconciler reconciler = new CaReconciler(reconciliation, kafkaWithoutClientCaGen, clusterOperatorConfig, supplier, vertx, mockCertManager, passwordGenerator); - reconciler.reconcile(Clock.systemUTC()).onComplete(verifyEventPublished(CLIENT_CA_CERT_KEY_REPLACED, context)); - } - @Test void testEventEmittedWhenClusterCaCertKeyReplaced(Vertx vertx, VertxTestContext context) { //Turn off cert authority generation to cause reconciliation to roll pods diff --git a/documentation/modules/deploying/ref-operator-restart-events-reasons.adoc b/documentation/modules/deploying/ref-operator-restart-events-reasons.adoc index 842dacdb266..3a00090e215 100644 --- a/documentation/modules/deploying/ref-operator-restart-events-reasons.adoc +++ b/documentation/modules/deploying/ref-operator-restart-events-reasons.adoc @@ -25,9 +25,6 @@ a|Event |CaCertRenewed |CA certificates have been renewed, and the pod is restarted to run with the updated certificates. -|ClientCaCertKeyReplaced -|The key used to sign clients CA certificates has been replaced, and the pod is being restarted as part of the CA renewal process. - |ClusterCaCertKeyReplaced |The key used to sign the cluster's CA certificates has been replaced, and the pod is being restarted as part of the CA renewal process. diff --git a/systemtest/src/test/java/io/strimzi/systemtest/security/SecurityST.java b/systemtest/src/test/java/io/strimzi/systemtest/security/SecurityST.java index e50d2f5d9d0..d9e76d92d15 100644 --- a/systemtest/src/test/java/io/strimzi/systemtest/security/SecurityST.java +++ b/systemtest/src/test/java/io/strimzi/systemtest/security/SecurityST.java @@ -391,7 +391,7 @@ void testAutoReplaceClusterCaKeysTriggeredByAnno() { @Tag("ClientsCaKeys") void testAutoReplaceClientsCaKeysTriggeredByAnno() { autoReplaceSomeKeysTriggeredByAnno( - 2, + 1, false, true, false,