From db297feb1cec103f59c1c7f52408e563a58654b5 Mon Sep 17 00:00:00 2001 From: johnny_shum Date: Wed, 9 Oct 2024 20:06:30 +0000 Subject: [PATCH] Introduce k8s network failure reason and remove previous network related error... --- documentation/domains/Domain.json | 1 + kubernetes/crd/domain-crd.yaml | 3 +- .../operator/StuckPodProcessing.java | 2 ++ .../kubernetes/operator/WaitForReadyStep.java | 1 + .../kubernetes/operator/WebhookMain.java | 1 + .../helpers/PersistentVolumeClaimHelper.java | 2 ++ .../helpers/PersistentVolumeHelper.java | 2 ++ .../helpers/PodDisruptionBudgetHelper.java | 3 ++ .../operator/helpers/ResponseStep.java | 29 ++++++++++++++++++- .../operator/helpers/SecretHelper.java | 1 + .../operator/helpers/ServiceHelper.java | 3 ++ .../operator/helpers/WebhookHelper.java | 3 ++ .../operator/steps/DefaultResponseStep.java | 1 + .../domain/model/DomainFailureReason.java | 7 +++++ 14 files changed, 57 insertions(+), 2 deletions(-) diff --git a/documentation/domains/Domain.json b/documentation/domains/Domain.json index 5217e91d0f6..d1d922c0b42 100644 --- a/documentation/domains/Domain.json +++ b/documentation/domains/Domain.json @@ -238,6 +238,7 @@ "DomainInvalid", "Introspection", "Kubernetes", + "KubernetesNetworkException", "ServerPod", "PersistentVolumeClaim", "ReplicasTooHigh", diff --git a/kubernetes/crd/domain-crd.yaml b/kubernetes/crd/domain-crd.yaml index c468105717c..fc45002df75 100644 --- a/kubernetes/crd/domain-crd.yaml +++ b/kubernetes/crd/domain-crd.yaml @@ -5,7 +5,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - weblogic.sha256: 3cad99b24fb84de65dc38d7734ce269fb7058c3b6aed32b85ef590e142921635 + weblogic.sha256: 8cd5a2176fe99b104c82048d750d42f1130341bdfdba825493bc64de45025424 name: domains.weblogic.oracle spec: group: weblogic.oracle @@ -10322,6 +10322,7 @@ spec: - DomainInvalid - Introspection - Kubernetes + - KubernetesNetworkException - ServerPod - PersistentVolumeClaim - ReplicasTooHigh diff --git a/operator/src/main/java/oracle/kubernetes/operator/StuckPodProcessing.java b/operator/src/main/java/oracle/kubernetes/operator/StuckPodProcessing.java index 9d75f482c21..736d843955d 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/StuckPodProcessing.java +++ b/operator/src/main/java/oracle/kubernetes/operator/StuckPodProcessing.java @@ -64,6 +64,7 @@ public PodListProcessing(String namespace, OffsetDateTime dateTime) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); callResponse.getResult().getItems().stream() .filter(pod -> isStuck(pod, now)) .forEach(pod -> addStuckPodToPacket(packet, pod)); @@ -156,6 +157,7 @@ public ForcedDeleteResponseStep(String name, String namespace, String domainUID) @Override @SuppressWarnings("try") public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); try (ThreadLoggingContext ignored = ThreadLoggingContext.setThreadContext().namespace(namespace).domainUid(domainUID)) { LOGGER.info(POD_FORCE_DELETED, name, namespace); diff --git a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java index ef2421c2e7a..5de5c4d0827 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WaitForReadyStep.java @@ -254,6 +254,7 @@ static class MakeRightDomainStep extends DefaultResponseStep { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); MakeRightDomainOperation makeRightDomainOperation = (MakeRightDomainOperation)packet.get(MAKE_RIGHT_DOMAIN_OPERATION); if (makeRightDomainOperation != null) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/WebhookMain.java b/operator/src/main/java/oracle/kubernetes/operator/WebhookMain.java index 267453403cd..f3631d22c13 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/WebhookMain.java +++ b/operator/src/main/java/oracle/kubernetes/operator/WebhookMain.java @@ -212,6 +212,7 @@ private class CrdPresenceResponseStep extends D @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); warnedOfCrdAbsence = false; crdPresenceCheckCount.set(0); return super.onSuccess(packet, callResponse); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeClaimHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeClaimHelper.java index 70a3f586cef..064512e12d1 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeClaimHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeClaimHelper.java @@ -144,6 +144,7 @@ private NextAction updateDomainStatus(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); logPersistentVolumeClaimCreated(messageKey); addPersistentVolumeClaimToRecord(callResponse.getResult()); return doNext(packet); @@ -164,6 +165,7 @@ public NextAction onFailure(Packet packet, CallResponse @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); V1PersistentVolumeClaim persistentVolumeClaim = callResponse.getResult(); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeHelper.java index 0969bea8739..2cfa456a33a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/PersistentVolumeHelper.java @@ -133,6 +133,7 @@ private NextAction updateDomainStatus(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); logPersistentVolumeCreated(messageKey); return doNext(packet); } @@ -152,6 +153,7 @@ public NextAction onFailure(Packet packet, CallResponse call @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); DomainPresenceInfo info = packet.getSpi(DomainPresenceInfo.class); V1PersistentVolume persistentVolume = callResponse.getResult(); if (persistentVolume == null) { diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/PodDisruptionBudgetHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/PodDisruptionBudgetHelper.java index 2ac78d276e8..e0583dfc665 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/PodDisruptionBudgetHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/PodDisruptionBudgetHelper.java @@ -113,6 +113,7 @@ private NextAction updateDomainStatus(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); logPodDisruptionBudgetCreated(messageKey); addPodDisruptionBudgetToRecord(callResponse.getResult()); return doNext(packet); @@ -133,6 +134,7 @@ public NextAction onFailure(Packet packet, CallResponse c @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); V1PodDisruptionBudget podDisruptionBudget = callResponse.getResult(); if (podDisruptionBudget == null) { removePodDisruptionBudgetFromRecord(); @@ -157,6 +159,7 @@ public NextAction onFailure(Packet packet, CallResponse c @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); logPodDisruptionBudgetPatched(); return doNext(packet); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java index ded9efb6ccc..5d46f81e530 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/ResponseStep.java @@ -3,6 +3,8 @@ package oracle.kubernetes.operator.helpers; +import java.net.ConnectException; +import java.net.SocketTimeoutException; import java.util.Collections; import java.util.Optional; import java.util.stream.Collectors; @@ -25,6 +27,7 @@ import oracle.kubernetes.operator.work.Packet; import oracle.kubernetes.operator.work.Step; import oracle.kubernetes.weblogic.domain.model.DomainCondition; +import oracle.kubernetes.weblogic.domain.model.DomainFailureReason; import oracle.kubernetes.weblogic.domain.model.DomainResource; import static oracle.kubernetes.common.CommonConstants.CRD; @@ -37,6 +40,7 @@ import static oracle.kubernetes.operator.calls.AsyncRequestStep.accessContinue; import static oracle.kubernetes.weblogic.domain.model.DomainConditionType.FAILED; import static oracle.kubernetes.weblogic.domain.model.DomainFailureReason.KUBERNETES; +import static oracle.kubernetes.weblogic.domain.model.DomainFailureReason.KUBERNETES_NETWORK_EXCEPTION; /** * Step to receive response of Kubernetes API server call. @@ -80,6 +84,20 @@ public final void setPrevious(Step previousStep) { this.previousStep = previousStep; } + /** + * Clear out any existing Kubernetes network exception (ConnectException and SocketTimeoutException). + * + * @param packet packet + */ + public static void clearExistingKubernetesNetworkException(Packet packet) { + Optional.ofNullable(packet.getSpi(DomainPresenceInfo.class)) + .map(DomainPresenceInfo::getDomain) + .map(DomainResource::getStatus) + .ifPresent(status -> status.removeConditionsMatching( + c -> c.hasType(FAILED) && KUBERNETES_NETWORK_EXCEPTION == c.getReason())); + } + + @Override public final NextAction apply(Packet packet) { NextAction nextAction = getActionForCallResponse(packet); @@ -233,7 +251,16 @@ private void addDomainFailureStatus(Packet packet, RequestParams requestParams, private void updateFailureStatus( @Nonnull DomainResource domain, RequestParams requestParams, ApiException apiException) { - DomainCondition condition = new DomainCondition(FAILED).withFailureInfo(domain.getSpec()).withReason(KUBERNETES) + DomainFailureReason reason = KUBERNETES; + if (apiException != null) { + LOGGER.fine("updateFailureStatus: apiException: " + apiException.getCause()); + LOGGER.fine("updateFailureStatus: status code: " + apiException.getCode()); + } + if (apiException != null && (apiException.getCause() instanceof ConnectException + || apiException.getCause() instanceof SocketTimeoutException)) { + reason = DomainFailureReason.KUBERNETES_NETWORK_EXCEPTION; + } + DomainCondition condition = new DomainCondition(FAILED).withFailureInfo(domain.getSpec()).withReason(reason) .withMessage(createMessage(requestParams, apiException)); addFailureStatus(domain, condition); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java index ea0860124bf..a73fe223c0a 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/SecretHelper.java @@ -102,6 +102,7 @@ public NextAction onFailure(Packet packet, CallResponse callResponse) @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); V1Secret secret = callResponse.getResult(); packet.getSpi(DomainPresenceInfo.class).setWebLogicCredentialsSecret(secret); insertAuthorizationSource(packet, secret); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/ServiceHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/ServiceHelper.java index b4389a0759d..46c18d32d27 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/ServiceHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/ServiceHelper.java @@ -663,6 +663,7 @@ public NextAction onFailure(Packet packet, CallResponse callResponse) @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); V1Service service = callResponse.getResult(); if (service == null) { removeServiceFromRecord(); @@ -687,6 +688,7 @@ public NextAction onFailure(Packet packet, CallResponse callResponse) @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); return doNext(createReplacementService(getNext()), packet); } } @@ -714,6 +716,7 @@ private NextAction updateDomainStatus(Packet packet, CallResponse cal @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); logServiceCreated(messageKey); addServiceToRecord(callResponse.getResult()); return doNext(packet); diff --git a/operator/src/main/java/oracle/kubernetes/operator/helpers/WebhookHelper.java b/operator/src/main/java/oracle/kubernetes/operator/helpers/WebhookHelper.java index e9f7d08c19b..d0ea0226f7c 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/helpers/WebhookHelper.java +++ b/operator/src/main/java/oracle/kubernetes/operator/helpers/WebhookHelper.java @@ -193,6 +193,7 @@ private class ReadResponseStep extends WebhookConfigResponseStep { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); V1ValidatingWebhookConfiguration existingWebhookConfig = callResponse.getResult(); if (existingWebhookConfig == null) { return doNext(createValidatingWebhookConfiguration(getNext()), packet); @@ -273,6 +274,7 @@ private class CreateResponseStep extends WebhookConfigResponseStep { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); LOGGER.info(VALIDATING_WEBHOOK_CONFIGURATION_CREATED, getName(callResponse.getResult())); return doNext(packet); } @@ -306,6 +308,7 @@ public NextAction onFailure(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); LOGGER.info(MessageKeys.VALIDATING_WEBHOOK_CONFIGURATION_REPLACED, getName(callResponse.getResult())); return doNext(packet); } diff --git a/operator/src/main/java/oracle/kubernetes/operator/steps/DefaultResponseStep.java b/operator/src/main/java/oracle/kubernetes/operator/steps/DefaultResponseStep.java index 81abd02f27a..ec845b2535e 100644 --- a/operator/src/main/java/oracle/kubernetes/operator/steps/DefaultResponseStep.java +++ b/operator/src/main/java/oracle/kubernetes/operator/steps/DefaultResponseStep.java @@ -36,6 +36,7 @@ public NextAction onFailure(Packet packet, CallResponse callResponse) { @Override public NextAction onSuccess(Packet packet, CallResponse callResponse) { + clearExistingKubernetesNetworkException(packet); return doNext(packet); } } diff --git a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/DomainFailureReason.java b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/DomainFailureReason.java index 3164c815a69..4798c1d3a89 100644 --- a/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/DomainFailureReason.java +++ b/operator/src/main/java/oracle/kubernetes/weblogic/domain/model/DomainFailureReason.java @@ -73,6 +73,13 @@ public String getEventError() { return KUBERNETES_EVENT_ERROR; } }, + @SerializedName("KubernetesNetworkException") + KUBERNETES_NETWORK_EXCEPTION("KubernetesNetworkException") { + @Override + public String getEventError() { + return KUBERNETES_EVENT_ERROR; + } + }, @SerializedName("ServerPod") SERVER_POD("ServerPod") { @Override