From 30c3c832e353de6d3e5365b33b2bde1c37d6ecd1 Mon Sep 17 00:00:00 2001 From: shirady <57721533+shirady@users.noreply.github.com> Date: Tue, 29 Oct 2024 08:54:23 +0200 Subject: [PATCH] Docs | STS | Update Docs (noobaa core service account name changed) Signed-off-by: shirady <57721533+shirady@users.noreply.github.com> --- deploy/internal/deployment-endpoint.yaml | 1 + deploy/internal/statefulset-core.yaml | 1 + deploy/operator.yaml | 1 + doc/dev_guide/create_aws_role.sh | 6 +- doc/dev_guide/troubleshoot_sts_cluster.md | 86 ++++++++++++++++++++--- pkg/bundle/deploy.go | 9 ++- 6 files changed, 91 insertions(+), 13 deletions(-) diff --git a/deploy/internal/deployment-endpoint.yaml b/deploy/internal/deployment-endpoint.yaml index c638ab623c..0e6d068e80 100644 --- a/deploy/internal/deployment-endpoint.yaml +++ b/deploy/internal/deployment-endpoint.yaml @@ -22,6 +22,7 @@ spec: annotations: noobaa.io/configmap-hash: "" spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa-endpoint volumes: - name: mgmt-secret diff --git a/deploy/internal/statefulset-core.yaml b/deploy/internal/statefulset-core.yaml index 7fad869911..c76499ecdd 100644 --- a/deploy/internal/statefulset-core.yaml +++ b/deploy/internal/statefulset-core.yaml @@ -21,6 +21,7 @@ spec: annotations: noobaa.io/configmap-hash: "" spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa-core volumes: - name: logs diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 27fb775541..0bf958fedb 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -13,6 +13,7 @@ spec: app: noobaa noobaa-operator: deployment spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa securityContext: seccompProfile: diff --git 
a/doc/dev_guide/create_aws_role.sh b/doc/dev_guide/create_aws_role.sh index c7fc622d22..3d7fde2780 100755 --- a/doc/dev_guide/create_aws_role.sh +++ b/doc/dev_guide/create_aws_role.sh @@ -15,8 +15,9 @@ ROLE_NAME="shira-28-11" # role name that you pick in your AWS account (replace s NAMESPACE="test1" # namespace name where noobaa will be running (replace test1 with your value) # noobaa variables -SERVICE_ACCOUNT_NAME_1="noobaa" # The service account name of statefulset core and deployment operator +SERVICE_ACCOUNT_NAME_1="noobaa" # The service account name of deployment operator SERVICE_ACCOUNT_NAME_2="noobaa-endpoint" # The service account name of deployment endpoint +SERVICE_ACCOUNT_NAME_3="noobaa-core" # The service account name of statefulset core # AWS variables # Please make sure these values are not empty (AWS_ACCOUNT_ID, OIDC_PROVIDER) @@ -48,7 +49,8 @@ read -r -d '' TRUST_RELATIONSHIP < | grep operator | awk '{ print $1}') -c noobaa-operator -n -- cat /var/run/secrets/openshift/serviceaccount/token) +MY_TOKEN_OPERATOR=$(kubectl exec $(kubectl get pods -n | grep operator | awk '{ print $1}') -c noobaa-operator -n -- cat /var/run/secrets/openshift/serviceaccount/token) ``` ```bash -echo ${MY_TOKEN} +echo ${MY_TOKEN_OPERATOR} ``` And to verify that the issue is with the role please test it with assume-role-with-web-identity ```bash -aws sts assume-role-with-web-identity --role-arn --role-session-name "test" --web-identity-token ${MY_TOKEN} +aws sts assume-role-with-web-identity --role-arn --role-session-name "test" --web-identity-token ${MY_TOKEN_OPERATOR} ``` You should see in the output the credentials (which includes the `AccessKeyId`, `SecretAccessKey`, and `SessionToken` - output example is in file `doc/dev_guide/create_aws_sts_setup_on_minikube.md`, but in case the role is wrong you'll see still `AccessDenied`, so you can create a new role with the script and test it. 
Note: if this invalid role was sent as a part of OCP cluster you would need to update the subscription that have this environment variable: in odf-operator and mcg-operator subscriptions (in both of them - either in the UI or editing the YAMLs), search for the ROLEARN env name and update the value. -#### 2) Cluster configurations +#### 2) Missing service account name in the role trust policy +In case the role has a partial trust policy, for example - when this feature was launched in version 5.15 the service account name of the noobaa core stateful-set was "noobaa" and in version 5.17 it was changed to "noobaa-core". This would cause an `AccessDenied` error when the core pod tries to call assume-role-with-web-identity (hence any action would fail). + +##### Solution: +To troubleshoot this, verify that the mounted token of every pod that sends a request using assume-role-with-web-identity (operator, core and endpoint) has access - repeat the steps mentioned in point number 1 (where they were demonstrated only in the operator pod) in the endpoint and core pods as well. +For example, in the core pod: + +```bash +MY_TOKEN_CORE=$(kubectl exec $(kubectl get pods -n | grep core | awk '{ print $1}') -n -- cat /var/run/secrets/openshift/serviceaccount/token) +``` + +Notes: +- The next step, as described in point number 1, is to run assume-role-with-web-identity using `MY_TOKEN_CORE` instead of `MY_TOKEN_OPERATOR`. +- The token that is projected is different between the pods, and that is fine; what matters is the ability to check assume-role-with-web-identity with each token. You can decode the JWT tokens by running: + +```bash +echo ${MY_TOKEN} | cut -d '.' -f 2 | base64 -d | jq . +``` + +Partial output (when running on `MY_TOKEN_CORE` instead of `MY_TOKEN`): +Note that *** signifies a redacted field. +In this output you can see that the token service account name is "noobaa-core". 
+``` +{ + "aud": [ + "openshift" + ], + "exp": 1730137064, + "iat": 1730133464, + "iss": "https://kubernetes.default.svc", + "jti": ***, + "kubernetes.io": { + "namespace": "openshift-storage", + "node": { + "name": ***, + "uid": *** + }, + "pod": { + "name": "noobaa-core-0", + "uid": *** + }, + "serviceaccount": { + "name": "noobaa-core", + "uid": *** + } + }, + "nbf": ***, + "sub": "system:serviceaccount:openshift-storage:noobaa-core" +} +``` + +In any case, it is better to ask the user to attach the trust policy as a part of troubleshooting, and to make sure it includes the needed lines for the service account names: + +```json + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "${OIDC_PROVIDER}:sub": [ + "system:serviceaccount:openshift-storage:noobaa", + "system:serviceaccount:openshift-storage:noobaa-core", + "system:serviceaccount:openshift-storage:noobaa-endpoint" + ] + } + } +``` +where OIDC_PROVIDER will be filled according to the OIDC provider, +and `openshift-storage` is the namespace name (if it runs in a different namespace it would be different). + +In case the system is already running and the trust policy needs to be updated, you would need to ask the user to update it (this can be done by simply editing the trust policy in the AWS console); then the check of assume-role-with-web-identity should return the credentials instead of an `AccessDenied` error. 
+ +#### 3) Cluster configurations ``` time="2023-11-26T15:17:53Z" level=warning msg="⏳ Temporary Error: could not use AWS AssumeRoleWithWebIdentity with role and web identity token file /var/run/secrets/openshift/serviceaccount/token, InvalidIdentityToken: No OpenIDConnect provider found in your account for https://kubernetes.default.svc\n\tstatus code: 400, request id: " sys=test1/noobaa @@ -61,7 +131,7 @@ The structure of the output should be: 2) In case the OIDC bucket configurations are in an S3 private bucket (with a public CloudFront distribution URL): `d111111abcdef8.cloudfront.net` (this example it taken from [AWS docs](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/GettingStartedCreateDistribution.html)) Please follow the Openshift documentation. -#### 3) Wrong audience +#### 4) Wrong audience ``` time="2023-11-27T08:05:43Z" level=warning msg="⏳ Temporary Error: could not use AWS AssumeRoleWithWebIdentity with role name and web identity token file /var/run/secrets/openshift/serviceaccount/token, InvalidIdentityToken: Incorrect token audience\n\tstatus code: 400, request id: " sys=test1/noobaa @@ -72,7 +142,7 @@ Add the needed audience to match between the create role and the identity provid - api - as we did in the local cluster example in `doc/dev_guide/create_aws_sts_setup_on_minikube.md`. - openshift - as needed in the openshift cluster. 
-#### 4) Missing details: +#### 5) Missing details: ``` time="2023-11-27T07:50:20Z" level=info msg="Secret noobaa-aws-cloud-creds-secret was created successfully by cloud-credentials operator" sys=test1/noobaa @@ -130,7 +200,7 @@ kubectl delete secret noobaa-aws-cloud-creds-secret -n kubectl logs $(kubectl get pod -n openshift-cloud-credential-operator | grep cloud-credential-operator | awk '{ print $1}') -c cloud-credential-operator -n openshift-cloud-credential-operator --tail 50 -f ``` -#### 4) Other: +#### 6) Other: ``` time="2023-12-20T09:46:59Z" level=info msg="AssumeRoleWithWebIdentityInput, roleARN = arn:aws:iam:::role/ webIdentityTokenPath = /var/run/secrets/openshift/serviceaccount/token, " sys=openshift-storage/noobaa diff --git a/pkg/bundle/deploy.go b/pkg/bundle/deploy.go index d54fa632ad..03b2b68030 100644 --- a/pkg/bundle/deploy.go +++ b/pkg/bundle/deploy.go @@ -3820,7 +3820,7 @@ data: shared_preload_libraries = 'pg_stat_statements' ` -const Sha256_deploy_internal_deployment_endpoint_yaml = "0784d71f1a50b8b2f216adb957ea4ce90392e39981bd584dd5e98272327a99c2" +const Sha256_deploy_internal_deployment_endpoint_yaml = "21b206c9119e37c4ebba84d5c1e2b1d45b06c716b4def69db9ba9268ef75e1e1" const File_deploy_internal_deployment_endpoint_yaml = `apiVersion: apps/v1 kind: Deployment @@ -3846,6 +3846,7 @@ spec: annotations: noobaa.io/configmap-hash: "" spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa-endpoint volumes: - name: mgmt-secret @@ -4880,7 +4881,7 @@ spec: noobaa-s3-svc: "true" ` -const Sha256_deploy_internal_statefulset_core_yaml = "447d0c9d6831eb9074e8648609614268430b4d0f89d618a4c9a250053f858290" +const Sha256_deploy_internal_statefulset_core_yaml = "50e5b11d8e0a2f2bb8a6db8d154b34b6569e160fa7ad2b1fb154001b36c8a152" const File_deploy_internal_statefulset_core_yaml = `apiVersion: apps/v1 kind: StatefulSet @@ -4905,6 +4906,7 @@ spec: annotations: 
noobaa.io/configmap-hash: "" spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa-core volumes: - name: logs @@ -5955,7 +5957,7 @@ spec: sourceNamespace: default ` -const Sha256_deploy_operator_yaml = "f1d3f744af5e55b5476c085c10425f93837cf0bdf39d206f3857d3c5e9bc6c78" +const Sha256_deploy_operator_yaml = "5399fbfcd1c421acd978f2762d6f8c5048d68fb3c5acc79a595d62cd035a3bc0" const File_deploy_operator_yaml = `apiVersion: apps/v1 kind: Deployment @@ -5972,6 +5974,7 @@ spec: app: noobaa noobaa-operator: deployment spec: + # Notice that changing the serviceAccountName would need to update existing AWS STS role trust policy for customers serviceAccountName: noobaa securityContext: seccompProfile: