From d86082a51e5221851a8bc86cdc3d4c56667a2487 Mon Sep 17 00:00:00 2001 From: Kashif Khan Date: Wed, 11 Dec 2024 13:12:40 +0200 Subject: [PATCH 1/2] Add RBAC files for metrics authentication and authorization https://github.com/metal3-io/baremetal-operator/pull/2102 has introduced controller-runtime's WithAuthenticationAndAuthorization filter which also requires extra RBAC roles and role bindings for metrics authentication and authorization. This PR adds those. Signed-off-by: Kashif Khan --- config/base/rbac/kustomization.yaml | 10 ++++ config/base/rbac/metrics_auth_role.yaml | 17 ++++++ .../base/rbac/metrics_auth_role_binding.yaml | 12 ++++ config/base/rbac/metrics_reader_role.yaml | 9 +++ config/base/rbac/metrics_service.yaml | 14 +++++ config/overlays/e2e/kustomization.yaml | 8 +-- config/render/capm3.yaml | 56 +++++++++++++++++++ 7 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 config/base/rbac/metrics_auth_role.yaml create mode 100644 config/base/rbac/metrics_auth_role_binding.yaml create mode 100644 config/base/rbac/metrics_reader_role.yaml create mode 100644 config/base/rbac/metrics_service.yaml diff --git a/config/base/rbac/kustomization.yaml b/config/base/rbac/kustomization.yaml index 166fe79868..161e0f8630 100644 --- a/config/base/rbac/kustomization.yaml +++ b/config/base/rbac/kustomization.yaml @@ -9,3 +9,13 @@ resources: - role_binding.yaml - leader_election_role.yaml - leader_election_role_binding.yaml +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. +# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml +- metrics_service.yaml diff --git a/config/base/rbac/metrics_auth_role.yaml b/config/base/rbac/metrics_auth_role.yaml new file mode 100644 index 0000000000..32d2e4ec6b --- /dev/null +++ b/config/base/rbac/metrics_auth_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/config/base/rbac/metrics_auth_role_binding.yaml b/config/base/rbac/metrics_auth_role_binding.yaml new file mode 100644 index 0000000000..e775d67ff0 --- /dev/null +++ b/config/base/rbac/metrics_auth_role_binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: metrics-auth-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/config/base/rbac/metrics_reader_role.yaml b/config/base/rbac/metrics_reader_role.yaml new file mode 100644 index 0000000000..51a75db47a --- /dev/null +++ b/config/base/rbac/metrics_reader_role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/config/base/rbac/metrics_service.yaml b/config/base/rbac/metrics_service.yaml new file mode 100644 index 0000000000..6cf656be14 --- /dev/null +++ b/config/base/rbac/metrics_service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + targetPort: https + selector: + control-plane: controller-manager diff --git a/config/overlays/e2e/kustomization.yaml b/config/overlays/e2e/kustomization.yaml index 99c0f10336..1711876448 100644 --- a/config/overlays/e2e/kustomization.yaml +++ b/config/overlays/e2e/kustomization.yaml @@ -31,7 +31,7 @@ generatorOptions: # NOTE: These credentials are generated automatically in hack/ci-e2e.sh secretGenerator: - - name: ironic-credentials - files: - - username=ironic-username - - password=ironic-password +- name: ironic-credentials + files: + - username=ironic-username + - password=ironic-password diff --git a/config/render/capm3.yaml b/config/render/capm3.yaml index ec2d378fea..83a3561351 100644 --- a/config/render/capm3.yaml +++ b/config/render/capm3.yaml @@ -2370,6 +2370,34 @@ rules: - update --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: baremetal-operator-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: baremetal-operator-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: baremetal-operator-leader-election-rolebinding @@ -2396,6 +2424,19 @@ subjects: name: baremetal-operator-controller-manager namespace: baremetal-operator-system --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: baremetal-operator-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: baremetal-operator-metrics-auth-role +subjects: +- kind: ServiceAccount + name: baremetal-operator-controller-manager + namespace: baremetal-operator-system +--- apiVersion: v1 data: controller_manager_config.yaml: | @@ -2431,6 +2472,21 @@ metadata: --- apiVersion: v1 kind: Service +metadata: + labels: + control-plane: controller-manager + name: baremetal-operator-controller-manager-metrics-service + namespace: baremetal-operator-system +spec: + ports: + - name: https + port: 8443 + targetPort: https + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service metadata: name: baremetal-operator-webhook-service namespace: baremetal-operator-system From 93f5f4a4200dbdf1ca9adf85c25769fc75763bb9 Mon Sep 17 00:00:00 2001 From: Kashif Khan Date: Thu, 12 Dec 2024 10:15:48 +0200 Subject: [PATCH 2/2] Add e2e test for metrics service Signed-off-by: Kashif Khan --- .golangci.yaml | 1 + config/base/manager.yaml | 94 ++++++++++++++-------------- config/render/capm3.yaml | 3 + main.go | 4 +- test/e2e/e2e_suite_test.go | 123 +++++++++++++++++++++++++++++++++++++ 5 files changed, 179 insertions(+), 46 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index ec0300a8b0..87266094c9 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -120,6 +120,7 @@ issues: linters: - gci - goconst + - gosec - path: _test\.go linters: - errcheck diff --git a/config/base/manager.yaml b/config/base/manager.yaml index 26463d50cf..0cbfec0468 100644 --- a/config/base/manager.yaml +++ b/config/base/manager.yaml @@ -19,51 +19,55 @@ spec: webhook: metal3-io-v1alpha1-baremetalhost spec: containers: - - command: - - /baremetal-operator - args: - - --enable-leader-election - image: quay.io/metal3-io/baremetal-operator - imagePullPolicy: Always - env: - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - envFrom: - - configMapRef: - name: ironic - name: manager - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - runAsUser: 65532 - runAsGroup: 65532 - livenessProbe: - httpGet: - path: /healthz - port: 9440 - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 2 - successThreshold: 1 - failureThreshold: 10 - readinessProbe: - httpGet: - path: /readyz - port: 9440 - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 2 - successThreshold: 1 - failureThreshold: 10 + - command: + - /baremetal-operator + args: + - --enable-leader-election + ports: + - containerPort: 8443 + protocol: TCP + name: https + image: quay.io/metal3-io/baremetal-operator + imagePullPolicy: Always + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + envFrom: + - configMapRef: + name: ironic + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + runAsUser: 65532 + runAsGroup: 65532 + livenessProbe: + httpGet: + path: /healthz + port: 9440 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + successThreshold: 1 + failureThreshold: 10 + readinessProbe: + httpGet: + path: /readyz + port: 9440 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 2 + successThreshold: 1 + failureThreshold: 10 terminationGracePeriodSeconds: 10 securityContext: runAsNonRoot: true diff --git a/config/render/capm3.yaml b/config/render/capm3.yaml index 83a3561351..e364080c4a 100644 --- a/config/render/capm3.yaml +++ b/config/render/capm3.yaml @@ -2551,6 +2551,9 @@ spec: - containerPort: 9443 name: webhook-server protocol: TCP + - containerPort: 8443 + name: https + protocol: TCP readinessProbe: failureThreshold: 10 httpGet: diff --git a/main.go b/main.go index ad2906c866..38a703ceb8 100644 --- a/main.go +++ b/main.go @@ -137,7 +137,7 @@ func main() { // namespace. flag.StringVar(&watchNamespace, "namespace", os.Getenv("WATCH_NAMESPACE"), "Namespace that the controller watches to reconcile host resources.") - flag.StringVar(&metricsBindAddr, "metrics-addr", "127.0.0.1:8085", + flag.StringVar(&metricsBindAddr, "metrics-addr", ":8443", "The address the metric endpoint binds to.") flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. "+ @@ -217,7 +217,9 @@ func main() { Scheme: scheme, Metrics: metricsserver.Options{ BindAddress: metricsBindAddr, + SecureServing: true, FilterProvider: filters.WithAuthenticationAndAuthorization, + TLSOpts: tlsOptionOverrides, }, WebhookServer: webhook.NewServer(webhook.Options{ Port: webhookPort, diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index a4edf7a73a..56f66893f0 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -2,11 +2,15 @@ package e2e import ( "context" + "encoding/json" "flag" + "fmt" "os" + "os/exec" "path/filepath" "strings" "testing" + "time" metal3api "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1" . "github.com/onsi/ginkgo/v2" @@ -76,6 +80,69 @@ func TestE2e(t *testing.T) { RunSpecs(t, "E2e Suite") } +const namespace = "baremetal-operator-system" +const serviceAccountName = "baremetal-operator-controller-manager" +const metricsServiceName = "baremetal-operator-controller-manager-metrics-service" +const metricsRoleBindingName = "baremetal-operator-metrics-binding" + +// serviceAccountToken returns a token for the specified service account in the given namespace. +// It uses the Kubernetes TokenRequest API to generate a token by directly sending a request +// and parsing the resulting token from the API response. +func serviceAccountToken() (string, error) { + const tokenRequestRawString = `{ + "apiVersion": "authentication.k8s.io/v1", + "kind": "TokenRequest" + }` + + // Temporary file to store the token request + secretName := fmt.Sprintf("%s-token-request", serviceAccountName) + tokenRequestFile := filepath.Join("/tmp", secretName) //nolint: gocritic + err := os.WriteFile(tokenRequestFile, []byte(tokenRequestRawString), os.FileMode(0o644)) + if err != nil { + return "", err + } + + var out string + verifyTokenCreation := func(g Gomega) { + // Execute kubectl command to create the token + cmd := exec.Command("kubectl", "create", "--raw", fmt.Sprintf( + "/api/v1/namespaces/%s/serviceaccounts/%s/token", + namespace, + serviceAccountName, + ), "-f", tokenRequestFile) + + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + + // Parse the JSON output to extract the token + var token tokenRequest + err = json.Unmarshal(output, &token) + g.Expect(err).NotTo(HaveOccurred()) + + out = token.Status.Token + } + Eventually(verifyTokenCreation).Should(Succeed()) + + return out, err +} + +// tokenRequest is a simplified representation of the Kubernetes TokenRequest API response, +// containing only the token field that we need to extract. +type tokenRequest struct { + Status struct { + Token string `json:"token"` + } `json:"status"` +} + +// getMetricsOutput retrieves and returns the logs from the curl pod used to access the metrics endpoint. +func getMetricsOutput() string { + By("getting the curl-metrics logs") + cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", namespace) + metricsOutput, err := cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Failed to retrieve logs from curl pod") + return string(metricsOutput) +} + var _ = SynchronizedBeforeSuite(func() []byte { var kubeconfigPath string @@ -158,6 +225,62 @@ var _ = SynchronizedBeforeSuite(func() []byte { Expect(err).NotTo(HaveOccurred()) } + // Metrics test start + By("creating a ClusterRoleBinding for the service account to allow access to metrics") + cmd := exec.Command("kubectl", "create", "clusterrolebinding", metricsRoleBindingName, + "--clusterrole=baremetal-operator-metrics-reader", + fmt.Sprintf("--serviceaccount=%s:%s", namespace, serviceAccountName), + ) + _, err := cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Failed to create ClusterRoleBinding") + + By("validating that the metrics service is available") + cmd = exec.Command("kubectl", "get", "service", metricsServiceName, "-n", namespace) + _, err = cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Metrics service should exist") + + By("getting the service account token") + token, err := serviceAccountToken() + Expect(err).NotTo(HaveOccurred()) + Expect(token).NotTo(BeEmpty()) + + By("waiting for the metrics endpoint to be ready") + verifyMetricsEndpointReady := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "endpoints", metricsServiceName, "-n", namespace) + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).To(ContainSubstring("8443"), "Metrics endpoint is not ready") + } + Eventually(verifyMetricsEndpointReady).Should(Succeed()) + + By("creating the curl-metrics pod to access the metrics endpoint") + cmd = exec.Command("kubectl", "run", "curl-metrics", "--restart=Never", + "--namespace", namespace, + "--image=curlimages/curl:7.87.0", + "--command", + "--", "curl", "-v", "--tlsv1.3", "-k", "-H", fmt.Sprintf("Authorization:Bearer %s", token), + fmt.Sprintf("https://%s.%s.svc.cluster.local:8443/metrics", metricsServiceName, namespace)) + _, err = cmd.CombinedOutput() + Expect(err).NotTo(HaveOccurred(), "Failed to create curl-metrics pod") + + By("waiting for the curl-metrics pod to complete.") + verifyCurlUp := func(g Gomega) { + cmd := exec.Command("kubectl", "get", "pods", "curl-metrics", + "-o", "jsonpath={.status.phase}", + "-n", namespace) + output, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(string(output)).To(Equal("Succeeded"), "curl pod in wrong status") + } + Eventually(verifyCurlUp, 5*time.Minute).Should(Succeed()) + + By("getting the metrics by checking curl-metrics logs") + metricsOutput := getMetricsOutput() + Expect(metricsOutput).To(ContainSubstring( + "controller_runtime_reconcile_total", + )) + // Metrics test end + return []byte(strings.Join([]string{clusterProxy.GetKubeconfigPath()}, ",")) }, func(data []byte) { // Before each parallel node