From d7386c642e707ff1da8c280a5ad853adb6469a4b Mon Sep 17 00:00:00 2001 From: Robert Grandl Date: Thu, 6 Jun 2024 15:17:45 -0700 Subject: [PATCH] Disable TLS --- cmd/weaver-gke/controller.go | 7 +-- cmd/weaver-gke/distributor.go | 7 +-- cmd/weaver-gke/manager.go | 7 +-- internal/gke/babysitter.go | 34 ++++++++++--- internal/gke/container.pb.go | 21 ++++++-- internal/gke/container.proto | 1 + internal/gke/deploy.go | 78 +++++++++++++++++----------- internal/gke/gke.go | 27 +++++----- internal/gke/nanny.go | 96 ++++++++++++++++++++++++++--------- 9 files changed, 188 insertions(+), 90 deletions(-) diff --git a/cmd/weaver-gke/controller.go b/cmd/weaver-gke/controller.go index c96cdf1..2c01124 100644 --- a/cmd/weaver-gke/controller.go +++ b/cmd/weaver-gke/controller.go @@ -23,8 +23,9 @@ import ( ) var ( - controllerFlags = flag.NewFlagSet("controller", flag.ContinueOnError) - controllerPort = controllerFlags.Int("port", 0, "Controller port") + controllerFlags = flag.NewFlagSet("controller", flag.ContinueOnError) + controllerPort = controllerFlags.Int("port", 0, "Controller port") + controllerMtlsEnabled = controllerFlags.Bool("mtls", false, "Whether controller uses MTLS") ) var controllerCmd = tool.Command{ @@ -37,7 +38,7 @@ var controllerCmd = tool.Command{ Flags: -h, --help Print this help message.`, Fn: func(ctx context.Context, args []string) error { - return gke.RunController(ctx, *controllerPort) + return gke.RunController(ctx, *controllerPort, *controllerMtlsEnabled) }, Hidden: true, } diff --git a/cmd/weaver-gke/distributor.go b/cmd/weaver-gke/distributor.go index a208d3a..bb051de 100644 --- a/cmd/weaver-gke/distributor.go +++ b/cmd/weaver-gke/distributor.go @@ -23,8 +23,9 @@ import ( ) var ( - distributorFlags = flag.NewFlagSet("distributor", flag.ContinueOnError) - distributorPort = distributorFlags.Int("port", 0, "Distributor port") + distributorFlags = flag.NewFlagSet("distributor", flag.ContinueOnError) + distributorPort = distributorFlags.Int("port", 0, "Distributor port") + distributorMtlsEnabled = distributorFlags.Bool("mtls", false, "Whether distributor uses MTLS") ) var distributorCmd = tool.Command{ @@ -37,7 +38,7 @@ var distributorCmd = tool.Command{ Flags: -h, --help Print this help message.`, Fn: func(ctx context.Context, args []string) error { - return gke.RunDistributor(ctx, *distributorPort) + return gke.RunDistributor(ctx, *distributorPort, *distributorMtlsEnabled) }, Hidden: true, } diff --git a/cmd/weaver-gke/manager.go b/cmd/weaver-gke/manager.go index 67e6561..44f6747 100644 --- a/cmd/weaver-gke/manager.go +++ b/cmd/weaver-gke/manager.go @@ -23,8 +23,9 @@ import ( ) var ( - managerFlags = flag.NewFlagSet("manager", flag.ContinueOnError) - managerPort = managerFlags.Int("port", 0, "Manager port") + managerFlags = flag.NewFlagSet("manager", flag.ContinueOnError) + managerPort = managerFlags.Int("port", 0, "Manager port") + managerMtlsEnabled = managerFlags.Bool("mtls", false, "Whether manager uses MTLS") ) var managerCmd = tool.Command{ @@ -37,7 +38,7 @@ var managerCmd = tool.Command{ Flags: -h, --help Print this help message.`, Fn: func(ctx context.Context, args []string) error { - return gke.RunManager(ctx, *managerPort) + return gke.RunManager(ctx, *managerPort, *managerMtlsEnabled) }, Hidden: true, } diff --git a/internal/gke/babysitter.go b/internal/gke/babysitter.go index 0386a2f..22d1502 100644 --- a/internal/gke/babysitter.go +++ b/internal/gke/babysitter.go @@ -16,6 +16,8 @@ package gke import ( "context" + "crypto/tls" + "crypto/x509" "fmt" "log/slog" "net" @@ -137,16 +139,24 @@ func RunBabysitter(ctx context.Context) error { return metricsExporter.Export(ctx, metrics, cfg.Telemetry.Metrics.AutoGenerateMetrics) } - caCert, getSelfCert, err := getPodCerts() - if err != nil { - return err + var caCert *x509.Certificate + var getSelfCert func() ([]byte, []byte, error) + if meta.MtlsEnabled { + caCert, getSelfCert, err = getPodCerts() + if err != nil { + return err + } } // Create an unique http client to the manager, that will be reused across all // the http requests to the manager. + var tlsConfig *tls.Config + if meta.MtlsEnabled { + tlsConfig = mtls.ClientTLSConfig(meta.Project, caCert, getSelfCert, "manager") + } m := &manager.HttpClient{ Addr: cfg.ManagerAddr, - Client: makeHttpClient(mtls.ClientTLSConfig(meta.Project, caCert, getSelfCert, "manager")), + Client: makeHttpClient(tlsConfig), } mux := http.NewServeMux() host, err := os.Hostname() @@ -158,17 +168,25 @@ func RunBabysitter(ctx context.Context) error { if err != nil { return err } - selfAddr := fmt.Sprintf("https://%s", lis.Addr()) + var selfAddr string + if meta.MtlsEnabled { + selfAddr = fmt.Sprintf("https://%s", lis.Addr()) + } else { + selfAddr = fmt.Sprintf("http://%s", lis.Addr()) + } _, err = babysitter.Start(ctx, logger, cfg, replicaSet, meta.Project, meta.PodName, internalAddress, mux, selfAddr, m, caCert, getSelfCert, logSaver, traceSaver, metricSaver) if err != nil { return err } server := &http.Server{ - Handler: mux, - TLSConfig: mtls.ServerTLSConfig(meta.Project, caCert, getSelfCert, "manager", "distributor"), + Handler: mux, + } + if meta.MtlsEnabled { + server.TLSConfig = mtls.ServerTLSConfig(meta.Project, caCert, getSelfCert, "manager", "distributor") + return server.ServeTLS(lis, "", "") } - return server.ServeTLS(lis, "", "") + return server.Serve(lis) } // gkeConfigFromEnv reads config.GKEConfig from the Service Weaver internal diff --git a/internal/gke/container.pb.go b/internal/gke/container.pb.go index 1026fdf..1ab8674 100644 --- a/internal/gke/container.pb.go +++ b/internal/gke/container.pb.go @@ -86,6 +86,7 @@ type ContainerMetadata struct { ContainerName string `protobuf:"bytes,7,opt,name=container_name,json=containerName,proto3" json:"container_name,omitempty"` // Kubernetes container name App string `protobuf:"bytes,8,opt,name=app,proto3" json:"app,omitempty"` // Kubernetes app label Telemetry *config.Telemetry `protobuf:"bytes,9,opt,name=telemetry,proto3" json:"telemetry,omitempty"` // Options to configure the telemetry + MtlsEnabled bool `protobuf:"varint,10,opt,name=mtls_enabled,json=mtlsEnabled,proto3" json:"mtls_enabled,omitempty"` // Whether MTLS should be enabled } func (x *ContainerMetadata) Reset() { @@ -183,6 +184,13 @@ func (x *ContainerMetadata) GetTelemetry() *config.Telemetry { return nil } +func (x *ContainerMetadata) GetMtlsEnabled() bool { + if x != nil { + return x.MtlsEnabled + } + return false +} + var File_internal_gke_container_proto protoreflect.FileDescriptor var file_internal_gke_container_proto_rawDesc = []byte{ @@ -190,7 +198,7 @@ var file_internal_gke_container_proto_rawDesc = []byte{ 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x03, 0x67, 0x6b, 0x65, 0x1a, 0x1c, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x22, 0xb7, 0x02, 0x0a, 0x11, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x4d, + 0x6f, 0x22, 0xda, 0x02, 0x0a, 0x11, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x72, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x18, 0x0a, 0x07, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, 0x6e, 0x61, 0x6d, @@ -209,10 +217,13 @@ var file_internal_gke_container_proto_rawDesc = []byte{ 0x08, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x61, 0x70, 0x70, 0x12, 0x2f, 0x0a, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x18, 0x09, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, - 0x52, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x42, 0x29, 0x5a, 0x27, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x57, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2f, 0x77, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2d, 0x67, - 0x6b, 0x65, 0x3b, 0x67, 0x6b, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x52, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x12, 0x21, 0x0a, 0x0c, 0x6d, + 0x74, 0x6c, 0x73, 0x5f, 0x65, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x18, 0x0a, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x0b, 0x6d, 0x74, 0x6c, 0x73, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x42, 0x29, + 0x5a, 0x27, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x63, 0x65, 0x57, 0x65, 0x61, 0x76, 0x65, 0x72, 0x2f, 0x77, 0x65, 0x61, 0x76, 0x65, + 0x72, 0x2d, 0x67, 0x6b, 0x65, 0x3b, 0x67, 0x6b, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x33, } var ( diff --git a/internal/gke/container.proto b/internal/gke/container.proto index 5186fda..80272c3 100644 --- a/internal/gke/container.proto +++ b/internal/gke/container.proto @@ -65,4 +65,5 @@ message ContainerMetadata { string container_name = 7; // Kubernetes container name string app = 8; // Kubernetes app label config.Telemetry telemetry = 9; // Options to configure the telemetry + bool mtls_enabled = 10; // Whether MTLS should be enabled } diff --git a/internal/gke/deploy.go b/internal/gke/deploy.go index f08d745..108f35d 100644 --- a/internal/gke/deploy.go +++ b/internal/gke/deploy.go @@ -452,8 +452,10 @@ func prepareProject(ctx context.Context, config CloudConfig, cfg *config.GKEConf } // Setup the Certificate Authority. - if err := ensureCA(ctx, config); err != nil { - return nil, "", err + if cfg.Mtls { + if err := ensureCA(ctx, config); err != nil { + return nil, "", err + } } // Ensure the Service Weaver configuration cluster is setup. @@ -481,8 +483,14 @@ func buildRolloutRequest(cfg *config.GKEConfig) *controller.RolloutRequest { for _, region := range cfg.Regions { // NOTE: distributor address must be resolvable from anywhere inside // the project's VPC. - distributorAddr := - fmt.Sprintf("https://distributor.%s.svc.%s-%s:80", namespaceName, applicationClusterName, region) + var distributorAddr string + if cfg.Mtls { + distributorAddr = + fmt.Sprintf("https://distributor.%s.svc.%s-%s:80", namespaceName, applicationClusterName, region) + } else { + distributorAddr = + fmt.Sprintf("http://distributor.%s.svc.%s-%s:80", namespaceName, applicationClusterName, region) + } req.Locations = append(req.Locations, &controller.RolloutRequest_Location{ Name: region, DistributorAddr: distributorAddr, @@ -1042,7 +1050,7 @@ func ensureConfigCluster(ctx context.Context, config CloudConfig, cfg *config.GK if err != nil { return nil, "", err } - cluster, err := ensureManagedCluster(ctx, config, name, region) + cluster, err := ensureManagedCluster(ctx, config, name, region, cfg.Mtls) if err != nil { return nil, "", err } @@ -1110,7 +1118,7 @@ func ensureConfigCluster(ctx context.Context, config CloudConfig, cfg *config.GK // It returns the cluster information and the IP address of the gateway that // routes internal traffic to Service Weaver applications in the cluster. func ensureApplicationCluster(ctx context.Context, config CloudConfig, cfg *config.GKEConfig, region string) (*ClusterInfo, string, error) { - cluster, err := ensureManagedCluster(ctx, config, applicationClusterName, region) + cluster, err := ensureManagedCluster(ctx, config, applicationClusterName, region, cfg.Mtls) if err != nil { return nil, "", err } @@ -1254,7 +1262,7 @@ func ensureApplicationCluster(ctx context.Context, config CloudConfig, cfg *conf // ensureManagedCluster ensures that a Service Weaver managed cluster is available // and running in the given region. -func ensureManagedCluster(ctx context.Context, config CloudConfig, name, region string) (*ClusterInfo, error) { +func ensureManagedCluster(ctx context.Context, config CloudConfig, name, region string, mtlsEnabled bool) (*ClusterInfo, error) { exists, err := hasCluster(ctx, config, name, region) if err != nil { return nil, err @@ -1380,14 +1388,16 @@ func ensureManagedCluster(ctx context.Context, config CloudConfig, name, region return nil, err } - // Setup the workload certificate config in the cluster. - if err := ensureWorkloadCertificateConfig(ctx, cluster); err != nil { - return nil, err - } + if mtlsEnabled { + // Setup the workload certificate config in the cluster. + if err := ensureWorkloadCertificateConfig(ctx, cluster); err != nil { + return nil, err + } - // Setup the trust config in the cluster. - if err := ensureTrustConfig(ctx, cluster); err != nil { - return nil, err + // Setup the trust config in the cluster. + if err := ensureTrustConfig(ctx, cluster); err != nil { + return nil, err + } } // Scale down resources used by system services. @@ -1848,7 +1858,7 @@ func ensureWeaverServices(ctx context.Context, config CloudConfig, cfg *config.G if err != nil { return err } - if err := ensureController(ctx, config, name, region, cfg.Telemetry, toolImageURL); err != nil { + if err := ensureController(ctx, config, name, region, cfg.Telemetry, cfg.Mtls, toolImageURL); err != nil { return err } for _, region := range cfg.Regions { @@ -1856,10 +1866,10 @@ func ensureWeaverServices(ctx context.Context, config CloudConfig, cfg *config.G if err != nil { return err } - if err := ensureDistributor(ctx, cluster, cfg.Telemetry, toolImageURL); err != nil { + if err := ensureDistributor(ctx, cluster, cfg.Telemetry, cfg.Mtls, toolImageURL); err != nil { return err } - if err := ensureManager(ctx, cluster, cfg.Telemetry, toolImageURL); err != nil { + if err := ensureManager(ctx, cluster, cfg.Telemetry, cfg.Mtls, toolImageURL); err != nil { return err } } @@ -1868,13 +1878,13 @@ func ensureWeaverServices(ctx context.Context, config CloudConfig, cfg *config.G // ensureController ensures that a controller is running in the config cluster. func ensureController(ctx context.Context, config CloudConfig, clusterName, region string, - telemetry *config.Telemetry, toolImageURL string) error { + telemetry *config.Telemetry, mtlsEnabled bool, toolImageURL string) error { cluster, err := GetClusterInfo(ctx, config, clusterName, region) if err != nil { return err } const name = "controller" - if err := ensureNannyDeployment(ctx, cluster, name, controllerKubeServiceAccount, telemetry, toolImageURL); err != nil { + if err := ensureNannyDeployment(ctx, cluster, name, controllerKubeServiceAccount, telemetry, mtlsEnabled, toolImageURL); err != nil { return err } if err := ensureNannyVerticalPodAutoscaler(ctx, cluster, name); err != nil { @@ -1885,9 +1895,9 @@ func ensureController(ctx context.Context, config CloudConfig, clusterName, regi // ensureDistributor ensures that a distributor is running in the given cluster. func ensureDistributor(ctx context.Context, cluster *ClusterInfo, - telemetry *config.Telemetry, toolImageURL string) error { + telemetry *config.Telemetry, mtlsEnabled bool, toolImageURL string) error { const name = "distributor" - if err := ensureNannyDeployment(ctx, cluster, name, distributorKubeServiceAccount, telemetry, toolImageURL); err != nil { + if err := ensureNannyDeployment(ctx, cluster, name, distributorKubeServiceAccount, telemetry, mtlsEnabled, toolImageURL); err != nil { return err } if err := ensureNannyVerticalPodAutoscaler(ctx, cluster, name); err != nil { @@ -1898,9 +1908,9 @@ func ensureDistributor(ctx context.Context, cluster *ClusterInfo, // ensureManager ensures that a manager is running in the given cluster. func ensureManager(ctx context.Context, cluster *ClusterInfo, - telemetry *config.Telemetry, toolImageURL string) error { + telemetry *config.Telemetry, mtlsEnabled bool, toolImageURL string) error { const name = "manager" - if err := ensureNannyDeployment(ctx, cluster, name, managerKubeServiceAccount, telemetry, toolImageURL); err != nil { + if err := ensureNannyDeployment(ctx, cluster, name, managerKubeServiceAccount, telemetry, mtlsEnabled, toolImageURL); err != nil { return err } if err := ensureNannyVerticalPodAutoscaler(ctx, cluster, name); err != nil { @@ -1912,7 +1922,7 @@ func ensureManager(ctx context.Context, cluster *ClusterInfo, // ensureNannyDeployment ensures that a nanny deployment with the given name // and service account is running in the given cluster. func ensureNannyDeployment(ctx context.Context, cluster *ClusterInfo, name string, - serviceAccount string, telemetry *config.Telemetry, toolImageURL string) error { + serviceAccount string, telemetry *config.Telemetry, mtlsEnabled bool, toolImageURL string) error { meta := ContainerMetadata{ Project: cluster.CloudConfig.Project, ClusterName: cluster.Name, @@ -1921,6 +1931,7 @@ func ensureNannyDeployment(ctx context.Context, cluster *ClusterInfo, name strin ContainerName: nannyContainerName, App: name, Telemetry: telemetry, + MtlsEnabled: mtlsEnabled, } metaStr, err := proto.ToEnv(&meta) if err != nil { @@ -1950,6 +1961,16 @@ func ensureNannyDeployment(ctx context.Context, cluster *ClusterInfo, name strin } return oldTag < newTag, nil } + + objectMetaTmpl := metav1.ObjectMeta{ + Labels: map[string]string{"app": name}, + } + if mtlsEnabled { + objectMetaTmpl.Annotations = map[string]string{ + "security.cloud.google.com/use-workload-certificates": "", + } + } + return patchDeployment(ctx, cluster, patchOptions{}, shouldUpdate, &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -1960,12 +1981,7 @@ func ensureNannyDeployment(ctx context.Context, cluster *ClusterInfo, name strin MatchLabels: map[string]string{"app": name}, }, Template: v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{"app": name}, - Annotations: map[string]string{ - "security.cloud.google.com/use-workload-certificates": "", - }, - }, + ObjectMeta: objectMetaTmpl, Spec: v1.PodSpec{ PriorityClassName: controlPriorityClassName, Containers: []v1.Container{ @@ -1973,7 +1989,7 @@ func ensureNannyDeployment(ctx context.Context, cluster *ClusterInfo, name strin Name: name, Image: toolImageURL, Args: []string{ - fmt.Sprintf("/weaver/weaver-gke %s --port=%d", name, nannyServingPort), + fmt.Sprintf("/weaver/weaver-gke %s --port=%d --mtls=%v", name, nannyServingPort, mtlsEnabled), }, Resources: v1.ResourceRequirements{ Requests: v1.ResourceList{ diff --git a/internal/gke/gke.go b/internal/gke/gke.go index 293991d..419d4b8 100644 --- a/internal/gke/gke.go +++ b/internal/gke/gke.go @@ -200,7 +200,7 @@ func multiplyQuantity(mult int64, x resource.Quantity) resource.Quantity { // deploy deploys the Kubernetes ReplicaSet in the given cluster. func deploy(ctx context.Context, cluster *ClusterInfo, logger *slog.Logger, cfg *config.GKEConfig, replicaSet string) error { - if err := ensureReplicaSet(ctx, cluster, logger, cfg, replicaSet); err != nil { + if err := ensureReplicaSet(ctx, cluster, logger, cfg, replicaSet, cfg.Mtls); err != nil { return err } @@ -363,7 +363,7 @@ func Store(cluster *ClusterInfo) store.Store { return newKubeStore(cluster.Clientset.CoreV1().ConfigMaps(namespaceName)) } -func ensureReplicaSet(ctx context.Context, cluster *ClusterInfo, logger *slog.Logger, cfg *config.GKEConfig, replicaSet string) error { +func ensureReplicaSet(ctx context.Context, cluster *ClusterInfo, logger *slog.Logger, cfg *config.GKEConfig, replicaSet string, mtlsEnabled bool) error { dep := cfg.Deployment kubeName := name{dep.App.Name, replicaSet, dep.Id[:8]}.DNSLabel() saName := cfg.ComponentIdentity[replicaSet] @@ -372,6 +372,18 @@ func ensureReplicaSet(ctx context.Context, cluster *ClusterInfo, logger *slog.Lo return err } + objectMetaTmpl := metav1.ObjectMeta{ + Labels: map[string]string{ + appKey: name{dep.App.Name}.DNSLabel(), + versionKey: name{dep.Id}.DNSLabel(), + replicaSetKey: name{replicaSet}.DNSLabel(), + }, + } + if mtlsEnabled { + objectMetaTmpl.Annotations = map[string]string{ + "security.cloud.google.com/use-workload-certificates": "", + } + } return patchDeployment(ctx, cluster, patchOptions{logger: logger}, nil /*shouldUpdate*/, &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: kubeName, @@ -391,16 +403,7 @@ func ensureReplicaSet(ctx context.Context, cluster *ClusterInfo, logger *slog.Lo }, }, Template: v1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - appKey: name{dep.App.Name}.DNSLabel(), - versionKey: name{dep.Id}.DNSLabel(), - replicaSetKey: name{replicaSet}.DNSLabel(), - }, - Annotations: map[string]string{ - "security.cloud.google.com/use-workload-certificates": "", - }, - }, + ObjectMeta: objectMetaTmpl, Spec: v1.PodSpec{ PriorityClassName: applicationPriorityClassName, Containers: []v1.Container{container}, diff --git a/internal/gke/nanny.go b/internal/gke/nanny.go index eb4a8df..a140c6c 100644 --- a/internal/gke/nanny.go +++ b/internal/gke/nanny.go @@ -145,7 +145,7 @@ func Controller(ctx context.Context, config CloudConfig) (string, *http.Client, } // RunController creates and runs a controller. -func RunController(ctx context.Context, port int) error { +func RunController(ctx context.Context, port int, mtlsEnabled bool) error { id := uuid.New().String() cluster, err := inClusterInfo(ctx) if err != nil { @@ -172,14 +172,19 @@ func RunController(ctx context.Context, port int) error { if err != nil { return err } - caCert, getSelfCert, err := getPodCerts() - if err != nil { - return err + var tlsConfig *tls.Config + if mtlsEnabled { + caCert, getSelfCert, err := getPodCerts() + if err != nil { + return err + } + tlsConfig = mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "distributor") + + // Create unique http client s.t., all the http requests from the controller to + // the distributor use the same underlying http client. } + httpClient := makeHttpClient(tlsConfig) - // Create unique http client s.t., all the http requests from the controller to - // the distributor use the same underlying http client. - httpClient := makeHttpClient(mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "distributor")) mux := http.NewServeMux() if _, err := controller.Start(ctx, mux, @@ -214,7 +219,7 @@ func RunController(ctx context.Context, port int) error { } // RunDistributor creates and runs a distributor. -func RunDistributor(ctx context.Context, port int) error { +func RunDistributor(ctx context.Context, port int, mtlsEnabled bool) error { id := uuid.New().String() cluster, err := inClusterInfo(ctx) if err != nil { @@ -242,9 +247,21 @@ func RunDistributor(ctx context.Context, port int) error { if err != nil { return err } - caCert, getSelfCert, err := getPodCerts() - if err != nil { - return err + + var caCert *x509.Certificate + var getSelfCert func() ([]byte, []byte, error) + + var tlsConfig *tls.Config + var managerAddr string + if mtlsEnabled { + caCert, getSelfCert, err = getPodCerts() + if err != nil { + return err + } + tlsConfig = mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "manager") + managerAddr = fmt.Sprintf("https://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region) + } else { + managerAddr = fmt.Sprintf("http://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region) } // Track http clients to the babysitters, keyed by replica set and the babysitter address. @@ -255,14 +272,13 @@ func RunDistributor(ctx context.Context, port int) error { bsHttpClients := map[clientKey]endpoints.Babysitter{} mux := http.NewServeMux() - managerAddr := fmt.Sprintf("https://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region) if _, err := distributor.Start(ctx, mux, s, logger, &manager.HttpClient{ Addr: managerAddr, - Client: makeHttpClient(mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "manager")), + Client: makeHttpClient(tlsConfig), }, cluster.Region, func(cfg *config.GKEConfig, replicaSet, addr string) (endpoints.Babysitter, error) { @@ -279,7 +295,11 @@ func RunDistributor(ctx context.Context, port int) error { defer mu.Unlock() httpClient, ok := bsHttpClients[httpClientId] if !ok { - httpClient = babysitter.NewHttpClient(addr, mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, replicaSetIdentity)) + var tlsConfig *tls.Config + if mtlsEnabled { + tlsConfig = mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, replicaSetIdentity) + } + httpClient = babysitter.NewHttpClient(addr, tlsConfig) bsHttpClients[httpClientId] = httpClient } return httpClient, nil @@ -303,15 +323,18 @@ func RunDistributor(ctx context.Context, port int) error { } logger.Info("Distributor listening", "address", lis.Addr()) + server := &http.Server{ - Handler: mux, - TLSConfig: mtls.ServerTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "controller"), + Handler: mux, + } + if mtlsEnabled { + server.TLSConfig = mtls.ServerTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, "controller") } return runNannyServer(ctx, server, lis) } // RunManager creates and runs a manager. -func RunManager(ctx context.Context, port int) error { +func RunManager(ctx context.Context, port int, mtlsEnabled bool) error { id := uuid.New().String() cluster, err := inClusterInfo(ctx) if err != nil { @@ -339,9 +362,18 @@ func RunManager(ctx context.Context, port int) error { if err != nil { return err } - caCert, getSelfCert, err := getPodCerts() - if err != nil { - return err + + var caCert *x509.Certificate + var getSelfCert func() ([]byte, []byte, error) + var managerAddr string + if mtlsEnabled { + caCert, getSelfCert, err = getPodCerts() + if err != nil { + return err + } + managerAddr = fmt.Sprintf("https://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region) + } else { + managerAddr = fmt.Sprintf("http://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region) } // Track http clients to the babysitters, keyed by replica set and the babysitter address. @@ -355,7 +387,7 @@ func RunManager(ctx context.Context, port int) error { m := manager.NewManager(ctx, s, logger, - fmt.Sprintf("https://manager.%s.svc.%s-%s:80", namespaceName, applicationClusterName, cluster.Region), + managerAddr, 2*time.Second, /*updateRoutingInterval*/ // getHealthyPods func(ctx context.Context, cfg *config.GKEConfig, replicaSet string) ([]*nanny.Pod, error) { @@ -372,7 +404,11 @@ func RunManager(ctx context.Context, port int) error { defer mu.Unlock() httpClient, ok := bsHttpClients[httpClientId] if !ok { - httpClient = babysitter.NewHttpClient(addr, mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, replicaSetIdentity)) + var tlsConfig *tls.Config + if mtlsEnabled { + tlsConfig = mtls.ClientTLSConfig(cluster.CloudConfig.Project, caCert, getSelfCert, replicaSetIdentity) + } + httpClient = babysitter.NewHttpClient(addr, tlsConfig) bsHttpClients[httpClientId] = httpClient } return httpClient @@ -425,8 +461,12 @@ func RunManager(ctx context.Context, port int) error { ) logger.Info("Manager listening", "address", lis.Addr()) - verifyPeerCert := func(peer []*x509.Certificate) (string, error) { - return mtls.VerifyCertificateChain(cluster.CloudConfig.Project, caCert, peer) + + var verifyPeerCert func(peer []*x509.Certificate) (string, error) + if mtlsEnabled { + verifyPeerCert = func(peer []*x509.Certificate) (string, error) { + return mtls.VerifyCertificateChain(cluster.CloudConfig.Project, caCert, peer) + } } return manager.RunHTTPServer(m, logger, lis, getSelfCert, verifyPeerCert) } @@ -483,7 +523,13 @@ func getHealthyPods(ctx context.Context, cfg *config.GKEConfig, logger *slog.Log <-ch wait.Done() }() - babysitterAddr := fmt.Sprintf("https://%s:%d", ip, babysitterPort) + var babysitterAddr string + if cfg.Mtls { + babysitterAddr = fmt.Sprintf("https://%s:%d", ip, babysitterPort) + } else { + babysitterAddr = fmt.Sprintf("http://%s:%d", ip, babysitterPort) + } + babysitter := newBabysitter(babysitterAddr) reply, err := babysitter.GetLoad(ctx, &endpoints.GetLoadRequest{}) if err != nil {