Skip to content

Commit

Permalink
Ensure action cgroups are placed under the k8s container cgroup
Browse files Browse the repository at this point in the history
  • Loading branch information
bduffany committed Nov 19, 2024
1 parent 564b8ea commit 5d7253d
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 13 deletions.
1 change: 1 addition & 0 deletions enterprise/server/cmd/executor/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ go_library(
"@org_golang_google_grpc//encoding/gzip",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//enterprise/server/remote_execution/cgroup",
"//enterprise/server/remote_execution/vbd",
"//server/util/networking",
],
Expand Down
9 changes: 8 additions & 1 deletion enterprise/server/cmd/executor/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,14 @@ func main() {
imageCacheAuth := container.NewImageCacheAuthenticator(container.ImageCacheAuthenticatorOpts{})
env.SetImageCacheAuthenticator(imageCacheAuth)

runnerPool, err := runner.NewPool(env, cacheRoot, &runner.PoolOptions{})
actionsCgroupParent, err := getActionsCgroupParent()
if err != nil {
log.Fatalf("Failed to get cgroup of executor process: %s", err)
}

runnerPool, err := runner.NewPool(env, cacheRoot, &runner.PoolOptions{
CgroupParent: actionsCgroupParent,
})
if err != nil {
log.Fatalf("Failed to initialize runner pool: %s", err)
}
Expand Down
31 changes: 31 additions & 0 deletions enterprise/server/cmd/executor/executor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,45 @@ package main

import (
"context"
"errors"
"fmt"
"os"
"strings"

"github.com/buildbuddy-io/buildbuddy/enterprise/server/remote_execution/cgroup"
"github.com/buildbuddy-io/buildbuddy/enterprise/server/remote_execution/vbd"
"github.com/buildbuddy-io/buildbuddy/server/util/flag"
"github.com/buildbuddy-io/buildbuddy/server/util/log"
"github.com/buildbuddy-io/buildbuddy/server/util/networking"
)

var (
inheritCgroup = flag.Bool("executor.inherit_cgroup", false, "Ensure that action cgroups inherit from the executor's starting cgroup. This flag will be removed in the future.", flag.Internal)
)

// getActionsCgroupParent returns the parent cgroup under which all action child
// cgroups should be created. The returned cgroup path is relative to the cgroup
// FS root path.
func getActionsCgroupParent() (string, error) {
if !*inheritCgroup {
return "", nil
}
relpath, err := cgroup.GetCurrent()
if err != nil {
if errors.Is(err, cgroup.ErrV1NotSupported) {
log.Warningf("Note: executor is running under cgroup v1, which has limited support. Some functionality may not work as expected.")
return "", nil
}
}
// Return the executor cgroup directly, for now.
// TODO(https://github.com/buildbuddy-io/buildbuddy-internal/issues/4110):
// restructure cgroups so that all actions are run in a single cgroup and
// the executor runs as a sibling of that.
s := strings.TrimPrefix(relpath, string(os.PathSeparator))
log.Infof("Using current process cgroup %q for action execution", s)
return s, nil
}

func setupNetworking(rootContext context.Context) {
// Clean up net namespaces in case vestiges remain from a previous executor.
if !networking.PreserveExistingNetNamespaces() {
Expand Down
4 changes: 4 additions & 0 deletions enterprise/server/cmd/executor/executor_notlinux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ package main

import "context"

func getActionsCgroupParent() (string, error) {
return "", nil
}

func setupNetworking(rootContext context.Context) {
}

Expand Down
38 changes: 38 additions & 0 deletions enterprise/server/remote_execution/cgroup/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,41 @@ const (
cidPlaceholder = "{{.ContainerID}}"
)

var (
// ErrV1NotSupported is returned when a function does not support cgroup V1.
ErrV1NotSupported = fmt.Errorf("cgroup v1 is not supported")
)

// GetCurrent returns the cgroup of which the current process is a member.
//
// The returned path is relative to the cgroupfs root, but begins with a slash.
//
// For example, if the current process is part of "/sys/fs/cgroup/foo", this
// returns "/foo".
func GetCurrent() (string, error) {
b, err := os.ReadFile("/proc/self/cgroup")
if err != nil {
return "", fmt.Errorf("read cgroup from procfs: %w", err)
}
s := strings.TrimRight(string(b), "\n")
if s == "" {
return "", nil
}
lines := strings.Split(s, "\n")
// In cgroup v1, a process can be a member of multiple cgroup hierarchies.
if len(lines) > 1 {
return "", ErrV1NotSupported
}
parts := strings.Split(lines[0], ":")
if len(parts) < 3 {
return "", fmt.Errorf("invalid /proc/self/cgroup value %q", err)
}
if controllers := parts[1]; controllers != "" {
return "", ErrV1NotSupported
}
return strings.Join(parts[2:], ":"), nil
}

// Setup configures the cgroup at the given path with the given settings.
// Any settings for cgroup controllers that aren't enabled are ignored.
// IO limits are applied to the given block device, if specified.
Expand Down Expand Up @@ -296,6 +331,9 @@ func (p *Paths) v1Stats(ctx context.Context, cid string) (*repb.UsageStats, erro
// started (i.e. `podman create` does not set up the cgroups; `podman start`
// does). We use this walking approach because the logic for figuring out the
// actual cgroup paths depends on the system setup and is pretty complicated.
//
// TODO: get rid of this lookup. Going forward, we're only supporting cgroup2
// and expect it to be mounted at /sys/fs/cgroup.
func (p *Paths) find(ctx context.Context, cid string) error {
// If already initialized, do nothing.
p.mu.RLock()
Expand Down
28 changes: 16 additions & 12 deletions enterprise/server/remote_execution/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,12 +541,17 @@ type PoolOptions struct {
// ContainerProvider is an optional implementation overriding
// newContainerImpl.
ContainerProvider container.Provider

// CgroupParent is the parent cgroup under which all runner containers are
// placed.
CgroupParent string
}

type pool struct {
env environment.Env
podID string
buildRoot string
cgroupParent string
blockDevice *block_io.Device
cacheRoot string
overrideProvider container.Provider
Expand Down Expand Up @@ -576,11 +581,12 @@ func NewPool(env environment.Env, cacheRoot string, opts *PoolOptions) (*pool, e
}

p := &pool{
env: env,
podID: podID,
buildRoot: *rootDirectory,
cacheRoot: cacheRoot,
runners: []*taskRunner{},
env: env,
podID: podID,
buildRoot: *rootDirectory,
cacheRoot: cacheRoot,
cgroupParent: opts.CgroupParent,
runners: []*taskRunner{},
}
if opts.ContainerProvider != nil {
p.overrideProvider = opts.ContainerProvider
Expand Down Expand Up @@ -1054,13 +1060,11 @@ func (p *pool) newRunner(ctx context.Context, key *rnpb.RunnerKey, props *platfo

func (p *pool) newContainer(ctx context.Context, props *platform.Properties, task *repb.ScheduledTask, workingDir string) (*container.TracedCommandContainer, error) {
args := &container.Init{
Props: props,
Task: task,
WorkDir: workingDir,
BlockDevice: p.blockDevice,
// TODO(http://go/b/4140): Don't nest container cgroups directly under
// the root cgroup.
CgroupParent: "",
Props: props,
Task: task,
WorkDir: workingDir,
BlockDevice: p.blockDevice,
CgroupParent: p.cgroupParent,
}

// Overriding in tests.
Expand Down

0 comments on commit 5d7253d

Please sign in to comment.