From cc21fe37bd5aec23613f617b3ca505b4d9982bd0 Mon Sep 17 00:00:00 2001 From: Rafael David Tinoco Date: Wed, 25 Oct 2023 11:51:14 -0300 Subject: [PATCH] chore(containers): make feature flag a safe guard - make default behavior (--containers) to enrich containers Keep a safeguard measure (change cmdline flag to --no-containers): - do not try to register runtime services - do not try to register runtime sockets (with or without --crs) --- builder/entrypoint.sh | 3 - cmd/tracee-ebpf/main.go | 4 +- cmd/tracee/cmd/root.go | 6 +- docs/docs/data-sources/containers.md | 4 - docs/docs/deep-dive/caching-events.md | 9 +- docs/docs/integrating/container-engines.md | 48 ++---- pkg/cmd/cobra/cobra.go | 12 +- pkg/cmd/tracee.go | 10 +- pkg/cmd/urfave/urfave.go | 12 +- pkg/config/config.go | 2 +- pkg/containers/containers.go | 115 ++++++++------- pkg/ebpf/controlplane/cgroups.go | 2 +- pkg/ebpf/controlplane/controller.go | 6 +- pkg/ebpf/events_enrich.go | 13 +- pkg/ebpf/events_pipeline.go | 162 +++++++++++---------- pkg/ebpf/tracee.go | 12 +- pkg/events/usermode.go | 4 +- 17 files changed, 209 insertions(+), 215 deletions(-) diff --git a/builder/entrypoint.sh b/builder/entrypoint.sh index d6401966ae98..5a38e9003c1f 100755 --- a/builder/entrypoint.sh +++ b/builder/entrypoint.sh @@ -12,8 +12,6 @@ TRACEE_EXE=${TRACEE_EXE:="/tracee/tracee"} LIBBPFGO_OSRELEASE_FILE=${LIBBPFGO_OSRELEASE_FILE:="/etc/os-release-host"} -CONTAINERS_ENRICHMENT=${CONTAINERS_ENRICHMENT:="0"} - CAPABILITIES_BYPASS=${CAPABILITIES_BYPASS:="0"} CAPABILITIES_ADD=${CAPABILITIES_ADD:=""} CAPABILITIES_DROP=${CAPABILITIES_DROP:=""} @@ -35,7 +33,6 @@ run_tracee() { --output=option:parse-arguments \ --cache cache-type=mem \ --cache mem-cache-size=512 \ - --containers=$CONTAINERS_ENRICHMENT \ --capabilities bypass=$CAPABILITIES_BYPASS \ --capabilities add=$CAPABILITIES_ADD \ --capabilities drop=$CAPABILITIES_DROP diff --git a/cmd/tracee-ebpf/main.go b/cmd/tracee-ebpf/main.go index 85461faf550e..2c5dab357fba 100644 --- 
a/cmd/tracee-ebpf/main.go +++ b/cmd/tracee-ebpf/main.go @@ -144,8 +144,8 @@ func main() { Value: ":3366", }, &cli.BoolFlag{ - Name: "containers", - Usage: "enable container info enrichment to events. this feature is experimental and may cause unexpected behavior in the pipeline", + Name: "no-containers", + Usage: "disable container info enrichment to events. safeguard option.", }, &cli.StringSliceFlag{ Name: "log", diff --git a/cmd/tracee/cmd/root.go b/cmd/tracee/cmd/root.go index 745983a62eb7..820b678d49fc 100644 --- a/cmd/tracee/cmd/root.go +++ b/cmd/tracee/cmd/root.go @@ -148,11 +148,11 @@ func initCmd() error { // Container flags rootCmd.Flags().Bool( - "containers", + "no-containers", false, - "\t\t\t\t\tEnable container info enrichment to events. This feature is experimental and may cause unexpected behavior in the pipeline", + "\t\t\t\t\tDisable container info enrichment to events. Safeguard option.", ) - err = viper.BindPFlag("containers", rootCmd.Flags().Lookup("containers")) + err = viper.BindPFlag("no-containers", rootCmd.Flags().Lookup("no-containers")) if err != nil { return errfmt.WrapError(err) } diff --git a/docs/docs/data-sources/containers.md b/docs/docs/data-sources/containers.md index 638c7d25bcaf..c88a71edcf6c 100644 --- a/docs/docs/data-sources/containers.md +++ b/docs/docs/data-sources/containers.md @@ -4,10 +4,6 @@ The [container enrichment](../integrating/container-engines.md) feature gives Tr The [data source](./overview.md) feature makes the information gathered from active containers accessible to signatures. When an event is captured and triggers a signature, that signature can retrieve information about the container using its container ID, which is bundled with the event being analyzed by the signature. -## Enabling the Feature - -The data source does not need to be enabled, but requires that the `container enrichment` feature is. To enable the enrichment feature, execute trace with `--containers`. 
For more information you can read [container enrichment](../integrating/container-engines.md) page. - ## Internal Data Organization From the [data-sources documentation](../data-sources/overview.md), you'll see that searches use keys. It's a bit like looking up information with a specific tag (or a key=value storage). diff --git a/docs/docs/deep-dive/caching-events.md b/docs/docs/deep-dive/caching-events.md index df88a81000dd..db8e36ce89c2 100644 --- a/docs/docs/deep-dive/caching-events.md +++ b/docs/docs/deep-dive/caching-events.md @@ -35,14 +35,13 @@ The effects of this are the following: ## Use caching -Example using **1GB cache**, container **enrichment** in the pipeline, argument -**parsing** so arguments are formatted in a human consumable way: +Example using **1GB cache**: ```console sudo ./dist/tracee \ --cache cache-type=mem \ --cache mem-cache-size=1024 \ - --containers -o format:json \ + -o format:json \ -o option:parse-arguments \ -trace container \ --crs docker:/var/run/docker.sock @@ -54,5 +53,5 @@ sudo ./dist/tracee \ | jq -c '. | {cgroupid, processname, containername}' ``` You may cause latencies in **tracee** pipeline because the event json - processing from `jq` might not be as fast as how **tracee** is capable - of writing events to it. + processing from `jq` might not be as fast as how **tracee** is capable of + writing events to it. diff --git a/docs/docs/integrating/container-engines.md b/docs/docs/integrating/container-engines.md index 03873a7cf080..6f7289b09c05 100644 --- a/docs/docs/integrating/container-engines.md +++ b/docs/docs/integrating/container-engines.md @@ -4,16 +4,14 @@ Tracee is capable of **extracting information about running containers**. It does that by tracking container namespaces creation kernel events and enriching those events by communicating with the relevant container's runtime and SDK. -!!! 
Experimental Warning - This feature is experimental and should be explicitly enabled in - **tracee**, by using the `--container` flag OR, if running tracee - container image, setting the `CONTAINERS_ENRICHMENT` environment flag (see - example bellow). - 1. Running **tracee** manually If running tracee directly (not in a container), it will automatically search for known supported runtimes in their socket's default locations. + You may track if tracee was able to find the container runtime socket by + running tracee with `--log debug` option. There will be a line for each known + runtime engine socket location and a message saying if tracee was able to + find it or not. 2. Running **tracee** using a docker container @@ -22,32 +20,28 @@ those events by communicating with the relevant container's runtime and SDK. Using containerd as our runtime for example, this can be done by running tracee like: - + ```console docker run \ --name tracee --rm -it \ --pid=host --cgroupns=host --privileged \ -v /etc/os-release:/etc/os-release-host:ro \ -v /var/run/containerd:/var/run/containerd \ - -e CONTAINERS_ENRICHMENT=1 \ aquasec/tracee:{{ git.tag }} ``` Most container runtimes have their sockets installed by default in `/var/run`. If your system includes multiple container runtimes, tracee can - track them all, however one should mount either all their runtime sockets - or `/var/run` in it's entirety to do so. + track them all, however one should mount either all their runtime sockets or + `/var/run` in its entirety to do so. ## Supported Container Runtime Engines Currently, tracee will look in the following paths for auto-discovering the listed runtimes: 1. Docker: `/var/run/docker.sock` - 2. Containerd: `/var/run/containerd/containerd.sock` - 3. CRI-O: `/var/run/crio/crio.sock` - 4. Podman: `/var/run/podman/podman.sock` !!! 
Tip @@ -56,30 +50,6 @@ Currently, tracee will look in the following paths for auto-discovering the list nesting and so sockets must be appropriately mounted and set up for tracee to enrich all containers correctly. -## Viewing enrichment output - -As a user, when container enrichment is enabled the event output will include enriched fields in these cases: - -1. Running **tracee** with a json format will include all container enriched fields - - ```console - docker run \ - --name tracee --rm -it \ - --pid=host --cgroupns=host --privileged \ - -v /etc/os-release:/etc/os-release-host:ro \ - -v /var/run/docker.sock:/var/run/docker.sock \ - aquasec/tracee:{{ git.tag }} \ - --output json --containers - ``` - -2. Running in container filtering mode and with enrichment enabled will add the image name to the table printer +## Enrichment output - ```console - docker run \ - --name tracee --rm -it \ - --pid=host --cgroupns=host --privileged \ - -v /etc/os-release:/etc/os-release-host:ro \ - -v /var/run/containerd:/var/run/containerd \ - aquasec/tracee:{{ git.tag }} \ - --scope container --containers - ``` +Example of the output. 
diff --git a/pkg/cmd/cobra/cobra.go b/pkg/cmd/cobra/cobra.go index 35113d9431c3..fcef2f59df49 100644 --- a/pkg/cmd/cobra/cobra.go +++ b/pkg/cmd/cobra/cobra.go @@ -63,7 +63,7 @@ func GetTraceeRunner(c *cobra.Command, version string) (cmd.Runner, error) { cfg := config.Config{ PerfBufferSize: viper.GetInt("perf-buffer-size"), BlobPerfBufferSize: viper.GetInt("blob-perf-buffer-size"), - ContainersEnrich: viper.GetBool("containers"), + NoContainersEnrich: viper.GetBool("no-containers"), } // OS release information @@ -84,11 +84,13 @@ func GetTraceeRunner(c *cobra.Command, version string) (cmd.Runner, error) { // Container Runtime command line flags - sockets, err := flags.PrepareContainers(viper.GetStringSlice("crs")) - if err != nil { - return runner, err + if !cfg.NoContainersEnrich { + sockets, err := flags.PrepareContainers(viper.GetStringSlice("crs")) + if err != nil { + return runner, err + } + cfg.Sockets = sockets } - cfg.Sockets = sockets // Cache command line flags diff --git a/pkg/cmd/tracee.go b/pkg/cmd/tracee.go index 30fc327ef43f..8178cbdc34c3 100644 --- a/pkg/cmd/tracee.go +++ b/pkg/cmd/tracee.go @@ -111,11 +111,11 @@ func GetContainerMode(cfg config.Config) config.ContainerMode { for p := range cfg.Policies.Map() { if p.ContainerFilterEnabled() { - // enable printer container print mode if container filters are set - containerMode = config.ContainerModeEnabled - if cfg.ContainersEnrich { - // further enable container enrich print mode if container enrichment is enabled - containerMode = config.ContainerModeEnriched + // Container Enrichment is enabled by default ... + containerMode = config.ContainerModeEnriched + if cfg.NoContainersEnrich { + // ... but might be disabled as a safeguard measure. 
+ containerMode = config.ContainerModeEnabled } break diff --git a/pkg/cmd/urfave/urfave.go b/pkg/cmd/urfave/urfave.go index e4702229a8c0..b85ae92c3528 100644 --- a/pkg/cmd/urfave/urfave.go +++ b/pkg/cmd/urfave/urfave.go @@ -22,7 +22,7 @@ func GetTraceeRunner(c *cli.Context, version string) (cmd.Runner, error) { cfg := config.Config{ PerfBufferSize: c.Int("perf-buffer-size"), BlobPerfBufferSize: c.Int("blob-perf-buffer-size"), - ContainersEnrich: c.Bool("containers"), + NoContainersEnrich: c.Bool("no-containers"), } // Output command line flags @@ -63,11 +63,13 @@ func GetTraceeRunner(c *cli.Context, version string) (cmd.Runner, error) { // Container Runtime command line flags - sockets, err := flags.PrepareContainers(c.StringSlice("crs")) - if err != nil { - return runner, err + if !cfg.NoContainersEnrich { + sockets, err := flags.PrepareContainers(c.StringSlice("crs")) + if err != nil { + return runner, err + } + cfg.Sockets = sockets } - cfg.Sockets = sockets // Cache command line flags diff --git a/pkg/config/config.go b/pkg/config/config.go index 326dcbd59d8d..ab65456303f2 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -30,7 +30,7 @@ type Config struct { KernelConfig *helpers.KernelConfig OSInfo *helpers.OSInfo Sockets runtime.Sockets - ContainersEnrich bool + NoContainersEnrich bool EngineConfig engine.Config MetricsEnabled bool } diff --git a/pkg/containers/containers.go b/pkg/containers/containers.go index 0c268430d2e7..ac6560cf2a14 100644 --- a/pkg/containers/containers.go +++ b/pkg/containers/containers.go @@ -44,9 +44,10 @@ type CgroupInfo struct { expiresAt time.Time } -// New initializes a Containers object and returns a pointer to it. -// User should further call "Populate" and iterate with Containers data. +// New initializes a Containers object and returns a pointer to it. User should further +// call "Populate" and iterate with Containers data. 
func New( + noContainersEnrich bool, cgroups *cgroup.Cgroups, sockets cruntime.Sockets, mapName string, @@ -61,34 +62,36 @@ func New( bpfMapName: mapName, } - runtimeService := RuntimeInfoService(sockets) + // Attempt to register enrichers for all supported runtimes. - // attempt to register for all supported runtimes - err := runtimeService.Register(cruntime.Containerd, cruntime.ContainerdEnricher) - if err != nil { - logger.Debugw("Enricher", "error", err) - } - err = runtimeService.Register(cruntime.Crio, cruntime.CrioEnricher) - if err != nil { - logger.Debugw("Enricher", "error", err) - } - err = runtimeService.Register(cruntime.Docker, cruntime.DockerEnricher) - if err != nil { - logger.Debugw("Enricher", "error", err) - } - // podman and docker use compatible http apis, therefore the docker enricher - // works for podman. - err = runtimeService.Register(cruntime.Podman, cruntime.DockerEnricher) - if err != nil { - logger.Debugw("Enricher", "error", err) - } + if !noContainersEnrich { + runtimeService := RuntimeInfoService(sockets) - containers.enricher = runtimeService + err := runtimeService.Register(cruntime.Containerd, cruntime.ContainerdEnricher) + if err != nil { + logger.Debugw("Enricher", "error", err) + } + err = runtimeService.Register(cruntime.Crio, cruntime.CrioEnricher) + if err != nil { + logger.Debugw("Enricher", "error", err) + } + err = runtimeService.Register(cruntime.Docker, cruntime.DockerEnricher) + if err != nil { + logger.Debugw("Enricher", "error", err) + } + // Docker enricher also works for podman (compatible HTTP apis) + err = runtimeService.Register(cruntime.Podman, cruntime.DockerEnricher) + if err != nil { + logger.Debugw("Enricher", "error", err) + } + + containers.enricher = runtimeService + } return containers, nil } -// Close executes cleanup logic for Containers object +// Close executes cleanup logic for Containers object. 
func (c *Containers) Close() error { return nil } @@ -109,7 +112,7 @@ func (c *Containers) Populate() error { } // GetContainerIdFromTaskDir gets a containerID from a given task or process -// directory path +// directory path. func GetContainerIdFromTaskDir(taskPath string) (string, error) { containerId := "" taskPath = fmt.Sprintf("%s/cgroup", taskPath) @@ -159,14 +162,15 @@ func (c *Containers) populate() error { return filepath.WalkDir(c.cgroups.GetDefaultCgroup().GetMountPoint(), fn) } -// cgroupUpdate checks if given path belongs to a known container runtime, -// saving container information in Containers CgroupInfo map. -// NOTE: ALL given cgroup dir paths are stored in CgroupInfo map. -// NOTE: not thread-safe, lock should be placed in the external calling function, depending -// on the transaction length. -func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time, dead bool) (CgroupInfo, error) { - // Cgroup paths should be stored and evaluated relative to the mountpoint, - // trim it from the path. +// cgroupUpdate checks if given path belongs to a known container runtime, saving +// container information in Containers CgroupInfo map. +func (c *Containers) cgroupUpdate( + cgroupId uint64, path string, ctime time.Time, dead bool, +) (CgroupInfo, error) { + // NOTE: ALL given cgroup dir paths are stored in CgroupInfo map. + // NOTE: not thread-safe, lock handled by external calling function + + // Cgroup paths should be stored and evaluated relative to the mountpoint. 
path = strings.TrimPrefix(path, c.cgroups.GetDefaultCgroup().GetMountPoint()) containerId, containerRuntime, isRoot := getContainerIdFromCgroup(path) container := cruntime.ContainerMetadata{ @@ -187,10 +191,10 @@ func (c *Containers) cgroupUpdate(cgroupId uint64, path string, ctime time.Time, return info, nil } -// EnrichCgroupInfo checks for a given cgroupId if it is relevant to some running container -// it then calls the runtime info service to gather additional data from the container's runtime -// it returns the retrieved metadata and a relevant error -// this function shouldn't be called twice for the same cgroupId unless attempting a retry +// EnrichCgroupInfo checks for a given cgroupId if it is relevant to some running +// container. It then calls the runtime info service to gather additional data from the +// container's runtime. It returns the retrieved metadata and a relevant error. It should +// not be called twice for the same cgroupId unless attempting a retry. func (c *Containers) EnrichCgroupInfo(cgroupId uint64) (cruntime.ContainerMetadata, error) { c.cgroupsMutex.Lock() defer c.cgroupsMutex.Unlock() @@ -245,7 +249,7 @@ var ( gardenContainerIdFromCgroupRegex = regexp.MustCompile(`^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){4}$`) ) -// getContainerIdFromCgroup extracts container id and its runtime from path. It returns +// getContainerIdFromCgroup extracts container id and its runtime from path. It returns // the container id, the runtime string and a bool telling if the container is the root of // its cgroupfs hierarchy. 
func getContainerIdFromCgroup(cgroupPath string) (string, cruntime.RuntimeId, bool) { @@ -288,9 +292,10 @@ func getContainerIdFromCgroup(cgroupPath string) (string, cruntime.RuntimeId, bo // non-systemd docker with format in GitHub Actions: .../actions_job/01adbf...f26db7f/ runtime = cruntime.Docker } - // return first match: closest to root dir path component - // (to have container id of the outer container) - // container root determined by being matched on the last path part + + // Return the first match, closest to the root dir path component, so that the + // container id of the outer container is returned. The container root is + // determined by being matched on the last path part. return id, runtime, i == len(cgroupParts)-1 } @@ -306,9 +311,9 @@ func getContainerIdFromCgroup(cgroupPath string) (string, cruntime.RuntimeId, bo return "", cruntime.Unknown, false } -// CgroupRemove removes cgroupInfo of deleted cgroup dir from Containers struct. -// NOTE: Expiration logic of 30 seconds to avoid race conditions (if cgroup dir -// event arrives too fast and its cgroupInfo data is still needed). +// CgroupRemove removes cgroupInfo of deleted cgroup dir from Containers struct. There is +// an expiration logic of 30 seconds to avoid race conditions (if cgroup dir event arrives +// too fast and its cgroupInfo data is still needed). func (c *Containers) CgroupRemove(cgroupId uint64, hierarchyID uint32) { const expiryTime = 30 * time.Second // cgroupv1: no need to check other controllers than the default @@ -369,7 +374,7 @@ func (c *Containers) CgroupMkdir(cgroupId uint64, subPath string, hierarchyID ui return c.cgroupUpdate(cgroupId, subPath, curTime, true) } -// FindContainerCgroupID32LSB returns the 32 LSB of the Cgroup ID for a given container ID +// FindContainerCgroupID32LSB returns the 32 LSB of the Cgroup ID for a given container ID. 
func (c *Containers) FindContainerCgroupID32LSB(containerID string) []uint32 { var cgroupIDs []uint32 c.cgroupsMutex.RLock() @@ -382,17 +387,16 @@ func (c *Containers) FindContainerCgroupID32LSB(containerID string) []uint32 { return cgroupIDs } -// GetCgroupInfo returns the Containers struct cgroupInfo data of a given cgroupId. +// GetCgroupInfo returns the contents of the Containers struct cgroupInfo data of a given cgroupId. func (c *Containers) GetCgroupInfo(cgroupId uint64) CgroupInfo { if !c.CgroupExists(cgroupId) { - // There should be a cgroupInfo for the given cgroupId but there isn't. - // Tracee might be processing an event for an already created container - // before the CgroupMkdirEventID logic was executed, for example. - - // Get the path for given cgroupId from cgroupfs and update its - // cgroupInfo in the Containers struct. An empty subPath will make - // getCgroupPath() to walk all cgroupfs directories until it finds the - // directory of given cgroupId. + // There should be a cgroupInfo for the given cgroupId but there isn't. Tracee + // might be processing an event for an already created container before the + // CgroupMkdirEventID logic was executed, for example. + + // Get the path for given cgroupId from cgroupfs and update its cgroupInfo in the + // Containers struct. An empty subPath will make getCgroupPath() WALK ALL THE + // cgroupfs directories, until it finds the directory of given cgroupId. var cgroupInfo CgroupInfo c.cgroupsMutex.Lock() 
func (c *Containers) PopulateBpfMap(bpfModule *libbpfgo.Module) error { containersMap, err := bpfModule.GetMap(c.bpfMapName) if err != nil { @@ -471,6 +477,7 @@ func (c *Containers) PopulateBpfMap(bpfModule *libbpfgo.Module) error { return errfmt.WrapError(err) } +// RemoveFromBPFMap removes a container from the map so eBPF programs can stop tracking it. func (c *Containers) RemoveFromBPFMap(bpfModule *libbpfgo.Module, cgroupId uint64, hierarchyID uint32) error { // cgroupv1: no need to check other controllers than the default switch c.cgroups.GetDefaultCgroup().(type) { diff --git a/pkg/ebpf/controlplane/cgroups.go b/pkg/ebpf/controlplane/cgroups.go index b6ca43c3d4d4..83c29be8598b 100644 --- a/pkg/ebpf/controlplane/cgroups.go +++ b/pkg/ebpf/controlplane/cgroups.go @@ -47,7 +47,7 @@ func (ctrl *Controller) processCgroupMkdir(args []trace.Argument) error { return errfmt.WrapError(err) } - if ctrl.enrichEnabled { + if !ctrl.enrichDisabled { // If cgroupId belongs to a container, enrich now (in a goroutine) go func() { _, err := ctrl.cgroupManager.EnrichCgroupInfo(cgroupId) diff --git a/pkg/ebpf/controlplane/controller.go b/pkg/ebpf/controlplane/controller.go index 677960750ba3..2112d52af753 100644 --- a/pkg/ebpf/controlplane/controller.go +++ b/pkg/ebpf/controlplane/controller.go @@ -27,14 +27,14 @@ type Controller struct { signalBuffer *libbpfgo.PerfBuffer cgroupManager *containers.Containers processTree *proctree.ProcessTree - enrichEnabled bool + enrichDisabled bool } // NewController creates a new controller. 
func NewController( bpfModule *libbpfgo.Module, cgroupManager *containers.Containers, - enrichEnabled bool, + enrichDisabled bool, procTree *proctree.ProcessTree, ) (*Controller, error) { var err error @@ -45,7 +45,7 @@ func NewController( bpfModule: bpfModule, cgroupManager: cgroupManager, processTree: procTree, - enrichEnabled: enrichEnabled, + enrichDisabled: enrichDisabled, } p.signalBuffer, err = bpfModule.InitPerfBuf("signals", p.signalChan, p.lostSignalChan, 1024) diff --git a/pkg/ebpf/events_enrich.go b/pkg/ebpf/events_enrich.go index 30cbcfb85dbb..83d084472668 100644 --- a/pkg/ebpf/events_enrich.go +++ b/pkg/ebpf/events_enrich.go @@ -44,7 +44,15 @@ import ( // is enriched and its enqueued events are de-queued. // -func (t *Tracee) enrichContainerEvents(ctx gocontext.Context, in <-chan *trace.Event) (chan *trace.Event, chan error) { +// enrichContainerEvents is a pipeline stage that enriches container events with metadata. +func (t *Tracee) enrichContainerEvents(ctx gocontext.Context, in <-chan *trace.Event, +) ( + chan *trace.Event, chan error, +) { + // Events may be enriched in the initial decode state, if the enrichment data has been + // stored in the Containers structure. In that case, this pipeline stage will be + // quickly skipped. The enrichment happens in a different stage to ensure that the + // pipeline is not blocked by the container runtime calls. 
const ( contQueueSize = 10000 // max num of events queued per container queueReadySize = 100000 // max num of events queued in total ) @@ -79,7 +87,6 @@ func (t *Tracee) enrichContainerEvents(ctx gocontext.Context, in <-chan *trace.E if event == nil { continue // might happen during initialization (ctrl+c seg faults) } - eventID := events.ID(event.EventID) // send out irrelevant events (non container or already enriched), don't skip the cgroup lifecycle events if (event.Container.ID == "" || event.Container.Name != "") && eventID != events.CgroupMkdir && eventID != events.CgroupRmdir { @@ -134,11 +141,9 @@ func (t *Tracee) enrichContainerEvents(ctx gocontext.Context, in <-chan *trace.E // de-queue event if queue is enriched if _, ok := queues[cgroupId]; ok { event := <-queues[cgroupId] - if event == nil { continue // might happen during initialization (ctrl+c seg faults) } - eventID := events.ID(event.EventID) // check if not enriched, and only enrich regular non cgroup related events if event.Container.Name == "" && eventID != events.CgroupMkdir && eventID != events.CgroupRmdir { diff --git a/pkg/ebpf/events_pipeline.go b/pkg/ebpf/events_pipeline.go index 360adcceac79..7eb0c2c2c673 100644 --- a/pkg/ebpf/events_pipeline.go +++ b/pkg/ebpf/events_pipeline.go @@ -16,92 +16,98 @@ import ( "github.com/aquasecurity/tracee/types/trace" ) -// Max depth of each stack trace to track -// Matches 'MAX_STACK_DEPTH' in eBPF code +// Max depth of each stack trace to track (MAX_STACK_DEPTH in eBPF code) const maxStackDepth int = 20 // Matches 'NO_SYSCALL' in eBPF code const noSyscall int32 = -1 -// handleEvents is a high-level function that starts all operations related to events processing +// handleEvents is the main pipeline of tracee. It receives events from the perf buffer +// and passes them through a series of stages, each stage is a goroutine that performs a +// specific task on the event. The pipeline is started in a separate goroutine. 
func (t *Tracee) handleEvents(ctx context.Context) { logger.Debugw("Starting handleEvents goroutine") defer logger.Debugw("Stopped handleEvents goroutine") var errcList []<-chan error - // Source pipeline stage. + // Decode stage: events are read from the perf buffer and decoded into trace.Event type. + eventsChan, errc := t.decodeEvents(ctx, t.eventsChannel) errcList = append(errcList, errc) + // Cache stage: events go through a caching function. + if t.config.Cache != nil { eventsChan, errc = t.queueEvents(ctx, eventsChan) errcList = append(errcList, errc) } + // Sort stage: events go through a sorting function. + if t.config.Output.EventsSorting { eventsChan, errc = t.eventsSorter.StartPipeline(ctx, eventsChan) errcList = append(errcList, errc) } - // Process events stage - // in this stage we perform event specific logic + // Process events stage: events go through processing functions. + eventsChan, errc = t.processEvents(ctx, eventsChan) errcList = append(errcList, errc) - // Enrichment stage - // In this stage container events are enriched with additional runtime data - // Events may be enriched in the initial decode state if the enrichment data has been stored in the Containers structure - // In that case, this pipeline stage will be quickly skipped - // This is done in a separate stage to ensure enrichment is non blocking (since container runtime calls may timeout and block the pipeline otherwise) - if t.config.ContainersEnrich { + // Enrichment stage: container events are enriched with additional runtime data. + + if !t.config.NoContainersEnrich { // TODO: remove safe-guard soon. eventsChan, errc = t.enrichContainerEvents(ctx, eventsChan) errcList = append(errcList, errc) } // Derive events stage: events go through a derivation function. + eventsChan, errc = t.deriveEvents(ctx, eventsChan) errcList = append(errcList, errc) - // Engine events stage: events go through a signatures match. 
+ // Engine events stage: events go through the signatures engine for detection. + if t.config.EngineConfig.Enabled { eventsChan, errc = t.engineEvents(ctx, eventsChan) errcList = append(errcList, errc) } // Sink pipeline stage: events go through printers. + errc = t.sinkEvents(ctx, eventsChan) errcList = append(errcList, errc) // Pipeline started. Waiting for pipeline to complete + if err := t.WaitForPipeline(errcList...); err != nil { logger.Errorw("Pipeline", "error", err) } } -// Under some circumstances, tracee-rules might be slower to consume events -// than tracee-ebpf is capable of generating them. This requires -// tracee-ebpf to deal with this possible lag, but, at the same, -// perf-buffer consumption can't be left behind (or important events coming -// from the kernel might be loss, causing detection misses). +// Under some circumstances, tracee-rules might be slower to consume events than +// tracee-ebpf is capable of generating them. This requires tracee-ebpf to deal with this +// possible lag, but, at the same time, perf-buffer consumption can't be left behind (or +// important events coming from the kernel might be lost, causing detection misses). // // There are 3 variables connected to this issue: // -// 1) perf buffer could be increased to hold very big amount of memory -// pages: The problem with this approach is that the requested space, -// to perf-buffer, through libbpf, has to be contiguous and it is almost -// impossible to get very big contiguous allocations through mmap after -// a node is running for some time. +// 1) perf buffer could be increased to hold very big amount of memory pages: The problem +// with this approach is that the requested space, to perf-buffer, through libbpf, has to +// be contiguous and it is almost impossible to get very big contiguous allocations +// through mmap after a node is running for some time. 
// -// 2) raising the events channel buffer to hold a very big amount of -// events: The problem with this approach is that the overhead of -// dealing with that amount of buffers, in a golang channel, causes -// event losses as well. It means this is not enough to relief the -// pressure from kernel events into perf-buffer. +// 2) raising the events channel buffer to hold a very big amount of events: The problem +// with this approach is that the overhead of dealing with that amount of buffers, in a +// golang channel, causes event losses as well. It means this is not enough to relieve the +// pressure from kernel events into perf-buffer. // // 3) create an internal, to tracee-ebpf, buffer based on the node size. -// queueEvents implements an internal FIFO queue for caching events +// queueEvents is the cache pipeline stage. For each received event, it goes through a +// caching function that will enqueue the event into a queue. The queue is then de-queued +// by a different goroutine that will send the event down the pipeline. func (t *Tracee) queueEvents(ctx context.Context, in <-chan *trace.Event) (chan *trace.Event, chan error) { out := make(chan *trace.Event, 10000) errc := make(chan error, 1) @@ -143,7 +149,9 @@ func (t *Tracee) queueEvents(ctx context.Context, in <-chan *trace.Event) (chan return out, errc } -// decodeEvents read the events received from the BPF programs and parse it into trace.Event type +// decodeEvents is the event decoding pipeline stage. For each received event, it goes +// through a decoding function that will decode the event from its raw format into a +// trace.Event type. func (t *Tracee) decodeEvents(outerCtx context.Context, sourceChan chan []byte) (<-chan *trace.Event, <-chan error) { out := make(chan *trace.Event, 10000) errc := make(chan error, 1) @@ -272,8 +280,8 @@ func (t *Tracee) decodeEvents(outerCtx context.Context, sourceChan chan []byte) // matchPolicies does the userland filtering (policy matching) for events. 
It iterates through all // existing policies, that were set by the kernel in the event bitmap. Some of those policies might // not match the event after userland filters are applied. In those cases, the policy bit is cleared -// (so the event is "filtered" for that policy). -// This may be called in different stages of the pipeline (decode, derive, engine). +// (so the event is "filtered" for that policy). This may be called in different stages of the +// pipeline (decode, derive, engine). func (t *Tracee) matchPolicies(event *trace.Event) uint64 { eventID := events.ID(event.EventID) bitmap := event.MatchedPoliciesKernel @@ -292,10 +300,10 @@ func (t *Tracee) matchPolicies(event *trace.Event) uint64 { continue } - // The event might have this policy bit set, but the policy might not have this event ID. - // This happens whenever the event submitted by the kernel is going to derive an event that - // this policy is interested in. In this case, don't do anything and let the derivation - // stage handle this event. + // The event might have this policy bit set, but the policy might not have this + // event ID. This happens whenever the event submitted by the kernel is going to + // derive an event that this policy is interested in. In this case, don't do + // anything and let the derivation stage handle this event. _, ok := p.EventsToTrace[eventID] if !ok { continue @@ -375,15 +383,17 @@ func parseContextFlags(containerId string, flags uint32) trace.ContextFlags { ) var cflags trace.ContextFlags - // Handle the edge case where containerStarted flag remains true despite an empty containerId. - // See #3251 for more details. + // Handle the edge case where containerStarted flag remains true despite an empty + // containerId. See #3251 for more details. 
cflags.ContainerStarted = (containerId != "") && (flags&contStartFlag) != 0 cflags.IsCompat = (flags & IsCompatFlag) != 0 return cflags } -// Get the syscall name from its ID, taking into account architecture and 32bit/64bit modes +// parseSyscallID returns the syscall name from its ID, taking into account architecture +// and 32bit/64bit modes. It also returns an error if the syscall ID is not found in the +// events definition. func parseSyscallID(syscallID int, isCompat bool, compatTranslationMap map[events.ID]events.ID) (string, error) { id := events.ID(syscallID) if !isCompat { @@ -404,10 +414,10 @@ return "", errfmt.Errorf("no syscall event with compat syscall id %d", syscallID) } -// processEvents is the event processing pipeline stage. For each received event, it goes through -// all event processors and check if there is any internal processing needed for that event type. -// It also clears policy bits for out-of-order container related events (after the processing -// logic). +// processEvents is the event processing pipeline stage. For each received event, it goes +// through all event processors and checks if there is any internal processing needed for +// that event type. It also clears policy bits for out-of-order container related events +// (after the processing logic). This stage also starts some logic that will be used by +// the processing logic in subsequent events. func (t *Tracee) processEvents(ctx context.Context, in <-chan *trace.Event) ( <-chan *trace.Event, <-chan error, ) { @@ -439,13 +450,14 @@ func (t *Tracee) processEvents(ctx context.Context, in <-chan *trace.Event) ( // Get a bitmap with all policies containing container filters policiesWithContainerFilter := t.config.Policies.ContainerFilterEnabled() - // Filter out events that don't have a container ID from all the policies that have - // container filters. 
This will guarantee that any of those policies won't get matched - by this event. This situation might happen if the events from a recently created - container appear BEFORE the initial cgroup_mkdir of that container root directory. - This could be solved by sorting the events by a monotonic timestamp, for example, - but sorting might not always be enabled, so, in those cases, ignore the event IF - the event is not a cgroup_mkdir or cgroup_rmdir. + // Filter out events that don't have a container ID from all the policies that + // have container filters. This will guarantee that any of those policies + // won't get matched by this event. This situation might happen if the events + // from a recently created container appear BEFORE the initial cgroup_mkdir of + // that container root directory. This could be solved by sorting the events + // by a monotonic timestamp, for example, but sorting might not always be + // enabled, so, in those cases, ignore the event IF the event is not a + // cgroup_mkdir or cgroup_rmdir. if policiesWithContainerFilter > 0 && event.Container.ID == "" { eventId := events.ID(event.EventID) @@ -479,9 +491,9 @@ func (t *Tracee) processEvents(ctx context.Context, in <-chan *trace.Event) ( return out, errc } -// deriveEvents is the event derivation pipeline stage. For each received event, it runs the event -// derivation logic, described in the derivation table, and send the derived events down the -// pipeline. +// deriveEvents is the event derivation pipeline stage. For each received event, it runs +// the event derivation logic, described in the derivation table, and sends the derived +// events down the pipeline. 
func (t *Tracee) deriveEvents(ctx context.Context, in <-chan *trace.Event) ( <-chan *trace.Event, <-chan error, ) { @@ -499,9 +511,10 @@ func (t *Tracee) deriveEvents(ctx context.Context, in <-chan *trace.Event) ( continue // might happen during initialization (ctrl+c seg faults) } - // Get a copy of our event before sending it down the pipeline. This is needed - // because later modification of the event (in particular of the matched policies) - // can affect the derivation and later pipeline logic acting on the derived event. + // Get a copy of our event before sending it down the pipeline. This is + // needed because later modification of the event (in particular of the + // matched policies) can affect the derivation and later pipeline logic + // acting on the derived event. eventCopy := *event out <- event @@ -514,8 +527,8 @@ func (t *Tracee) deriveEvents(ctx context.Context, in <-chan *trace.Event) ( } for i := range derivatives { - // Skip events that dont work with filtering due to missing types being handled. - // https://github.com/aquasecurity/tracee/issues/2486 + // Skip events that dont work with filtering due to missing types + // being handled (https://github.com/aquasecurity/tracee/issues/2486) switch events.ID(derivatives[i].EventID) { case events.SymbolsLoaded: case events.SharedObjectLoaded: @@ -528,9 +541,10 @@ func (t *Tracee) deriveEvents(ctx context.Context, in <-chan *trace.Event) ( } } - // Passing "derivative" variable here will make the ptr address always be the - // same as the last item. This makes the printer to print 2 or 3 times the last - // event, instead of printing all derived events (when there are more than one). + // Passing "derivative" variable here will make the ptr address always + // be the same as the last item. This makes the printer to print 2 or + // 3 times the last event, instead of printing all derived events + // (when there are more than one). 
out <- &derivatives[i] } case <-ctx.Done(): @@ -542,6 +556,9 @@ func (t *Tracee) deriveEvents(ctx context.Context, in <-chan *trace.Event) ( return out, errc } +// sinkEvents is the event sink pipeline stage. For each received event, it goes through a +// series of printers that will print the event to the desired output. It also handles the +// event pool, returning the event to the pool after it is processed. func (t *Tracee) sinkEvents(ctx context.Context, in <-chan *trace.Event) <-chan error { errc := make(chan error, 1) @@ -594,6 +611,7 @@ func (t *Tracee) sinkEvents(ctx context.Context, in <-chan *trace.Event) <-chan return errc } +// getStackAddresses returns the stack addresses for a given StackID func (t *Tracee) getStackAddresses(stackID uint32) []uint64 { stackAddresses := make([]uint64, maxStackDepth) stackFrameSize := (strconv.IntSize / 8) @@ -634,17 +652,14 @@ func (t *Tracee) WaitForPipeline(errs ...<-chan error) error { return nil } -// MergeErrors merges multiple channels of errors. -// Based on https://blog.golang.org/pipelines. +// MergeErrors merges multiple channels of errors (https://blog.golang.org/pipelines) func MergeErrors(cs ...<-chan error) <-chan error { var wg sync.WaitGroup - // We must ensure that the output channel has the capacity to hold as many errors - // as there are error channels. This will ensure that it never blocks, even - // if WaitForPipeline returns early. + // We must ensure that the output channel has the capacity to hold as many errors as + // there are error channels. This will ensure that it never blocks, even if + // WaitForPipeline returns early. out := make(chan error, len(cs)) - // Start an output goroutine for each input channel in cs. output - // copies values from c to out until c is closed, then calls wg.Done. 
output := func(c <-chan error) { for n := range c { out <- n @@ -656,8 +671,6 @@ func MergeErrors(cs ...<-chan error) <-chan error { go output(c) } - // Start a goroutine to close out once all the output goroutines are - // done. This must start after the wg.Add call. go func() { wg.Wait() close(out) @@ -670,10 +683,11 @@ func (t *Tracee) handleError(err error) { logger.Errorw("Tracee encountered an error", "error", err) } -// parseArguments parses the arguments of the event. It must happen before the signatures are -// evaluated. For the new experience (cmd/tracee), it needs to happen in the the "events_engine" -// stage of the pipeline. For the old experience (cmd/tracee-ebpf && cmd/tracee-rules), it happens -// on the "sink" stage of the pipeline (close to the printers). +// parseArguments parses the arguments of the event. It must happen before the signatures +// are evaluated. For the new experience (cmd/tracee), it needs to happen in the +// "events_engine" stage of the pipeline. For the old experience (cmd/tracee-ebpf && +// cmd/tracee-rules), it happens on the "sink" stage of the pipeline (close to the +// printers). 
func (t *Tracee) parseArguments(e *trace.Event) error { if t.config.Output.ParseArguments { err := events.ParseArgs(e) diff --git a/pkg/ebpf/tracee.go b/pkg/ebpf/tracee.go index 782f7cd51dc7..66b0da21d8e3 100644 --- a/pkg/ebpf/tracee.go +++ b/pkg/ebpf/tracee.go @@ -398,6 +398,7 @@ func (t *Tracee) Init(ctx gocontext.Context) error { // Initialize containers enrichment logic t.containers, err = containers.New( + t.config.NoContainersEnrich, t.cgroups, t.config.Sockets, "containers_map", @@ -405,11 +406,12 @@ func (t *Tracee) Init(ctx gocontext.Context) error { if err != nil { return errfmt.Errorf("error initializing containers: %v", err) } - if err := t.containers.Populate(); err != nil { - return errfmt.Errorf("error initializing containers: %v", err) + return errfmt.Errorf("error populating containers: %v", err) } + // Initialize containers related logic + t.contPathResolver = containers.InitContainerPathResolver(&t.pidsInMntns) t.contSymbolsLoader = sharedobjs.InitContainersSymbolsLoader(t.contPathResolver, 1024) @@ -783,7 +785,7 @@ func (t *Tracee) computeConfigValues() []byte { // options binary.LittleEndian.PutUint32(configVal[4:8], t.getOptionsConfig()) // cgroup_v1_hid - binary.LittleEndian.PutUint32(configVal[8:12], uint32(t.containers.GetDefaultCgroupHierarchyID())) + binary.LittleEndian.PutUint32(configVal[8:12], uint32(t.cgroups.GetDefaultCgroupHierarchyID())) // padding binary.LittleEndian.PutUint32(configVal[12:16], 0) @@ -1260,7 +1262,7 @@ func (t *Tracee) initBPF() error { t.controlPlane, err = controlplane.NewController( t.bpfModule, t.containers, - t.config.ContainersEnrich, + t.config.NoContainersEnrich, t.processTree, ) if err != nil { @@ -1590,7 +1592,7 @@ func (t *Tracee) invokeInitEvents(out chan *trace.Event) { emit = t.eventsState[events.ExistingContainer].Emit if emit > 0 { - for _, e := range events.ExistingContainersEvents(t.containers, t.config.ContainersEnrich) { + for _, e := range events.ExistingContainersEvents(t.containers, 
t.config.NoContainersEnrich) { setMatchedPolicies(&e, emit) out <- &e _ = t.stats.EventCount.Increment() diff --git a/pkg/events/usermode.go b/pkg/events/usermode.go index 228c61bef225..0be76a41160b 100644 --- a/pkg/events/usermode.go +++ b/pkg/events/usermode.go @@ -91,14 +91,14 @@ func fetchInitNamespaces() map[string]uint32 { } // ExistingContainersEvents returns a list of events for each existing container -func ExistingContainersEvents(cts *containers.Containers, enrich bool) []trace.Event { +func ExistingContainersEvents(cts *containers.Containers, enrichDisabled bool) []trace.Event { var events []trace.Event def := Core.GetDefinitionByID(ExistingContainer) for id, info := range cts.GetContainers() { container := runtime.ContainerMetadata{} - if enrich { + if !enrichDisabled { container, _ = cts.EnrichCgroupInfo(uint64(id)) } params := def.GetParams()