Skip to content

Commit

Permalink
chore: add BPFPerfEventWrites metric (DEBUG)
Browse files Browse the repository at this point in the history
Enabled only when built with DEBUG=1.

BPFPerfEventWrites counts the number of events processed by the eBPF
programs and written to the perf event buffer.

It is incremented right before the event is written to the perf buffer,
making it possible to measure even if the event is lost.

This metric can be used to monitor the performance of individual eBPF
events and to detect potential bottlenecks.
  • Loading branch information
geyslan committed Oct 10, 2024
1 parent 4a4fd7f commit 1251004
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 34 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,10 @@ UNAME_M := $(shell uname -m)
UNAME_R := $(shell uname -r)

ifeq ($(DEBUG),1)
BPF_DEBUG_FLAG = -DDEBUG
GO_DEBUG_FLAG =
else
BPF_DEBUG_FLAG =
GO_DEBUG_FLAG = -w
endif

Expand Down Expand Up @@ -423,6 +425,7 @@ $(OUTPUT_DIR)/tracee.bpf.o: \
$(TRACEE_EBPF_OBJ_HEADERS)
#
$(CMD_CLANG) \
$(BPF_DEBUG_FLAG) \
-D__TARGET_ARCH_$(LINUX_ARCH) \
-D__BPF_TRACING__ \
-DCORE \
Expand Down Expand Up @@ -501,6 +504,7 @@ $(OUTPUT_DIR)/tracee: \
-ldflags="$(GO_DEBUG_FLAG) \
-extldflags \"$(CGO_EXT_LDFLAGS_EBPF)\" \
-X github.com/aquasecurity/tracee/pkg/version.version=$(VERSION) \
-X github.com/aquasecurity/tracee/pkg/version.debug=$(DEBUG) \
" \
-v -o $@ \
./cmd/tracee
Expand Down
16 changes: 16 additions & 0 deletions pkg/ebpf/c/common/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,15 @@ statfunc int save_args_to_submit_buf(event_data_t *event, args_t *args)
return arg_num;
}

#ifdef DEBUG
// event_counts maps an event id to the number of times events_perf_submit
// attempted to write that event to the events perf buffer. It exists only in
// builds compiled with -DDEBUG.
//
// The eBPF side only increments entries that already exist (lookup followed by
// an atomic add), so user space has to create an entry for every event it
// wants counted.
struct event_counts {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_EVENT_ID);
__type(key, u32); // eventid
__type(value, u64); // count
} event_counts SEC(".maps");
#endif

statfunc int events_perf_submit(program_data_t *p, long ret)
{
p->event->context.retval = ret;
Expand All @@ -484,6 +493,13 @@ statfunc int events_perf_submit(program_data_t *p, long ret)
:
: [size] "r"(size), [max_size] "i"(MAX_EVENT_SIZE));

#ifdef DEBUG
// increment event count before event submission attempt
u64 *event_count = bpf_map_lookup_elem(&event_counts, &p->event->context.eventid);
if (event_count)
__sync_fetch_and_add(event_count, 1);
#endif

return bpf_perf_event_output(p->ctx, &events, BPF_F_CURRENT_CPU, p->event, size);
}

Expand Down
77 changes: 77 additions & 0 deletions pkg/ebpf/perf_count.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package ebpf

import (
"context"
"encoding/binary"
"time"
"unsafe"

"github.com/aquasecurity/tracee/pkg/counter"
"github.com/aquasecurity/tracee/pkg/events"
"github.com/aquasecurity/tracee/pkg/logger"
)

// countPerfEventWrites counts the number of times each event is attempted
// to be written to the perf buffer.
// countPerfEventWrites periodically reads the per-event write-attempt counters
// kept by the eBPF programs in the "event_counts" map, logs every non-zero
// counter and stores their sum in t.stats.BPFPerfEventWrites.
//
// It blocks until ctx is cancelled, so it is meant to be run in its own
// goroutine. If the "event_counts" map cannot be obtained, the error is logged
// and the function returns immediately.
func (t *Tracee) countPerfEventWrites(ctx context.Context) {
	logger.Debugw("Starting countPerfEventWrites goroutine")
	defer logger.Debugw("Stopped countPerfEventWrites goroutine")

	evtsCountsBPFMap, err := t.bpfModule.GetMap("event_counts")
	if err != nil {
		logger.Errorw("Failed to get event_counts map", "error", err)
		return
	}

	// Create a zeroed entry for every selected event: the eBPF side only
	// increments entries that already exist in the map.
	for _, id := range t.policyManager.EventsSelected() {
		key := uint32(id)
		value := uint64(0)
		if err := evtsCountsBPFMap.Update(unsafe.Pointer(&key), unsafe.Pointer(&value)); err != nil {
			logger.Errorw("Failed to update event_counts map", "error", err)
		}
	}

	total := counter.NewCounter(0)
	// evtsCounts keeps the last value read for each event id. It is reused
	// across ticks, so an entry that fails to be read keeps its previous value.
	evtsCounts := make(map[uint32]uint64)
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			iter := evtsCountsBPFMap.Iterator()
			for iter.Next() {
				key := binary.LittleEndian.Uint32(iter.Key())
				value, err := evtsCountsBPFMap.GetValue(unsafe.Pointer(&key))
				if err != nil {
					logger.Errorw("Failed to get value from event_counts map", "error", err)
					continue
				}

				evtsCounts[key] = binary.LittleEndian.Uint64(value)
			}
			// Next() returning false may mean the iteration failed rather than
			// finished; surface that instead of silently reporting stale counts.
			if err := iter.Err(); err != nil {
				logger.Errorw("Failed to iterate over event_counts map", "error", err)
			}

			// The map values are cumulative, so the total is rebuilt from
			// scratch on every tick instead of being accumulated.
			total.Set(0)
			for k, v := range evtsCounts {
				if v == 0 {
					continue
				}
				if err := total.Increment(v); err != nil {
					logger.Errorw("Failed to increment total counter", "error", err)
				}

				logger.Debugw("Event sending attempts",
					"event", events.Core.GetDefinitionByID(events.ID(k)).GetName(),
					"count", v,
				)
			}

			logger.Debugw("Event sending attempts", "total", total.Get())
			t.stats.BPFPerfEventWrites.Set(total.Get())
		}
	}
}
7 changes: 7 additions & 0 deletions pkg/ebpf/tracee.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
"github.com/aquasecurity/tracee/pkg/utils/environment"
"github.com/aquasecurity/tracee/pkg/utils/proc"
"github.com/aquasecurity/tracee/pkg/utils/sharedobjs"
"github.com/aquasecurity/tracee/pkg/version"
"github.com/aquasecurity/tracee/types/trace"
)

Expand Down Expand Up @@ -1370,6 +1371,12 @@ func (t *Tracee) Run(ctx gocontext.Context) error {
t.controlPlane.Start()
go t.controlPlane.Run(ctx)

// Measure event perf buffer write attempts (debug build only)

if version.DebugBuild() {
go t.countPerfEventWrites(ctx)
}

// Main event loop (polling events perf buffer)

t.eventsPerfMap.Poll(pollTimeout)
Expand Down
59 changes: 35 additions & 24 deletions pkg/metrics/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@ import (

// When updating this struct, please make sure to update the relevant exporting functions
type Stats struct {
EventCount counter.Counter
EventsFiltered counter.Counter
NetCapCount counter.Counter // network capture events
BPFLogsCount counter.Counter
ErrorCount counter.Counter
LostEvCount counter.Counter
LostWrCount counter.Counter
LostNtCapCount counter.Counter // lost network capture events
LostBPFLogsCount counter.Counter
EventCount counter.Counter
EventsFiltered counter.Counter
NetCapCount counter.Counter // network capture events
BPFLogsCount counter.Counter
BPFPerfEventWrites counter.Counter // calls to write to the event perf buffer
ErrorCount counter.Counter
LostEvCount counter.Counter
LostWrCount counter.Counter
LostNtCapCount counter.Counter // lost network capture events
LostBPFLogsCount counter.Counter
}

// Register Stats to prometheus metrics exporter
Expand Down Expand Up @@ -54,49 +55,59 @@ func (stats *Stats) RegisterPrometheus() error {

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "lostevents_total",
Help: "events lost in the submission buffer",
}, func() float64 { return float64(stats.LostEvCount.Get()) }))
Name: "bpf_logs_total",
Help: "logs collected by tracee-ebpf during ebpf execution",
}, func() float64 { return float64(stats.BPFLogsCount.Get()) }))

if err != nil {
return errfmt.WrapError(err)
}

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "write_lostevents_total",
Help: "events lost in the write buffer",
}, func() float64 { return float64(stats.LostWrCount.Get()) }))
Name: "bpf_perf_event_writes_total",
Help: "total number of calls to write to the event perf buffer",
}, func() float64 { return float64(stats.BPFPerfEventWrites.Get()) }))

if err != nil {
return errfmt.WrapError(err)
}

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "network_capture_lostevents_total",
Help: "network capture lost events in network capture buffer",
}, func() float64 { return float64(stats.LostNtCapCount.Get()) }))
Name: "errors_total",
Help: "errors accumulated by tracee-ebpf",
}, func() float64 { return float64(stats.ErrorCount.Get()) }))

if err != nil {
return errfmt.WrapError(err)
}

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "bpf_logs_total",
Help: "logs collected by tracee-ebpf during ebpf execution",
}, func() float64 { return float64(stats.BPFLogsCount.Get()) }))
Name: "lostevents_total",
Help: "events lost in the submission buffer",
}, func() float64 { return float64(stats.LostEvCount.Get()) }))

if err != nil {
return errfmt.WrapError(err)
}

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "errors_total",
Help: "errors accumulated by tracee-ebpf",
}, func() float64 { return float64(stats.ErrorCount.Get()) }))
Name: "write_lostevents_total",
Help: "events lost in the write buffer",
}, func() float64 { return float64(stats.LostWrCount.Get()) }))

if err != nil {
return errfmt.WrapError(err)
}

err = prometheus.Register(prometheus.NewCounterFunc(prometheus.CounterOpts{
Namespace: "tracee_ebpf",
Name: "network_capture_lostevents_total",
Help: "network capture lost events in network capture buffer",
}, func() float64 { return float64(stats.LostNtCapCount.Get()) }))

return errfmt.WrapError(err)
}
19 changes: 10 additions & 9 deletions pkg/server/grpc/diagnostic.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,16 @@ type DiagnosticService struct {
func (s *DiagnosticService) GetMetrics(ctx context.Context, in *pb.GetMetricsRequest) (*pb.GetMetricsResponse, error) {
stats := s.tracee.Stats()
metrics := &pb.GetMetricsResponse{
EventCount: stats.EventCount.Get(),
EventsFiltered: stats.EventsFiltered.Get(),
NetCapCount: stats.NetCapCount.Get(),
BPFLogsCount: stats.BPFLogsCount.Get(),
ErrorCount: stats.ErrorCount.Get(),
LostEvCount: stats.LostEvCount.Get(),
LostWrCount: stats.LostWrCount.Get(),
LostNtCapCount: stats.LostNtCapCount.Get(),
LostBPFLogsCount: stats.LostBPFLogsCount.Get(),
EventCount: stats.EventCount.Get(),
EventsFiltered: stats.EventsFiltered.Get(),
NetCapCount: stats.NetCapCount.Get(),
BPFLogsCount: stats.BPFLogsCount.Get(),
BPFPerfEventWrites: stats.BPFPerfEventWrites.Get(), // only available in debug build
ErrorCount: stats.ErrorCount.Get(),
LostEvCount: stats.LostEvCount.Get(),
LostWrCount: stats.LostWrCount.Get(),
LostNtCapCount: stats.LostNtCapCount.Get(),
LostBPFLogsCount: stats.LostBPFLogsCount.Get(),
}

return metrics, nil
Expand Down
9 changes: 8 additions & 1 deletion pkg/version/version.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
package version

var version string
// Both values are injected at link time by the Makefile through
// -ldflags "-X github.com/aquasecurity/tracee/pkg/version.<name>=<value>".
var (
// version is the build's version string, taken from the VERSION make variable.
version string
// debug holds the value of the DEBUG make variable; "1" marks a debug build.
debug string
)

// GetVersion returns the package-level version string.
func GetVersion() string {
return version
}

// DebugBuild reports whether the package-level debug variable equals "1".
// Any other value, including the empty string, yields false.
func DebugBuild() bool {
return debug == "1"
}

0 comments on commit 1251004

Please sign in to comment.