Skip to content

Commit

Permalink
cpu_watcher:增加内核态互斥锁争用数据采集 (#856)
Browse files Browse the repository at this point in the history
* cpu_watcher:schedule_delay增加阈值选项&&workflow增加测试流程

* .

* cpu_watcher:增加controller功能

* cpu_watcher:schedule_delay增加dump出调度延迟过大task的前两个task

* sar功能适配controller

* .

* proc_image:增加进程画像子功能使用说明

* 增加内核态互斥锁争用数据采集

* helper
  • Loading branch information
vvzxy authored Jul 8, 2024
1 parent 89b17f2 commit 9faff7b
Show file tree
Hide file tree
Showing 5 changed files with 347 additions and 30 deletions.
2 changes: 1 addition & 1 deletion eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ INCLUDES := -I$(OUTPUT) -I../../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(L
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay
APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay mutrace
TARGETS=cpu_watcher
CONTROLLER := controller

Expand Down
172 changes: 172 additions & 0 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright 2023 The LMP Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// author: [email protected] [email protected] [email protected]

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include "cpu_watcher.h"

// License string placed in the "license" ELF section of the BPF object.
char LICENSE[] SEC("license") = "Dual BSD/GPL";

// Per-mutex statistics: key is the kernel address of the struct mutex (u64),
// value is struct mutex_info; the last macro argument is 1024.
// BPF_HASH is provided by cpu_watcher.h — presumably it expands to a
// BPF_MAP_TYPE_HASH definition with 1024 max entries; confirm against the header.
BPF_HASH(mutex_info_map,u64,struct mutex_info, 1024);

// Ring buffer (max_entries = 256 * 1024 bytes) into which
// trace_mutex_lock_slowpath submits struct mutex_contention_event records.
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");


/*----------------------------------------------*/
/* 内核态互斥锁 */
/*----------------------------------------------*/

SEC("kprobe/mutex_lock")
int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) {
u64 lock_addr = (u64)lock; // 获取锁地址
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts; // 保存锁获取时间
} else {
struct mutex_info new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
return 0;
}

SEC("kprobe/mutex_trylock")
int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {
int ret = PT_REGS_RC(ctx);
if (ret == 0) { // 成功获取锁
u64 lock_addr = (u64)lock; // 获取锁地址
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts;
} else {
struct mutex_info new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
}
return 0;
}

/*
 * Probe on __mutex_lock_slowpath(): emits one mutex_contention_event to the
 * ring buffer describing the contending (current) task and the task recorded
 * as owner of the mutex, and updates the per-lock contention statistics in
 * mutex_info_map.
 *
 * If no ring-buffer space can be reserved the event is dropped and the
 * statistics are left untouched (same as before).
 */
SEC("kprobe/__mutex_lock_slowpath")
int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
    /*
     * mutex.owner stores a task_struct pointer with state flags in its low
     * three bits; all of them must be cleared to recover the pointer.
     */
    const long owner_flag_mask = 0x7L;
    struct mutex_contention_event *e;
    struct task_struct *owner_task;
    struct task_struct *contender_task;
    pid_t pid = bpf_get_current_pid_tgid(); /* truncates to the thread ID */
    long owner = 0;
    u64 lock_addr = (u64)lock;
    u64 ts = bpf_ktime_get_ns();

    e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0);
    if (!e) {
        return 0;
    }

    /* Contender side: the task currently executing this probe. */
    e->contender_pid = pid;
    e->ptr = lock_addr;
    bpf_get_current_comm(&e->contender_name, sizeof(e->contender_name));
    contender_task = (struct task_struct *)bpf_get_current_task();
    bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);

    /*
     * Owner side. Reserved ring-buffer memory is not zeroed, so give every
     * owner field a defined value before the conditional reads below.
     */
    e->owner_pid = 0;
    e->owner_prio = 0;
    __builtin_memset(e->owner_name, 0, sizeof(e->owner_name));

    bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner);
    owner_task = (struct task_struct *)(owner & ~owner_flag_mask);
    if (owner_task) {
        bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid);
        bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm);
        bpf_probe_read_kernel(&e->owner_prio, sizeof(e->owner_prio), &owner_task->prio);
    }

    struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
    if (info) {
        u64 acquired_at = info->acquire_time;
        /*
         * Only accumulate when a valid acquisition stamp exists; a zero or
         * future stamp would add a meaningless (huge) delta.
         */
        if (acquired_at != 0 && ts >= acquired_at) {
            __sync_fetch_and_add(&info->contended_total, ts - acquired_at);
        }
    } else {
        /*
         * First sighting of this lock: start all counters at zero.
         * contended_total is a duration, so it must not be seeded with the
         * absolute timestamp. memset also clears struct padding before the
         * object is handed to bpf_map_update_elem().
         */
        struct mutex_info new_info;
        __builtin_memset(&new_info, 0, sizeof(new_info));
        new_info.ptr = lock_addr;
        /* BPF_NOEXIST: do not clobber an entry created concurrently. */
        bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_NOEXIST);
    }

    bpf_ringbuf_submit(e, 0);
    return 0;
}

/*
 * Probe on mutex_unlock(): computes how long the mutex was held (now minus
 * the stored acquire_time), adds it to locked_total, raises locked_max when
 * exceeded, and records the unlocking thread as last_owner.
 *
 * Locks that are not yet present in mutex_info_map are ignored.
 */
SEC("kprobe/mutex_unlock")
int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) {
    u64 lock_addr = (u64)lock;
    u64 ts = bpf_ktime_get_ns();
    pid_t pid = bpf_get_current_pid_tgid(); /* truncates to the thread ID */

    struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
    if (!info) {
        return 0;
    }

    info->last_owner = pid;

    /*
     * An entry may exist without a valid acquisition stamp (the slow-path
     * probe creates entries with acquire_time == 0). Using such a stamp would
     * yield a hold time equal to the system uptime and permanently corrupt
     * locked_max, so skip the accounting in that case.
     */
    u64 acquired_at = info->acquire_time;
    if (acquired_at == 0 || ts < acquired_at) {
        return 0;
    }

    u64 held_time = ts - acquired_at;
    info->locked_total += held_time;
    if (held_time > info->locked_max) {
        info->locked_max = held_time;
    }
    return 0;
}

/*----------------------------------------------*/
/* 用户态互斥锁 */
/*----------------------------------------------*/

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock_init, pthread_mutex_t *mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock,pthread_mutex_t *mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_try, pthread_mutex_t *mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_unlock, pthread_mutex_t *mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_destroy, pthread_mutex_t *mutex){

// }
100 changes: 72 additions & 28 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@
#include "preempt.skel.h"
#include "schedule_delay.skel.h"
#include "mq_delay.skel.h"
#include "mutrace.skel.h"

typedef long long unsigned int u64;
typedef unsigned int u32;



struct list_head {
struct list_head *next;
struct list_head *prev;
Expand All @@ -65,6 +67,7 @@ static struct env {
int freq;
bool EWMA;
int cycle;
int MUTRACE;
} env = {
.time = 0,
.period = 1,
Expand All @@ -78,6 +81,7 @@ static struct env {
.freq = 99,
.EWMA = false,
.cycle = 0,
.MUTRACE = false,
};


Expand All @@ -88,6 +92,7 @@ struct sc_delay_bpf *sc_skel;
struct preempt_bpf *preempt_skel;
struct schedule_delay_bpf *sd_skel;
struct mq_delay_bpf *mq_skel;
struct mutrace_bpf *mu_skel;

static int csmap_fd;
static int sarmap_fd;
Expand Down Expand Up @@ -132,6 +137,7 @@ static const struct argp_option opts[] = {
{"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" },
{"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" },
{"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
{"mutrace", 'x', 0, 0, "Print mutrace data(the data of cpu)" },
{"ewma", 'E',0,0,"dynamic filte the data"},
{"cycle", 'T',"CYCLE",0,"Periods of the ewma"},
{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
Expand Down Expand Up @@ -166,6 +172,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'm':
env.MQ_DELAY = true;
break;
case 'x':
env.MUTRACE = true;
break;
case 'E':
env.EWMA = true;
break;
Expand Down Expand Up @@ -547,38 +556,23 @@ static int preempt_print(void *ctx, void *data, unsigned long data_sz)
return 0;
}

// 定义一个结构来存储已输出的条目
struct output_entry {
int pid;
char comm[16];
long long delay;
};

// 定义一个数组来存储已输出的条目
struct output_entry seen_entries[MAX_ENTRIES];
int seen_count = 0;

// 检查条目是否已存在
bool entry_exists(int pid, const char *comm, long long delay) {
for (int i = 0; i < seen_count; i++) {
if (seen_entries[i].pid == pid &&
strcmp(seen_entries[i].comm, comm) == 0 &&
seen_entries[i].delay == delay) {
return true;
}
}
return false;
}

// 添加条目到已输出的条目列表
void add_entry(int pid, const char *comm, long long delay) {
if (seen_count < MAX_ENTRIES) {
seen_entries[seen_count].pid = pid;
strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm));
seen_entries[seen_count].delay = delay;
seen_count++;
//mutrace输出
static int mutrace_print(void *ctx, void *data, unsigned long data_sz) {
const struct mutex_contention_event *e = data;
if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) {
return 0;
}
// 增加锁争用次数
increment_lock_count(e->ptr);
uint64_t contention_count = get_lock_count(e->ptr);
printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count);
return 0;
}



static int schedule_print()
{
int err,key = 0;
Expand Down Expand Up @@ -961,6 +955,40 @@ int main(int argc, char **argv)
fprintf(stderr, "Failed to create ring buffer\n");
goto mq_delay_cleanup;
}
}else if (env.MUTRACE) {
mu_skel = mutrace_bpf__open();
if (!mu_skel) {
fprintf(stderr, "Failed to open and load BPF skeleton\n");
return 1;
}

err = mutrace_bpf__load(mu_skel);
if (err) {
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
goto mutrace_cleanup;
}
//ctrl
if(err < 0){
goto mutrace_cleanup;
}
//ctrl
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
goto mutrace_cleanup;
}
err = mutrace_bpf__attach(mu_skel);
if (err) {
fprintf(stderr, "Failed to attach BPF skeleton\n");
goto mutrace_cleanup;
}

rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), mutrace_print, NULL, NULL);
printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count");
if (!rb) {
err = -1;
fprintf(stderr, "Failed to create ring buffer\n");
goto mutrace_cleanup;
}
}
while (!exiting) {
if(env.SAR){
Expand Down Expand Up @@ -1053,6 +1081,17 @@ int main(int argc, char **argv)
break;
}
}
else if (env.MUTRACE) {
err = ring_buffer__poll(rb, 100 /* timeout, ms */);
if (err == -EINTR) {
err = 0;
break;
}
if (err < 0) {
printf("Error polling perf buffer: %d\n", err);
break;
}
}
else {
printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n");
break;
Expand Down Expand Up @@ -1092,4 +1131,9 @@ int main(int argc, char **argv)
ring_buffer__free(rb);
mq_delay_bpf__destroy(mq_skel);
return err < 0 ? -err : 0;

mutrace_cleanup:
ring_buffer__free(rb);
mutrace_bpf__destroy(mu_skel);
return err < 0 ? -err : 0;
}
21 changes: 21 additions & 0 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,27 @@ struct proc_history {
struct proc_info last[2]; // 存储最后两个调度的进程信息
};

/*----------------------------------------------*/
/* mutrace-related structures */
/*----------------------------------------------*/
// Per-mutex statistics stored in mutex_info_map, keyed by the lock address.
// Time values are derived from bpf_ktime_get_ns(), i.e. nanoseconds.
// NOTE(review): last_owner (pid_t) is followed by a u64, so the compiler
// likely inserts padding there — confirm when sharing this layout.
struct mutex_info {
u64 locked_total;// total time the lock has been held
u64 locked_max;// longest single hold time observed
u64 contended_total;// accumulated contention time for this lock
pid_t last_owner;// ID of the thread that most recently unlocked the mutex
u64 acquire_time; // timestamp stored by the lock/trylock probes; used to compute hold time
u64 ptr;// address of the mutex
};

// Record submitted to the ring buffer by the __mutex_lock_slowpath probe and
// consumed by mutrace_print in user space.
struct mutex_contention_event {
u64 ptr;// address of the contended mutex
pid_t owner_pid;// pid of the task recorded as lock owner (0 when no owner was found)
pid_t contender_pid;// pid of the task contending for the lock
char contender_name[TASK_COMM_LEN]; // comm of the contending task
char owner_name[TASK_COMM_LEN]; // comm of the owner task
int owner_prio; // task_struct->prio of the owner
int contender_prio; // task_struct->prio of the contender
};

/*----------------------------------------------*/
/* mq_delay相关结构体 */
Expand Down
Loading

0 comments on commit 9faff7b

Please sign in to comment.