Skip to content

Commit

Permalink
cpu_watcher:拿取更细粒度内核互斥锁使用的信息 (#860)
Browse files Browse the repository at this point in the history
* cpu_watcher:schedule_delay增加阈值选项&&workflow增加测试流程

* .

* cpu_watcher:增加controller功能

* cpu_watcher:schedule_delay增加dump出调度延迟过大task的前两个task

* sar功能适配controller

* .

* proc_image:增加进程画像子功能使用说明

* cpu_watcher:增加内核态互斥锁信息细粒度采集

* Update mutrace.bpf.c

* 1

* Update mutrace.bpf.c

* Update cpu_watcher.c

* 增加输出过滤,识别被争用的锁

* 添加测试程序
  • Loading branch information
vvzxy authored Jul 24, 2024
1 parent dcb5d0c commit eba33a7
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 41 deletions.
75 changes: 48 additions & 27 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,57 +22,70 @@

// License string placed in the "license" section of the BPF object.
char LICENSE[] SEC("license") = "Dual BSD/GPL";

// NOTE(review): this listing is a rendered diff; the declaration below using
// struct mutex_info appears to be the pre-change form of the
// struct mutex_info_kernel declaration that follows — confirm which one is live.
BPF_HASH(mutex_info_map,u64,struct mutex_info, 1024);

// Key passed to bpf_map_lookup_elem() on mu_ctrl_map by get_mu_ctrl().
const int ctrl_key = 0;
// Per-lock statistics; the kprobes in this file key it by the mutex address.
// BPF_HASH is a project macro — presumably (name, key type, value type, max entries); confirm.
BPF_HASH(mutex_info_map, u64, struct mutex_info_kernel, 1024);
// Control block (struct mu_ctrl) looked up with ctrl_key by get_mu_ctrl().
// BPF_ARRAY is a project macro — presumably a one-element array map; confirm.
BPF_ARRAY(mu_ctrl_map, int, struct mu_ctrl, 1);
// Ring buffer map named "rb", declared with max_entries = 256 * 1024.
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

/*
 * Fetch the mutex-tracing control block stored under ctrl_key in mu_ctrl_map.
 *
 * Returns the map value only when the lookup succeeds and its mu_func flag
 * is set; returns NULL otherwise.
 */
static inline struct mu_ctrl *get_mu_ctrl(void) {
    struct mu_ctrl *ctrl = bpf_map_lookup_elem(&mu_ctrl_map, &ctrl_key);

    /* Hand the entry back only if it exists and the feature flag is on. */
    if (ctrl && ctrl->mu_func) {
        return ctrl;
    }
    return NULL;
}

/*----------------------------------------------*/
/* 内核态互斥锁 */
/*----------------------------------------------*/

/*
 * Probe on mutex_lock.
 *
 * Looks up the statistics entry for this mutex (keyed by the lock's address)
 * in mutex_info_map. If an entry exists, its acquire_time is overwritten with
 * the timestamp read when the probe runs; otherwise a new entry with zeroed
 * counters, acquire_time = ts and ptr = lock address is inserted.
 * Always returns 0.
 *
 * NOTE(review): this listing is a rendered diff, so several statements appear
 * twice (struct mutex_info vs. struct mutex_info_kernel variants). The
 * duplicates look like the pre-change lines — confirm against the real file.
 */
SEC("kprobe/mutex_lock")
int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) {
u64 lock_addr = (u64)lock; // lock address, used as the map key
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
u64 lock_addr = (u64)lock; // lock address, used as the map key
u64 ts = bpf_ktime_get_ns();
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts; // save the lock acquisition time
info->acquire_time = ts; // save the lock acquisition time
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.count = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
// Clear last_name explicitly before the entry is written to the map.
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
return 0;
}

SEC("kprobe/mutex_trylock")
int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {
int ret = PT_REGS_RC(ctx);
if (ret == 0) { // 成功获取锁
u64 lock_addr = (u64)lock; // 获取锁地址
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (ret != 0) { // 成功获取锁
u64 lock_addr = (u64)lock; // 获取锁地址
u64 ts = bpf_ktime_get_ns();
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts;
info->acquire_time = ts;
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.count = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
}
Expand All @@ -81,6 +94,7 @@ int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {

SEC("kprobe/__mutex_lock_slowpath")
int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
struct mu_ctrl *mu_ctrl = get_mu_ctrl();
struct mutex_contention_event *e;
struct task_struct *owner_task;
struct task_struct *contender_task;
Expand All @@ -98,7 +112,7 @@ int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner);
owner_task = (struct task_struct *)(owner & ~0x1L);
contender_task = (struct task_struct *)bpf_get_current_task();
bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);
bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);
if (owner_task) {
bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid);
bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm);
Expand All @@ -107,18 +121,22 @@ int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
e->owner_pid = 0;
__builtin_memset(e->owner_name, 0, sizeof(e->owner_name));
}
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->contended_total += ts - info->acquire_time;
u64 contention_start = ts;
info->contended_total += (contention_start - info->acquire_time); // 更新争用时间
info->count++; // 更新争用次数
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = ts,
.contended_total = 0,
.count = 1, // 初始化争用次数
.last_owner = 0,
.acquire_time = 0,
.acquire_time = ts, // 初始化获取时间
.ptr = lock_addr
};
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
bpf_ringbuf_submit(e, 0);
Expand All @@ -130,43 +148,46 @@ int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) {
u64 lock_addr = (u64)lock;
u64 ts = bpf_ktime_get_ns();
pid_t pid = bpf_get_current_pid_tgid();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
u64 held_time = ts - info->acquire_time; // 计算锁被持有的时间
info->locked_total += held_time; // 更新锁被持有的总时间
if (held_time > info->locked_max) {
info->locked_max = held_time; // 更新锁被持有的最长时间
}
info->last_owner = pid; // 更新最后一次持有该锁的线程ID
bpf_get_current_comm(&info->last_name, sizeof(info->last_name)); // 更新最后一次持有该锁的线程名称
}
return 0;
}



/*----------------------------------------------*/
/* 用户态互斥锁 */
/*----------------------------------------------*/

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock_init, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_lock_init, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock,pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_lock,void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_try, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_trylock, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_unlock, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_unlock, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_destroy, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_destroy,void *__mutex){

// }
// }
27 changes: 25 additions & 2 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ static struct env {
bool SCHEDULE_DELAY;
bool MQ_DELAY;
int freq;
bool mutrace;
bool mutex_detail;
} env = {
.usemode = 0,
.SAR = false,
Expand All @@ -51,6 +53,8 @@ static struct env {
.SCHEDULE_DELAY = false,
.MQ_DELAY = false,
.freq = 99,
.mutrace = false,
.mutex_detail = false,
};

const char argp_program_doc[] ="Trace process to get cpu watcher.\n";
Expand All @@ -66,7 +70,9 @@ static const struct argp_option opts[] = {
{"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" },
{"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" },
{"schedule_delay_min_us_set", 'e', "THRESHOLD", 0, "Print scheduling delays that exceed the threshold (the data of cpu)" },
{"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
{"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
{"mutrace", 'x', 0, 0, "Print kernel mutex contend" },
{"mutex_detail", 'i', 0, 0, "Print kernel mutex details" },
{ NULL, 'h', NULL, OPTION_HIDDEN, "show the full help" },
{},
};
Expand Down Expand Up @@ -114,7 +120,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'm':
env.MQ_DELAY = true;
break;
break;
case 'x':
env.mutrace = true;
break;
case 'i':
env.mutex_detail = true;
break;
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
Expand Down Expand Up @@ -157,6 +169,11 @@ int deactivate_mode(){
struct mq_ctrl mq_ctrl = {false,0};
err = update_mq_ctrl_map(mq_ctrl);
if(err < 0) return err;
}
if(env.mutrace){
struct mu_ctrl mu_ctrl = {false,false,0};
err = update_mu_ctrl_map(mu_ctrl);
if(err < 0) return err;
}
return 0;
}
Expand Down Expand Up @@ -223,6 +240,12 @@ int main(int argc, char **argv)
err = update_mq_ctrl_map(mq_ctrl);
if(err < 0) return err;
}

if(env.mutrace){
struct mu_ctrl mu_ctrl = {true,env.mutex_detail,MUTEX_WATCHER+env.mutex_detail};
err = update_mu_ctrl_map(mu_ctrl);
if(err < 0) return err;
}
}else if(env.usemode == 2){ // deactivate mode
err = deactivate_mode();
if(err<0){
Expand Down
79 changes: 68 additions & 11 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <sys/select.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
Expand Down Expand Up @@ -102,6 +103,8 @@ static int preemptmap_fd;
static int schedulemap_fd;
struct schedule_ctrl sd_ctrl = {};
static int mqmap_fd;
static int mumap_fd;
struct mu_ctrl mu_ctrl = {};

//static int prev_watcher = 0;//上一个使用的工具,用于在切换使用功能时,打印不用功能的表头;

Expand Down Expand Up @@ -560,18 +563,55 @@ static int preempt_print(void *ctx, void *data, unsigned long data_sz)

// mutrace output
/*
 * Ring-buffer callback registered via ring_buffer__new() for the mutrace "rb" map.
 *
 * data points to a struct mutex_contention_event. The function reads the
 * control block from the map behind mumap_fd into the global mu_ctrl, prints
 * a one-time table header chosen by mu_ctrl.prev_watcher, and, when
 * mu_ctrl.mutex_detail is not set, prints one line per contention event.
 * Events whose owner_pid is 0 or 1, or whose contender_pid is 0, are dropped.
 *
 * Returns 0 normally and -1 when the control-map lookup fails.
 *
 * NOTE(review): this listing is a rendered diff; the first statements of the
 * body (unconditional filter + printf) look like the pre-change version of
 * the code that reappears inside the `if(!mu_ctrl.mutex_detail)` branch —
 * confirm against the real file.
 */
static int mutrace_print(void *ctx, void *data, unsigned long data_sz) {
const struct mutex_contention_event *e = data;
if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) {
return 0;
}
// bump the contention count for this lock
increment_lock_count(e->ptr);
uint64_t contention_count = get_lock_count(e->ptr);
printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count);
int err,key = 0;
// Refresh the global control block from the control map on every event.
err = bpf_map_lookup_elem(mumap_fd,&key,&mu_ctrl);
if (err < 0) {
fprintf(stderr, "failed to lookup infos: %d\n", err);
return -1;
}
if(!mu_ctrl.mu_func) return 0;
// prev_watcher == MUTEX_WATCHER: print the contention-event table header once.
if(mu_ctrl.prev_watcher == MUTEX_WATCHER ){
printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count");
mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;// header printing turned off
err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
}
// prev_watcher == MUTEX_WATCHER + 1: print the per-lock detail table header once.
// NOTE(review): "last_owmer_name" in the header below looks like a typo for "last_owner_name".
}else if (mu_ctrl.prev_watcher == MUTEX_WATCHER +1) {
printf("%s\n"," lock_ptr locked_total locked_max contended_total count last_owner last_owmer_name");
mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;// header printing turned off
err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
}
}
if(!mu_ctrl.mutex_detail){
const struct mutex_contention_event *e = data;
if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) {
return 0;
}
// bump the contention count for this lock
increment_lock_count(e->ptr);
uint64_t contention_count = get_lock_count(e->ptr);
printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count);
}
return 0;
}


/*
 * Dump per-lock statistics from the BPF mutex_info_map.
 *
 * Walks every key of the map and prints one row for each lock whose
 * contended_total is non-zero: lock address, total held time, longest held
 * time, total contended time, contention count, last owner and last owner
 * name. Entries whose lookup fails are skipped silently.
 *
 * Always returns 0.
 */
static int mutex_detail() {
    int fd = bpf_map__fd(mu_skel->maps.mutex_info_map);
    u64 key = 0, next_key;
    /*
     * Start the walk with a NULL key so the first call returns the first
     * element of the map. Passing an uninitialised key (as before) can skip
     * entries when the garbage value happens to match an existing key.
     */
    u64 *prev_key = NULL;
    struct mutex_info_kernel info;

    while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
        int err = bpf_map_lookup_elem(fd, &next_key, &info);

        if (err == 0 && info.contended_total != 0) { /* only locks that were contended */
            printf(" %15llu %15lluns %15lluns %15lluns %15d %15d %20s\n",
                   next_key, info.locked_total, info.locked_max, info.contended_total, info.count, info.last_owner, info.last_name);
        }
        key = next_key;
        prev_key = &key;
    }
    return 0;
}

static int schedule_print()
{
Expand Down Expand Up @@ -697,6 +737,7 @@ int main(int argc, char **argv)
struct bpf_map *preempt_ctrl_map = NULL;
struct bpf_map *schedule_ctrl_map = NULL;
struct bpf_map *mq_ctrl_map = NULL;
struct bpf_map *mu_ctrl_map = NULL;
int key = 0;
int err;
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
Expand Down Expand Up @@ -967,6 +1008,18 @@ int main(int argc, char **argv)
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
goto mutrace_cleanup;
}
err = common_pin_map(&mu_ctrl_map,mu_skel->obj,"mu_ctrl_map",mu_ctrl_path);
if(err < 0){
goto mutrace_cleanup;
}
mumap_fd = bpf_map__fd(mu_ctrl_map);
struct mu_ctrl init_value = {false,false,MUTEX_WATCHER};

err = bpf_map_update_elem(mumap_fd, &key, &init_value, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
goto mutrace_cleanup;
}
//ctrl
if(err < 0){
goto mutrace_cleanup;
Expand All @@ -983,7 +1036,6 @@ int main(int argc, char **argv)
}

rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), mutrace_print, NULL, NULL);
printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count");
if (!rb) {
err = -1;
fprintf(stderr, "Failed to create ring buffer\n");
Expand Down Expand Up @@ -1091,6 +1143,11 @@ int main(int argc, char **argv)
printf("Error polling perf buffer: %d\n", err);
break;
}
if(env.MUTRACE&&mu_ctrl.mutex_detail){
err = mutex_detail();
sleep(1);
printf("-------------------------------------------------------------\n");
}
}
else {
printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n");
Expand Down Expand Up @@ -1136,4 +1193,4 @@ int main(int argc, char **argv)
ring_buffer__free(rb);
mutrace_bpf__destroy(mu_skel);
return err < 0 ? -err : 0;
}
}
Loading

0 comments on commit eba33a7

Please sign in to comment.