cpu_watcher: capture finer-grained kernel mutex usage information #860

Merged: merged 21 commits into from Jul 24, 2024
75 changes: 48 additions & 27 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c
@@ -22,57 +22,70 @@

char LICENSE[] SEC("license") = "Dual BSD/GPL";

BPF_HASH(mutex_info_map,u64,struct mutex_info, 1024);

const int ctrl_key = 0;
BPF_HASH(mutex_info_map, u64, struct mutex_info_kernel, 1024);
BPF_ARRAY(mu_ctrl_map, int, struct mu_ctrl, 1);
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

static inline struct mu_ctrl *get_mu_ctrl(void) {
struct mu_ctrl *mu_ctrl;
mu_ctrl = bpf_map_lookup_elem(&mu_ctrl_map, &ctrl_key);
if (!mu_ctrl || !mu_ctrl->mu_func) {
return NULL;
}
return mu_ctrl;
}

/*----------------------------------------------*/
/* Kernel-space mutexes */
/*----------------------------------------------*/

SEC("kprobe/mutex_lock")
int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) {
u64 lock_addr = (u64)lock; // get the lock address
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
u64 lock_addr = (u64)lock; // get the lock address
u64 ts = bpf_ktime_get_ns();
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts; // record the lock acquisition time
info->acquire_time = ts; // record the lock acquisition time
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.count = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
return 0;
}

SEC("kprobe/mutex_trylock")
int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {
int ret = PT_REGS_RC(ctx);
if (ret == 0) { // lock acquired successfully
u64 lock_addr = (u64)lock; // get the lock address
u64 ts = bpf_ktime_get_ns();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (ret != 0) { // lock acquired successfully
u64 lock_addr = (u64)lock; // get the lock address
u64 ts = bpf_ktime_get_ns();
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->acquire_time = ts;
info->acquire_time = ts;
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = 0,
.count = 0,
.last_owner = 0,
.acquire_time = ts,
.ptr = lock_addr
};
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
}
@@ -81,6 +94,7 @@ int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) {

SEC("kprobe/__mutex_lock_slowpath")
int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
struct mu_ctrl *mu_ctrl = get_mu_ctrl();
struct mutex_contention_event *e;
struct task_struct *owner_task;
struct task_struct *contender_task;
@@ -98,7 +112,7 @@ int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner);
owner_task = (struct task_struct *)(owner & ~0x1L);
contender_task = (struct task_struct *)bpf_get_current_task();
bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);
bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio);
if (owner_task) {
bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid);
bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm);
@@ -107,18 +121,22 @@ int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) {
e->owner_pid = 0;
__builtin_memset(e->owner_name, 0, sizeof(e->owner_name));
}
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
info->contended_total += ts - info->acquire_time;
u64 contention_start = ts;
info->contended_total += (contention_start - info->acquire_time); // update total contention time
info->count++; // update the contention count
} else {
struct mutex_info new_info = {
struct mutex_info_kernel new_info = {
.locked_total = 0,
.locked_max = 0,
.contended_total = ts,
.contended_total = 0,
.count = 1, // initialize the contention count
.last_owner = 0,
.acquire_time = 0,
.acquire_time = ts, // initialize the acquisition time
.ptr = lock_addr
};
__builtin_memset(new_info.last_name, 0, sizeof(new_info.last_name));
bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY);
}
bpf_ringbuf_submit(e, 0);
@@ -130,43 +148,46 @@ int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) {
u64 lock_addr = (u64)lock;
u64 ts = bpf_ktime_get_ns();
pid_t pid = bpf_get_current_pid_tgid();
struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
struct mutex_info_kernel *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr);
if (info) {
u64 held_time = ts - info->acquire_time; // compute how long the lock was held
info->locked_total += held_time; // update the total hold time
if (held_time > info->locked_max) {
info->locked_max = held_time; // update the maximum hold time
}
info->last_owner = pid; // record the PID of the last thread that held the lock
bpf_get_current_comm(&info->last_name, sizeof(info->last_name)); // record the name of the last holder
}
return 0;
}



/*----------------------------------------------*/
/* User-space mutexes */
/*----------------------------------------------*/

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock_init, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_lock_init, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_lock,pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_lock,void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_try, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_trylock, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_unlock, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_unlock, void *__mutex){

// }

// SEC("uprobe")
// int BPF_KPROBE(pthread_mutex_destroy, pthread_mutex_t *mutex){
// int BPF_KPROBE(pthread_mutex_destroy,void *__mutex){

// }
// }
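Note: the BPF code above reads and writes struct mutex_info_kernel and struct mu_ctrl, but their definitions are not part of this diff (they live in the shared cpu_watcher headers). Below is a minimal sketch of what these structures plausibly contain, inferred only from the fields used in mutrace.bpf.c; the exact types, field order, and the 16-byte comm length are assumptions.

#include <stdbool.h>

/* Sketch only: field names mirror the uses in mutrace.bpf.c; types and sizes are assumed. */
struct mutex_info_kernel {
    unsigned long long locked_total;    /* total time the mutex was held, in ns */
    unsigned long long locked_max;      /* longest single hold time, in ns */
    unsigned long long contended_total; /* total time spent contending, in ns */
    unsigned long long acquire_time;    /* timestamp of the latest acquisition, in ns */
    unsigned long long ptr;             /* kernel address of the struct mutex */
    int count;                          /* number of contention events */
    int last_owner;                     /* PID of the last thread that held the mutex */
    char last_name[16];                 /* comm of the last holder */
};

struct mu_ctrl {
    bool mu_func;       /* master switch for the mutex watcher (-x) */
    bool mutex_detail;  /* per-lock detail mode (-i) */
    int prev_watcher;   /* which table header mutrace_print should emit next */
};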
27 changes: 25 additions & 2 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/controller.c
@@ -39,6 +39,8 @@ static struct env {
bool SCHEDULE_DELAY;
bool MQ_DELAY;
int freq;
bool mutrace;
bool mutex_detail;
} env = {
.usemode = 0,
.SAR = false,
@@ -51,6 +53,8 @@ static struct env {
.SCHEDULE_DELAY = false,
.MQ_DELAY = false,
.freq = 99,
.mutrace = false,
.mutex_detail = false,
};

const char argp_program_doc[] ="Trace process to get cpu watcher.\n";
@@ -66,7 +70,9 @@ static const struct argp_option opts[] = {
{"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" },
{"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" },
{"schedule_delay_min_us_set", 'e', "THRESHOLD", 0, "Print scheduling delays that exceed the threshold (the data of cpu)" },
{"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
{"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" },
{"mutrace", 'x', 0, 0, "Print kernel mutex contend" },
{"mutex_detail", 'i', 0, 0, "Print kernel mutex details" },
{ NULL, 'h', NULL, OPTION_HIDDEN, "show the full help" },
{},
};
@@ -114,7 +120,13 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'm':
env.MQ_DELAY = true;
break;
break;
case 'x':
env.mutrace = true;
break;
case 'i':
env.mutex_detail = true;
break;
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
@@ -157,6 +169,11 @@ int deactivate_mode(){
struct mq_ctrl mq_ctrl = {false,0};
err = update_mq_ctrl_map(mq_ctrl);
if(err < 0) return err;
}
if(env.mutrace){
struct mu_ctrl mu_ctrl = {false,false,0};
err = update_mu_ctrl_map(mu_ctrl);
if(err < 0) return err;
}
return 0;
}
@@ -223,6 +240,12 @@ int main(int argc, char **argv)
err = update_mq_ctrl_map(mq_ctrl);
if(err < 0) return err;
}

if(env.mutrace){
struct mu_ctrl mu_ctrl = {true,env.mutex_detail,MUTEX_WATCHER+env.mutex_detail};
err = update_mu_ctrl_map(mu_ctrl);
if(err < 0) return err;
}
}else if(env.usemode == 2){ // deactivate mode
err = deactivate_mode();
if(err<0){
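Note: controller.c relies on update_mu_ctrl_map(), which is not included in this diff. Below is a minimal sketch of what such a helper plausibly does, assuming it follows the same pattern as the other update_*_ctrl_map() helpers and reuses the mu_ctrl_map that cpu_watcher pins at mu_ctrl_path; the header name, pin-path constant, and error handling are assumptions.

/* Sketch of update_mu_ctrl_map(): open the pinned control map and overwrite
 * the single entry at key 0 with the new settings. Not the PR's actual code. */
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include "cpu_watcher_helper.h"   /* assumed header providing struct mu_ctrl and mu_ctrl_path */

int update_mu_ctrl_map(struct mu_ctrl mu_ctrl)
{
    int key = 0;
    int fd = bpf_obj_get(mu_ctrl_path);        /* open the map pinned by cpu_watcher */
    if (fd < 0) {
        fprintf(stderr, "Failed to open mu_ctrl_map\n");
        return fd;
    }
    int err = bpf_map_update_elem(fd, &key, &mu_ctrl, 0);
    if (err < 0)
        fprintf(stderr, "Failed to update mu_ctrl_map\n");
    close(fd);
    return err;
}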
79 changes: 68 additions & 11 deletions eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c
@@ -25,6 +25,7 @@
#include <sys/select.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
@@ -102,6 +103,8 @@ static int preemptmap_fd;
static int schedulemap_fd;
struct schedule_ctrl sd_ctrl = {};
static int mqmap_fd;
static int mumap_fd;
struct mu_ctrl mu_ctrl = {};

//static int prev_watcher = 0; // the previously active watcher, used to print the right table header when switching tools;

@@ -560,18 +563,55 @@

// mutrace output
static int mutrace_print(void *ctx, void *data, unsigned long data_sz) {
const struct mutex_contention_event *e = data;
if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) {
return 0;
}
// increment the lock contention count
increment_lock_count(e->ptr);
uint64_t contention_count = get_lock_count(e->ptr);
printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count);
int err,key = 0;
err = bpf_map_lookup_elem(mumap_fd,&key,&mu_ctrl);
if (err < 0) {
fprintf(stderr, "failed to lookup infos: %d\n", err);
return -1;
}
if(!mu_ctrl.mu_func) return 0;
if(mu_ctrl.prev_watcher == MUTEX_WATCHER ){
printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count");
mu_ctrl.prev_watcher = MUTEX_WATCHER + 9; // turn header printing off
err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
}
}else if (mu_ctrl.prev_watcher == MUTEX_WATCHER +1) {
printf("%s\n"," lock_ptr locked_total locked_max contended_total count last_owner last_owmer_name");
mu_ctrl.prev_watcher = MUTEX_WATCHER + 9;//打印表头功能关
err = bpf_map_update_elem(mumap_fd, &key, &mu_ctrl, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
}
}
if(!mu_ctrl.mutex_detail){
const struct mutex_contention_event *e = data;
if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) {
return 0;
}
// increment the lock contention count
increment_lock_count(e->ptr);
uint64_t contention_count = get_lock_count(e->ptr);
printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count);
}
return 0;
}


static int mutex_detail() {
int fd = bpf_map__fd(mu_skel->maps.mutex_info_map);
u64 key, next_key;
struct mutex_info_kernel info;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
int err = bpf_map_lookup_elem(fd, &next_key, &info);
if (err == 0 && info.contended_total != 0) { // filter: only report contended locks
printf(" %15llu %15lluns %15lluns %15lluns %15d %15d %20s\n",
next_key, info.locked_total, info.locked_max, info.contended_total, info.count, info.last_owner, info.last_name);
}
key = next_key;
}
return 0;
}

static int schedule_print()
{
@@ -697,6 +737,7 @@ int main(int argc, char **argv)
struct bpf_map *preempt_ctrl_map = NULL;
struct bpf_map *schedule_ctrl_map = NULL;
struct bpf_map *mq_ctrl_map = NULL;
struct bpf_map *mu_ctrl_map = NULL;
int key = 0;
int err;
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
@@ -967,6 +1008,18 @@ int main(int argc, char **argv)
fprintf(stderr, "Failed to load and verify BPF skeleton\n");
goto mutrace_cleanup;
}
err = common_pin_map(&mu_ctrl_map,mu_skel->obj,"mu_ctrl_map",mu_ctrl_path);
if(err < 0){
goto mutrace_cleanup;
}
mumap_fd = bpf_map__fd(mu_ctrl_map);
struct mu_ctrl init_value = {false,false,MUTEX_WATCHER};

err = bpf_map_update_elem(mumap_fd, &key, &init_value, 0);
if(err < 0){
fprintf(stderr, "Failed to update elem\n");
goto mutrace_cleanup;
}
//ctrl
if(err < 0){
goto mutrace_cleanup;
@@ -983,7 +1036,6 @@ int main(int argc, char **argv)
}

rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), mutrace_print, NULL, NULL);
printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count");
if (!rb) {
err = -1;
fprintf(stderr, "Failed to create ring buffer\n");
@@ -1091,6 +1143,11 @@ int main(int argc, char **argv)
printf("Error polling perf buffer: %d\n", err);
break;
}
if(env.MUTRACE&&mu_ctrl.mutex_detail){
err = mutex_detail();
sleep(1);
printf("-------------------------------------------------------------\n");
}
}
else {
printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n");
@@ -1136,4 +1193,4 @@
ring_buffer__free(rb);
mutrace_bpf__destroy(mu_skel);
return err < 0 ? -err : 0;
}
}
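Note: mutrace_print() calls the user-space helpers increment_lock_count() and get_lock_count(), which are defined elsewhere in cpu_watcher and not shown in this diff. Below is a minimal sketch of one way such per-lock counters could be kept, keyed by the mutex address; the fixed-size table and linear probing are illustrative assumptions, not the PR's actual implementation.

#include <stdint.h>

#define LOCK_SLOTS 1024

struct lock_count_entry {
    uint64_t ptr;    /* kernel address of the mutex */
    uint64_t count;  /* contention events seen for this mutex */
};

static struct lock_count_entry lock_counts[LOCK_SLOTS];

static struct lock_count_entry *find_slot(uint64_t ptr)
{
    uint64_t idx = ptr % LOCK_SLOTS;
    for (int probe = 0; probe < LOCK_SLOTS; probe++) {
        struct lock_count_entry *e = &lock_counts[(idx + probe) % LOCK_SLOTS];
        if (e->ptr == ptr || e->ptr == 0)
            return e;                 /* existing entry or first free slot */
    }
    return NULL;                      /* table full: further counts are dropped */
}

void increment_lock_count(uint64_t ptr)
{
    struct lock_count_entry *e = find_slot(ptr);
    if (e) {
        e->ptr = ptr;
        e->count++;
    }
}

uint64_t get_lock_count(uint64_t ptr)
{
    struct lock_count_entry *e = find_slot(ptr);
    return (e && e->ptr == ptr) ? e->count : 0;
}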