From 9faff7b90aa306d24725b04591a9c51292ad95ac Mon Sep 17 00:00:00 2001 From: vvzxy <145555693+vvzxy@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:11:42 +0800 Subject: [PATCH] =?UTF-8?q?cpu=5Fwatcher=EF=BC=9A=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=86=85=E6=A0=B8=E6=80=81=E4=BA=92=E6=96=A5=E9=94=81=E4=BA=89?= =?UTF-8?q?=E7=94=A8=E6=95=B0=E6=8D=AE=E9=87=87=E9=9B=86=20(#856)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cpu_watcher:schedule_delay增加阈值选项&&workflow增加测试流程 * . * cpu_watcher:增加controller功能 * cpu_watcher:schedule_delay增加dump出调度延迟过大task的前两个task * sar功能适配controller * . * proc_image:增加进程画像子功能使用说明 * 增加内核态互斥锁争用数据采集 * helper --- .../CPU_Subsystem/cpu_watcher/Makefile | 2 +- .../cpu_watcher/bpf/mutrace.bpf.c | 172 ++++++++++++++++++ .../CPU_Subsystem/cpu_watcher/cpu_watcher.c | 100 +++++++--- .../cpu_watcher/include/cpu_watcher.h | 21 +++ .../cpu_watcher/include/cpu_watcher_helper.h | 82 ++++++++- 5 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile index bcc7c1236..1e1d4eaab 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile @@ -42,7 +42,7 @@ INCLUDES := -I$(OUTPUT) -I../../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(L CFLAGS := -g -Wall ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay +APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay mutrace TARGETS=cpu_watcher CONTROLLER := controller diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c new file mode 100644 index 000000000..e2e24aa65 --- /dev/null +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c @@ -0,0 +1,172 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com + +#include +#include +#include +#include +#include "cpu_watcher.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +BPF_HASH(mutex_info_map,u64,struct mutex_info, 1024); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} rb SEC(".maps"); + + +/*----------------------------------------------*/ +/* 内核态互斥锁 */ +/*----------------------------------------------*/ + +SEC("kprobe/mutex_lock") +int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) { + u64 lock_addr = (u64)lock; // 获取锁地址 + u64 ts = bpf_ktime_get_ns(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->acquire_time = ts; // 保存锁获取时间 + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = 0, + .last_owner = 0, + .acquire_time = ts, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + return 0; +} + +SEC("kprobe/mutex_trylock") +int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) { + int ret = PT_REGS_RC(ctx); + if (ret == 0) { // 成功获取锁 + u64 lock_addr = (u64)lock; // 获取锁地址 + u64 ts = bpf_ktime_get_ns(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->acquire_time = ts; + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = 0, + .last_owner = 0, + .acquire_time = ts, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + } + return 0; +} + +SEC("kprobe/__mutex_lock_slowpath") +int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) { + struct mutex_contention_event *e; + struct task_struct *owner_task; + struct task_struct *contender_task; + pid_t pid = bpf_get_current_pid_tgid(); + long owner; + u64 lock_addr = (u64)lock; + u64 ts = bpf_ktime_get_ns(); + e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); + if (!e) { + return 0; + } + e->contender_pid = pid; + e->ptr = lock_addr; + bpf_get_current_comm(&e->contender_name, sizeof(e->contender_name)); + bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner); + owner_task = (struct task_struct *)(owner & ~0x1L); + contender_task = (struct task_struct *)bpf_get_current_task(); + bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio); + if (owner_task) { + bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid); + bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm); + bpf_probe_read_kernel(&e->owner_prio, sizeof(e->owner_prio), &owner_task->prio); + } else { + e->owner_pid = 0; + __builtin_memset(e->owner_name, 0, sizeof(e->owner_name)); + } + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->contended_total += ts - info->acquire_time; + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = ts, + .last_owner = 0, + .acquire_time = 0, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + bpf_ringbuf_submit(e, 0); + return 0; +} + +SEC("kprobe/mutex_unlock") +int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) { + u64 lock_addr = (u64)lock; + u64 ts = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + u64 held_time = ts - info->acquire_time; // 计算锁被持有的时间 + info->locked_total += held_time; // 更新锁被持有的总时间 + if (held_time > info->locked_max) { + info->locked_max = held_time; // 更新锁被持有的最长时间 + } + info->last_owner = pid; // 更新最后一次持有该锁的线程ID + } + return 0; +} + +/*----------------------------------------------*/ +/* 用户态互斥锁 */ +/*----------------------------------------------*/ + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_lock_init, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_lock,pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_try, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_unlock, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_destroy, pthread_mutex_t *mutex){ + +// } \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c index 0cdf445f5..36a533d09 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c @@ -35,11 +35,13 @@ #include "preempt.skel.h" #include "schedule_delay.skel.h" #include "mq_delay.skel.h" +#include "mutrace.skel.h" typedef long long unsigned int u64; typedef unsigned int u32; + struct list_head { struct list_head *next; struct list_head *prev; @@ -65,6 +67,7 @@ static struct env { int freq; bool EWMA; int cycle; + int MUTRACE; } env = { .time = 0, .period = 1, @@ -78,6 +81,7 @@ static struct env { .freq = 99, .EWMA = false, .cycle = 0, + .MUTRACE = false, }; @@ -88,6 +92,7 @@ struct sc_delay_bpf *sc_skel; struct preempt_bpf *preempt_skel; struct schedule_delay_bpf *sd_skel; struct mq_delay_bpf *mq_skel; +struct mutrace_bpf *mu_skel; static int csmap_fd; static int sarmap_fd; @@ -132,6 +137,7 @@ static const struct argp_option opts[] = { {"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" }, {"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" }, {"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" }, + {"mutrace", 'x', 0, 0, "Print mutrace data(the data of cpu)" }, {"ewma", 'E',0,0,"dynamic filte the data"}, {"cycle", 'T',"CYCLE",0,"Periods of the ewma"}, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, @@ -166,6 +172,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'm': env.MQ_DELAY = true; break; + case 'x': + env.MUTRACE = true; + break; case 'E': env.EWMA = true; break; @@ -547,38 +556,23 @@ static int preempt_print(void *ctx, void *data, unsigned long data_sz) return 0; } -// 定义一个结构来存储已输出的条目 -struct output_entry { - int pid; - char comm[16]; - long long delay; -}; -// 定义一个数组来存储已输出的条目 -struct output_entry seen_entries[MAX_ENTRIES]; -int seen_count = 0; - -// 检查条目是否已存在 -bool entry_exists(int pid, const char *comm, long long delay) { - for (int i = 0; i < seen_count; i++) { - if (seen_entries[i].pid == pid && - strcmp(seen_entries[i].comm, comm) == 0 && - seen_entries[i].delay == delay) { - return true; - } - } - return false; -} -// 添加条目到已输出的条目列表 -void add_entry(int pid, const char *comm, long long delay) { - if (seen_count < MAX_ENTRIES) { - seen_entries[seen_count].pid = pid; - strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm)); - seen_entries[seen_count].delay = delay; - seen_count++; +//mutrace输出 +static int mutrace_print(void *ctx, void *data, unsigned long data_sz) { + const struct mutex_contention_event *e = data; + if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) { + return 0; } + // 增加锁争用次数 + increment_lock_count(e->ptr); + uint64_t contention_count = get_lock_count(e->ptr); + printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count); + return 0; } + + + static int schedule_print() { int err,key = 0; @@ -961,6 +955,40 @@ int main(int argc, char **argv) fprintf(stderr, "Failed to create ring buffer\n"); goto mq_delay_cleanup; } + }else if (env.MUTRACE) { + mu_skel = mutrace_bpf__open(); + if (!mu_skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + err = mutrace_bpf__load(mu_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto mutrace_cleanup; + } + err = mutrace_bpf__attach(mu_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto mutrace_cleanup; + } + + rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), mutrace_print, NULL, NULL); + printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count"); + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto mutrace_cleanup; + } } while (!exiting) { if(env.SAR){ @@ -1053,6 +1081,17 @@ int main(int argc, char **argv) break; } } + else if (env.MUTRACE) { + err = ring_buffer__poll(rb, 100 /* timeout, ms */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + } else { printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n"); break; @@ -1092,4 +1131,9 @@ int main(int argc, char **argv) ring_buffer__free(rb); mq_delay_bpf__destroy(mq_skel); return err < 0 ? -err : 0; + +mutrace_cleanup: + ring_buffer__free(rb); + mutrace_bpf__destroy(mu_skel); + return err < 0 ? -err : 0; } \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h index eadd0d874..3425e8975 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h @@ -155,6 +155,27 @@ struct proc_history { struct proc_info last[2]; // 存储最后两个调度的进程信息 }; +/*----------------------------------------------*/ +/* mutrace相关结构体 */ +/*----------------------------------------------*/ +struct mutex_info { + u64 locked_total;//锁被持有的总时间 + u64 locked_max;//锁被持有的最长时间 + u64 contended_total;//锁发生竞争的总时间 + pid_t last_owner;//最后一次持有该锁的线程 ID + u64 acquire_time; // 锁每次被获取的时间戳,方便后续计算 + u64 ptr;//地址 +}; + +struct mutex_contention_event { + u64 ptr;//锁地址 + pid_t owner_pid;//持有者pid + pid_t contender_pid;//抢占者pid + char contender_name[TASK_COMM_LEN]; + char owner_name[TASK_COMM_LEN]; + int owner_prio; + int contender_prio; +}; /*----------------------------------------------*/ /* mq_delay相关结构体 */ diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h index 5250aef89..dc78f2622 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h @@ -25,6 +25,7 @@ #define PREEMPT_WACTHER 40 #define SCHEDULE_WACTHER 50 #define MQ_WACTHER 60 +#define HASH_SIZE 1024 /*----------------------------------------------*/ /* ewma算法 */ @@ -64,7 +65,9 @@ bool dynamic_filter(struct ewma_info *ewma_syscall_delay, double dataPoint) { return 0; } - +/*----------------------------------------------*/ +/* bpf file system */ +/*----------------------------------------------*/ const char *sar_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sar_ctrl_map"; const char *cs_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/cs_ctrl_map"; const char *sc_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sc_ctrl_map"; @@ -199,4 +202,81 @@ int update_mq_ctrl_map(struct mq_ctrl mq_ctrl){ return 0; } + +/*----------------------------------------------*/ +/* mutex_count */ +/*----------------------------------------------*/ + +typedef struct { + uint64_t ptr; + uint64_t count; +} lock_count_t; + +lock_count_t lock_counts[HASH_SIZE]; + +static uint64_t hash(uint64_t ptr) { + return ptr % HASH_SIZE; +} + +static void increment_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + lock_counts[h].ptr = ptr; + lock_counts[h].count = 1; + } else { + lock_counts[h].count++; + } +} + +static uint64_t get_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + return 0; + } else { + return lock_counts[h].count; + } +} + +/*----------------------------------------------*/ +/* hash */ +/*----------------------------------------------*/ + + +struct output_entry { + int pid; + char comm[16]; + long long delay; +}; + + +struct output_entry seen_entries[MAX_ENTRIES]; +int seen_count = 0; + + +bool entry_exists(int pid, const char *comm, long long delay) { + for (int i = 0; i < seen_count; i++) { + if (seen_entries[i].pid == pid && + strcmp(seen_entries[i].comm, comm) == 0 && + seen_entries[i].delay == delay) { + return true; + } + } + return false; +} + + +void add_entry(int pid, const char *comm, long long delay) { + if (seen_count < MAX_ENTRIES) { + seen_entries[seen_count].pid = pid; + strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm)); + seen_entries[seen_count].delay = delay; + seen_count++; + } +} #endif // CPU_WATCHER_HELPER_H \ No newline at end of file