diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/README.md b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/README.md index b981830cd..ce938ded0 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/README.md +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/README.md @@ -29,14 +29,20 @@ make clean 清除生成文件 | 参数 | 描述 | | :----------------: | :----------------------------------------: | -| -s :SAR | 实时采集SAR的各项指标,每秒输出一次 | +| -s :SAR | 实时采集SAR的各项指标 | +| -i:interval | 修改SAR功能的输出间隔 | +| -P:percent | 按照百分比输出SAR功能的各项指标 | | -p:preempt_time | 实时采集当前系统的每次抢占调度详细信息 | | -d:schedule_delay | 实时采集当前系统的调度时延 | | -S:syscall_delay | 实时采集当前系统调用时间 | | -m:mq_delay | 实时采集当前消息队列通信时延 | | -c:cs_delay | 实时对内核函数schedule()的执行时长进行测试 | -### 1.SAR 统计功能(每秒输出一次): +### 1.SAR 统计功能: + +```shell +./cpu_watcher -s +``` #### 输出效果: @@ -56,6 +62,16 @@ make clean 清除生成文件 16:18:14 43 1032 1 577 19513 1704 3919 26 10 30 ``` +​ 使用参数i可以调整输出间隔,默认为1s,参数P可以按照cpu核数和自定义的输出间隔对数据进行归一化,并以百分比的形式输出,且大于60%的数据会标红输出: + +```shell +./cpu_watcher -s -i 2 -P +``` +#### 输出效果: + +![image13](docs/image/image13.png) + + 对上述参数的解释: - `proc/s`: 每秒创建的进程数,此数值是通过fork数来统计的。 @@ -71,6 +87,7 @@ make clean 清除生成文件 原理介绍: [libbpf_sar工具原理分析](docs/libbpf_sar.md) + ### **2.统计抢占调度时间:** ​ 统计系统中发生抢占调度的情况,包括抢占进程的`pid`与进程名,以及被强占进程的`pid`,和本次抢占时间,单位纳秒。 @@ -98,21 +115,22 @@ node 14221 2589 3355 ### 3.**统计调度延迟:** -​ 分析系统中进程调度的延迟情况,提供相关统计数据,输出包括当前系统的最大调度延迟、最小调度延迟、平均调度延迟。 +​ 分析系统中进程调度的延迟情况,提供相关统计数据,输出包括当前系统的最大调度延迟、最小调度延迟、平均调度延迟,以及对应进程的名字。 #### 输出效果: ``` - TIME avg_delay/μs max_delay/μs min_delay/μs -17:31:28 35.005000 97.663000 9.399000 -17:31:29 326.518000 12618.465000 7.994000 -17:31:30 455.837000 217053.545000 6.462000 -17:31:31 422.582000 217053.545000 6.462000 -17:31:32 382.627000 217053.545000 6.462000 -17:31:33 360.499000 217053.545000 6.462000 -17:31:34 364.805000 217053.545000 6.462000 -17:31:35 362.039000 217053.545000 6.462000 -17:31:36 373.751000 217053.545000 6.462000 + TIME avg_delay/μs max_delay/μs max_proc_name min_delay/μs min_proc_name +22:06:02 
642.770000 60711.755000 node 5.227000 cpu_watcher +22:06:03 510.041000 60711.755000 node 5.227000 cpu_watcher +22:06:04 491.107000 60711.755000 node 5.227000 cpu_watcher +22:06:05 468.128000 60711.755000 node 5.227000 cpu_watcher +22:06:06 454.244000 60711.755000 node 5.227000 cpu_watcher +22:06:07 472.455000 61931.163000 node 5.227000 cpu_watcher +22:06:08 441.756000 61931.163000 node 3.360000 cpu_watcher +22:06:09 442.631000 61931.163000 node 3.360000 cpu_watcher +22:06:10 407.389000 61931.163000 node 2.549000 cpu_watcher +22:06:11 426.593000 62247.982000 node 2.549000 cpu_watcher ``` 原理介绍: @@ -234,4 +252,4 @@ per_len = 1000 如果你也对cpu_watcher或ebpf感兴趣,欢迎加入我们一起开发cpu_watcher工具,希望我们可以共同成长。 -**cpu_watcher负责人:** albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com +**cpu_watcher负责人:** albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c index 8e3f85370..fe3832719 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c @@ -555,7 +555,7 @@ static int mq_event(void *ctx, void *data,unsigned long data_sz) }else{ delay = (e->rcv_exit_time - e->send_enter_time)/1000000.0 + send_delay + rcv_delay; } - printf("%02d:%02d:%02d %-8llu %-8lu %-8lu \t%-16ld %-16ld %-16ld %-16ld\t%-15.5f %-15.5f %-15.5f\n", + printf("%02d:%02d:%02d %-8u %-8u %-8u \t%-16llu %-16llu %-16llu %-16llu\t%-15.5f %-15.5f %-15.5f\n", localTime->tm_hour, localTime->tm_min, localTime->tm_sec, e->mqdes,e->send_pid,e->rcv_pid, e->send_enter_time,e->send_exit_time,e->rcv_enter_time,e->rcv_exit_time, diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/docs/image/image13.png b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/docs/image/image13.png new file mode 100644 index 000000000..566fd1c1e Binary files /dev/null and 
b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/docs/image/image13.png differ diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test/test_cpuwatcher.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test/test_cpuwatcher.c index 87fcf2431..0d5561ebd 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test/test_cpuwatcher.c +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test/test_cpuwatcher.c @@ -1,21 +1,3 @@ -// Copyright 2024 The LMP Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com -// -// process image of the user test program - #include #include #include @@ -49,19 +31,20 @@ static struct env { const char argp_program_doc[] ="To test cpu_watcher.\n"; static const struct argp_option opts[] = { - { "sar", 's', NULL, 0, "To test sar" }, - { "cs_delay", 'c', NULL, 0, "To test cs_delay" }, - { "sc_delay", 'S', NULL, 0, "To test sc_delay" }, - { "mq_delay", 'm', NULL, 0, "To test mq_delay" }, - { "preempt_delay", 'p', NULL, 0, "To test preempt_delay" }, - { "schedule_delay", 'd', NULL, 0, "To test schedule_delay"}, - { "all", 'a', NULL, 0, "To test all" }, - { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help" }, + { "sar", 's', NULL, 0, "To test sar", 0 }, + { "cs_delay", 'c', NULL, 0, "To test cs_delay", 0 }, + { "sc_delay", 'S', NULL, 0, "To test sc_delay", 0 }, + { "mq_delay", 'm', NULL, 0, "To test mq_delay", 0 }, + { "preempt_delay", 'p', NULL, 0, "To test preempt_delay", 0 }, + { "schedule_delay", 'd', NULL, 0, "To test schedule_delay", 0 }, + { "all", 'a', NULL, 0, "To test all", 0 }, + { NULL, 'h', NULL, OPTION_HIDDEN, "show the full help", 0 }, {}, }; static error_t parse_arg(int key, char *arg, struct argp_state *state) { + (void)arg; switch (key) { case 'a': env.sar_test = true; @@ -99,20 +82,48 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } +void *schedule_stress_test(void *arg) { + (void)arg; + while (1) { + sched_yield(); // 调度函数 + } + return NULL; +} + +void start_schedule_stress_test(int num_threads) { + pthread_t *threads = malloc(num_threads * sizeof(pthread_t)); + for (int i = 0; i < num_threads; i++) { + pthread_create(&threads[i], NULL, schedule_stress_test, NULL); + } + for (int i = 0; i < num_threads; i++) { + pthread_join(threads[i], NULL); + } + free(threads); +} + void *func(void *arg) { + (void)arg; int tpid; - tpid = gettid(); printf("新线程pid:%d,睡眠3s后退出\n",tpid); sleep(3); printf("新线程退出\n"); + 
return NULL; +} + +void input_pid() { + int stop; + int pid = getpid(); + printf("test_proc进程的PID:【%d】\n", pid); + printf("输入任意数字继续程序的运行:"); + scanf("%d", &stop); // 使用时将其取消注释 + printf("程序开始执行...\n"); + printf("\n"); } int main(int argc, char **argv){ - int pid,stop; int err; - pthread_t tid; static const struct argp argp = { .options = opts, .parser = parse_arg, @@ -123,27 +134,40 @@ int main(int argc, char **argv){ if (err) return err; - pid = getpid(); - printf("test_proc进程的PID:【%d】\n", pid); - printf("输入任意数字继续程序的运行:"); - scanf("%d",&stop); // 使用时将其取消注释 - printf("程序开始执行...\n"); - printf("\n"); - if(env.sar_test){ - /*sar的测试代码*/ + printf("SAR_TEST----------------------------------------------\n"); + //SAR功能测试逻辑:系统上执行混合压力测试,包括4个顺序读写硬盘线程、4个IO操作线程,持续15秒,观察加压前后的变化。 + char *argvv[] = { "/usr/bin/stress-ng", "--hdd", "4", "--hdd-opts", "wr-seq,rd-seq", "--io", "4", "--timeout", "15s", "--metrics-brief", NULL }; + char *envp[] = { "PATH=/bin", NULL }; + printf("SAR功能测试逻辑:系统上执行混合压力测试,包括4个顺序读写硬盘线程、4个IO操作线程和4个UDP网络操作线程,持续15秒,观察加压前后的变化\n"); + printf("执行指令 stress-ng --hdd 4 --hdd-opts wr-seq,rd-seq --io 4 --udp 4 --timeout 15s --metrics-brief\n"); + execve("/usr/bin/stress-ng", argvv, envp); + perror("execve"); + printf("\n"); } if(env.cs_delay_test){ - /*cs_delay的测试代码*/ + printf("CS_DELAY_TEST----------------------------------------------\n"); + //CS_DELAY功能测试逻辑:无限循环的线程函数,不断调用 sched_yield() 来放弃 CPU 使用权,模拟高调度负载。 + start_schedule_stress_test(10); // 创建10个线程进行调度压力测试 } if(env.sc_delay_test){ - /*sc_delay的测试代码*/ + printf("SC_DELAY_TEST----------------------------------------------\n"); + //SC_DELAY功能测试逻辑:创建多个系统调用,观察其变化 + const int num_iterations = 1000000; // 系统调用的迭代次数 + for (int i = 0; i < num_iterations; i++) { + getpid(); // 获取进程ID + getppid(); // 获取父进程ID + time(NULL); // 获取当前时间 + syscall(SYS_gettid); // 获取线程ID + } + printf("系统调用压力测试完成。\n"); } if(env.mq_delay_test){ /*mq_delay的测试代码*/ + input_pid(); // 在mq_delay_test中调用 system("./sender & ./receiver"); sleep(60); system("^Z"); 
@@ -154,7 +178,16 @@ int main(int argc, char **argv){ } if(env.schedule_test){ - /*schedule_delay的测试代码*/ + printf("SCHEDULE_TEST----------------------------------------------\n"); + // 调度延迟测试逻辑:创建线程执行 sysbench --threads=32 --time=10 cpu run,观察加压前后的变化 + char *argvv[] = { "/usr/bin/sysbench", "--threads=32", "--time=10", "cpu", "run", NULL }; + char *envp[] = { "PATH=/bin", NULL }; + printf("调度延迟测试逻辑:\n"); + printf("执行指令 sysbench --threads=32 --time=10 cpu run\n"); + execve("/usr/bin/sysbench", argvv, envp); + perror("execve"); + + printf("\n"); } return 0;