diff --git a/.github/workflows/ebpf_cpu_watcher.yml b/.github/workflows/ebpf_cpu_watcher.yml index 41364689f..84848c083 100644 --- a/.github/workflows/ebpf_cpu_watcher.yml +++ b/.github/workflows/ebpf_cpu_watcher.yml @@ -23,18 +23,23 @@ jobs: - name: Install dependencies run: | sudo apt update - sudo apt install libbpf-dev clang llvm libelf-dev libpcap-dev gcc-multilib build-essential + sudo apt install -y libbpf-dev clang llvm libelf-dev libpcap-dev gcc-multilib build-essential git submodule update --init --recursive - - name: Run cpu_watcher + - name: Build cpu_watcher run: | cd eBPF_Supermarket/CPU_Subsystem/cpu_watcher/ make - sudo ./cpu_watcher - - name: Run test_cpuwatcher - + - name: Run cpu_watcher + run: | + sudo ./eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher + + - name: Build test_cpuwatcher run: | cd eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test make - ./test_cpuwatcher + + - name: Run test_cpuwatcher + run: | + ./eBPF_Supermarket/CPU_Subsystem/cpu_watcher/test/test_cpuwatcher diff --git a/.github/workflows/ebpf_net_manager.yml b/.github/workflows/ebpf_net_manager.yml index 4881a21c7..a0364aef2 100644 --- a/.github/workflows/ebpf_net_manager.yml +++ b/.github/workflows/ebpf_net_manager.yml @@ -36,10 +36,15 @@ jobs: cd eBPF_Supermarket/Network_Subsystem/net_manager/ sudo ./configure sudo make + ifconfig # run - sudo timeout -s SIGINT 5 ./xdp_loader -d ens33 -S || if [[ $? != 124 && $? != 0 ]];then exit $?;fi - sudo ./xacladm load ens33 ./conf.d/mac_load.conf - sudo xdp-loader unload ens33 --all + cd testenv + sudo ./testenv.sh setup --name veth-basic02 + cd .. + cd net_manager + sudo timeout -s SIGINT 5 ./xdp_loader -d eth0 -S || if [[ $? != 124 && $? != 0 ]];then exit $?;fi + sudo ./xdp_loader -d eth0 -S + diff --git a/MagicEyes/src/visualization/vscode_ext/README.md b/MagicEyes/src/visualization/vscode_ext/README.md index 1f1a6c5e1..17cd1fc5f 100644 --- a/MagicEyes/src/visualization/vscode_ext/README.md +++ b/MagicEyes/src/visualization/vscode_ext/README.md @@ -4,7 +4,27 @@ ![](./images/lmp_vscode_ext.gif) -### 2. 相关提示 +### 2. 安装与使用 + +#### 2.1 导入插件 + +![import_vscode_ext](./images/import_vscode_ext.png) + +安装成功如下: + +![lmp_ext_install_success](./images/lmp_ext_install_success.png) + +#### 2.2 设置 + +- 启动grafana(可以在docker中启动),启动prometheus与BPF后端采集程序可以看到数据呈现 +- 设置IP地址与端口,默认端口是`localhost:3000` +- 设置token + +![create_token](./images/create_token.png) + +> [grafana官方_创建token](https://grafana.com/docs/grafana/latest/administration/service-accounts/#create-a-service-account-in-grafana) + +![set_token](./images/set_token.png) 设置可视化面板存放路径 @@ -20,7 +40,15 @@ ![](./images/error_info.png) -### 3. 开发注意事项 +### 3. 插件开发 + +#### 3.1 开发 + +安装yarn并且通过`yarn install`安装所需依赖 + +> tips: 按 F5 开启调试 + +#### 3.2 开发注意事项 1. yo code生成的框架,vscode最小版本是1.90,需要修改为1.74,不然我当前的版本。1.89无法运行插件 2. tsconfig diff --git a/MagicEyes/src/visualization/vscode_ext/images/create_token.png b/MagicEyes/src/visualization/vscode_ext/images/create_token.png new file mode 100644 index 000000000..382f04675 Binary files /dev/null and b/MagicEyes/src/visualization/vscode_ext/images/create_token.png differ diff --git a/MagicEyes/src/visualization/vscode_ext/images/import_vscode_ext.png b/MagicEyes/src/visualization/vscode_ext/images/import_vscode_ext.png new file mode 100644 index 000000000..6b780114a Binary files /dev/null and b/MagicEyes/src/visualization/vscode_ext/images/import_vscode_ext.png differ diff --git a/MagicEyes/src/visualization/vscode_ext/images/lmp_ext_install_success.png b/MagicEyes/src/visualization/vscode_ext/images/lmp_ext_install_success.png new file mode 100644 index 000000000..2f5198b70 Binary files /dev/null and b/MagicEyes/src/visualization/vscode_ext/images/lmp_ext_install_success.png differ diff --git a/MagicEyes/src/visualization/vscode_ext/images/set_token.png b/MagicEyes/src/visualization/vscode_ext/images/set_token.png new file mode 100644 index 000000000..06eefba18 Binary files /dev/null and b/MagicEyes/src/visualization/vscode_ext/images/set_token.png differ diff --git a/MagicEyes/src/visualization/vscode_ext/lmp_ext_vscode/src/extension.ts b/MagicEyes/src/visualization/vscode_ext/lmp_ext_vscode/src/extension.ts index 388c2b339..2b339d898 100644 --- a/MagicEyes/src/visualization/vscode_ext/lmp_ext_vscode/src/extension.ts +++ b/MagicEyes/src/visualization/vscode_ext/lmp_ext_vscode/src/extension.ts @@ -168,7 +168,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_cpu_watcher = { title: cpu_watcher_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现cpu_watcher的grafana的可视化面板", arguments: [ cpu_watcher_label ] @@ -182,7 +182,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_proc_image = { title: proc_iamge_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现proc_image的grafana的可视化面板", arguments: [ proc_iamge_label ] @@ -214,7 +214,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_net_manager = { title: net_manager_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现net_manager的grafana的可视化面板", arguments: [ net_manager_label ] @@ -231,7 +231,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_mem_watcher = { title: mem_watcher_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现mem_watcher的grafana的可视化面板", arguments: [ mem_watcher_label ] @@ -248,7 +248,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_stack_analyzer = { title: stack_analyzer_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现stack_analyzer的grafana的可视化面板", arguments: [ stack_analyzer_label ] @@ -265,7 +265,7 @@ export class TreeViewProvider implements TreeDataProvider { let command_kvm_watcher = { title: kvm_watcher_label, command: 'itemClick', - tooltip: "点击将呈现net_watcher的grafana的可视化面板", + tooltip: "点击将呈现kvm_watcher的grafana的可视化面板", arguments: [ kvm_watcher_label ] @@ -390,4 +390,4 @@ export class TreeViewProvider implements TreeDataProvider { } } - */ \ No newline at end of file + */ diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile index bcc7c1236..1e1d4eaab 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/Makefile @@ -42,7 +42,7 @@ INCLUDES := -I$(OUTPUT) -I../../../libbpf/include/uapi -I$(dir $(VMLINUX)) -I$(L CFLAGS := -g -Wall ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay +APPS =cs_delay sar sc_delay preempt schedule_delay mq_delay mutrace TARGETS=cpu_watcher CONTROLLER := controller diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c new file mode 100644 index 000000000..e2e24aa65 --- /dev/null +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/bpf/mutrace.bpf.c @@ -0,0 +1,172 @@ +// Copyright 2023 The LMP Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://github.com/linuxkerneltravel/lmp/blob/develop/LICENSE +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// author: albert_xuu@163.com zhangxy1016304@163.com zhangziheng0525@163.com + +#include +#include +#include +#include +#include "cpu_watcher.h" + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +BPF_HASH(mutex_info_map,u64,struct mutex_info, 1024); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} rb SEC(".maps"); + + +/*----------------------------------------------*/ +/* 内核态互斥锁 */ +/*----------------------------------------------*/ + +SEC("kprobe/mutex_lock") +int BPF_KPROBE(trace_mutex_lock, struct mutex *lock) { + u64 lock_addr = (u64)lock; // 获取锁地址 + u64 ts = bpf_ktime_get_ns(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->acquire_time = ts; // 保存锁获取时间 + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = 0, + .last_owner = 0, + .acquire_time = ts, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + return 0; +} + +SEC("kprobe/mutex_trylock") +int BPF_KPROBE(trace_mutex_trylock, struct mutex *lock) { + int ret = PT_REGS_RC(ctx); + if (ret == 0) { // 成功获取锁 + u64 lock_addr = (u64)lock; // 获取锁地址 + u64 ts = bpf_ktime_get_ns(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->acquire_time = ts; + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = 0, + .last_owner = 0, + .acquire_time = ts, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + } + return 0; +} + +SEC("kprobe/__mutex_lock_slowpath") +int BPF_KPROBE(trace_mutex_lock_slowpath, struct mutex *lock) { + struct mutex_contention_event *e; + struct task_struct *owner_task; + struct task_struct *contender_task; + pid_t pid = bpf_get_current_pid_tgid(); + long owner; + u64 lock_addr = (u64)lock; + u64 ts = bpf_ktime_get_ns(); + e = bpf_ringbuf_reserve(&rb, sizeof(*e), 0); + if (!e) { + return 0; + } + e->contender_pid = pid; + e->ptr = lock_addr; + bpf_get_current_comm(&e->contender_name, sizeof(e->contender_name)); + bpf_probe_read_kernel(&owner, sizeof(owner), &lock->owner); + owner_task = (struct task_struct *)(owner & ~0x1L); + contender_task = (struct task_struct *)bpf_get_current_task(); + bpf_probe_read_kernel(&e->contender_prio, sizeof(e->contender_prio), &contender_task->prio); + if (owner_task) { + bpf_probe_read_kernel(&e->owner_pid, sizeof(e->owner_pid), &owner_task->pid); + bpf_probe_read_kernel_str(&e->owner_name, sizeof(e->owner_name), owner_task->comm); + bpf_probe_read_kernel(&e->owner_prio, sizeof(e->owner_prio), &owner_task->prio); + } else { + e->owner_pid = 0; + __builtin_memset(e->owner_name, 0, sizeof(e->owner_name)); + } + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + info->contended_total += ts - info->acquire_time; + } else { + struct mutex_info new_info = { + .locked_total = 0, + .locked_max = 0, + .contended_total = ts, + .last_owner = 0, + .acquire_time = 0, + .ptr = lock_addr + }; + bpf_map_update_elem(&mutex_info_map, &lock_addr, &new_info, BPF_ANY); + } + bpf_ringbuf_submit(e, 0); + return 0; +} + +SEC("kprobe/mutex_unlock") +int BPF_KPROBE(trace_mutex_unlock, struct mutex *lock) { + u64 lock_addr = (u64)lock; + u64 ts = bpf_ktime_get_ns(); + pid_t pid = bpf_get_current_pid_tgid(); + struct mutex_info *info = bpf_map_lookup_elem(&mutex_info_map, &lock_addr); + if (info) { + u64 held_time = ts - info->acquire_time; // 计算锁被持有的时间 + info->locked_total += held_time; // 更新锁被持有的总时间 + if (held_time > info->locked_max) { + info->locked_max = held_time; // 更新锁被持有的最长时间 + } + info->last_owner = pid; // 更新最后一次持有该锁的线程ID + } + return 0; +} + +/*----------------------------------------------*/ +/* 用户态互斥锁 */ +/*----------------------------------------------*/ + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_lock_init, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_lock,pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_try, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_unlock, pthread_mutex_t *mutex){ + +// } + +// SEC("uprobe") +// int BPF_KPROBE(pthread_mutex_destroy, pthread_mutex_t *mutex){ + +// } \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c index 0cdf445f5..36a533d09 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/cpu_watcher.c @@ -35,11 +35,13 @@ #include "preempt.skel.h" #include "schedule_delay.skel.h" #include "mq_delay.skel.h" +#include "mutrace.skel.h" typedef long long unsigned int u64; typedef unsigned int u32; + struct list_head { struct list_head *next; struct list_head *prev; @@ -65,6 +67,7 @@ static struct env { int freq; bool EWMA; int cycle; + int MUTRACE; } env = { .time = 0, .period = 1, @@ -78,6 +81,7 @@ static struct env { .freq = 99, .EWMA = false, .cycle = 0, + .MUTRACE = false, }; @@ -88,6 +92,7 @@ struct sc_delay_bpf *sc_skel; struct preempt_bpf *preempt_skel; struct schedule_delay_bpf *sd_skel; struct mq_delay_bpf *mq_skel; +struct mutrace_bpf *mu_skel; static int csmap_fd; static int sarmap_fd; @@ -132,6 +137,7 @@ static const struct argp_option opts[] = { {"preempt_time", 'p', 0, 0, "Print preempt_time (the data of preempt_schedule)" }, {"schedule_delay", 'd', 0, 0, "Print schedule_delay (the data of cpu)" }, {"mq_delay", 'm', 0, 0, "Print mq_delay(the data of proc)" }, + {"mutrace", 'x', 0, 0, "Print mutrace data(the data of cpu)" }, {"ewma", 'E',0,0,"dynamic filte the data"}, {"cycle", 'T',"CYCLE",0,"Periods of the ewma"}, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, @@ -166,6 +172,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'm': env.MQ_DELAY = true; break; + case 'x': + env.MUTRACE = true; + break; case 'E': env.EWMA = true; break; @@ -547,38 +556,23 @@ static int preempt_print(void *ctx, void *data, unsigned long data_sz) return 0; } -// 定义一个结构来存储已输出的条目 -struct output_entry { - int pid; - char comm[16]; - long long delay; -}; -// 定义一个数组来存储已输出的条目 -struct output_entry seen_entries[MAX_ENTRIES]; -int seen_count = 0; - -// 检查条目是否已存在 -bool entry_exists(int pid, const char *comm, long long delay) { - for (int i = 0; i < seen_count; i++) { - if (seen_entries[i].pid == pid && - strcmp(seen_entries[i].comm, comm) == 0 && - seen_entries[i].delay == delay) { - return true; - } - } - return false; -} -// 添加条目到已输出的条目列表 -void add_entry(int pid, const char *comm, long long delay) { - if (seen_count < MAX_ENTRIES) { - seen_entries[seen_count].pid = pid; - strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm)); - seen_entries[seen_count].delay = delay; - seen_count++; +//mutrace输出 +static int mutrace_print(void *ctx, void *data, unsigned long data_sz) { + const struct mutex_contention_event *e = data; + if (e->owner_pid == 0 || e->contender_pid == 0||e->owner_pid == 1) { + return 0; } + // 增加锁争用次数 + increment_lock_count(e->ptr); + uint64_t contention_count = get_lock_count(e->ptr); + printf("%15llu %15d %15s %15d %15d %15s %15d %15ld\n", e->ptr, e->owner_pid, e->owner_name, e->owner_prio,e->contender_pid, e->contender_name, e->contender_prio,contention_count); + return 0; } + + + static int schedule_print() { int err,key = 0; @@ -961,6 +955,40 @@ int main(int argc, char **argv) fprintf(stderr, "Failed to create ring buffer\n"); goto mq_delay_cleanup; } + }else if (env.MUTRACE) { + mu_skel = mutrace_bpf__open(); + if (!mu_skel) { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + err = mutrace_bpf__load(mu_skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + goto mutrace_cleanup; + } + //ctrl + if(err < 0){ + fprintf(stderr, "Failed to update elem\n"); + goto mutrace_cleanup; + } + err = mutrace_bpf__attach(mu_skel); + if (err) { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto mutrace_cleanup; + } + + rb = ring_buffer__new(bpf_map__fd(mu_skel->maps.rb), mutrace_print, NULL, NULL); + printf("%s\n"," lock_ptr owner_pid owner_comm owner_prio contender_pid contender_comm contender_prio contender_count"); + if (!rb) { + err = -1; + fprintf(stderr, "Failed to create ring buffer\n"); + goto mutrace_cleanup; + } } while (!exiting) { if(env.SAR){ @@ -1053,6 +1081,17 @@ int main(int argc, char **argv) break; } } + else if (env.MUTRACE) { + err = ring_buffer__poll(rb, 100 /* timeout, ms */); + if (err == -EINTR) { + err = 0; + break; + } + if (err < 0) { + printf("Error polling perf buffer: %d\n", err); + break; + } + } else { printf("正在开发中......\n-c 打印cs_delay:\t对内核函数schedule()的执行时长进行测试;\n-s sar工具;\n-y 打印sc_delay:\t系统调用运行延迟进行检测; \n-p 打印preempt_time:\t对抢占调度时间输出;\n"); break; @@ -1092,4 +1131,9 @@ int main(int argc, char **argv) ring_buffer__free(rb); mq_delay_bpf__destroy(mq_skel); return err < 0 ? -err : 0; + +mutrace_cleanup: + ring_buffer__free(rb); + mutrace_bpf__destroy(mu_skel); + return err < 0 ? -err : 0; } \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h index eadd0d874..3425e8975 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher.h @@ -155,6 +155,27 @@ struct proc_history { struct proc_info last[2]; // 存储最后两个调度的进程信息 }; +/*----------------------------------------------*/ +/* mutrace相关结构体 */ +/*----------------------------------------------*/ +struct mutex_info { + u64 locked_total;//锁被持有的总时间 + u64 locked_max;//锁被持有的最长时间 + u64 contended_total;//锁发生竞争的总时间 + pid_t last_owner;//最后一次持有该锁的线程 ID + u64 acquire_time; // 锁每次被获取的时间戳,方便后续计算 + u64 ptr;//地址 +}; + +struct mutex_contention_event { + u64 ptr;//锁地址 + pid_t owner_pid;//持有者pid + pid_t contender_pid;//抢占者pid + char contender_name[TASK_COMM_LEN]; + char owner_name[TASK_COMM_LEN]; + int owner_prio; + int contender_prio; +}; /*----------------------------------------------*/ /* mq_delay相关结构体 */ diff --git a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h index 5250aef89..dc78f2622 100644 --- a/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h +++ b/eBPF_Supermarket/CPU_Subsystem/cpu_watcher/include/cpu_watcher_helper.h @@ -25,6 +25,7 @@ #define PREEMPT_WACTHER 40 #define SCHEDULE_WACTHER 50 #define MQ_WACTHER 60 +#define HASH_SIZE 1024 /*----------------------------------------------*/ /* ewma算法 */ @@ -64,7 +65,9 @@ bool dynamic_filter(struct ewma_info *ewma_syscall_delay, double dataPoint) { return 0; } - +/*----------------------------------------------*/ +/* bpf file system */ +/*----------------------------------------------*/ const char *sar_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sar_ctrl_map"; const char *cs_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/cs_ctrl_map"; const char *sc_ctrl_path = "/sys/fs/bpf/cpu_watcher_map/sc_ctrl_map"; @@ -199,4 +202,81 @@ int update_mq_ctrl_map(struct mq_ctrl mq_ctrl){ return 0; } + +/*----------------------------------------------*/ +/* mutex_count */ +/*----------------------------------------------*/ + +typedef struct { + uint64_t ptr; + uint64_t count; +} lock_count_t; + +lock_count_t lock_counts[HASH_SIZE]; + +static uint64_t hash(uint64_t ptr) { + return ptr % HASH_SIZE; +} + +static void increment_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + lock_counts[h].ptr = ptr; + lock_counts[h].count = 1; + } else { + lock_counts[h].count++; + } +} + +static uint64_t get_lock_count(uint64_t ptr) { + uint64_t h = hash(ptr); + while (lock_counts[h].ptr != 0 && lock_counts[h].ptr != ptr) { + h = (h + 1) % HASH_SIZE; + } + if (lock_counts[h].ptr == 0) { + return 0; + } else { + return lock_counts[h].count; + } +} + +/*----------------------------------------------*/ +/* hash */ +/*----------------------------------------------*/ + + +struct output_entry { + int pid; + char comm[16]; + long long delay; +}; + + +struct output_entry seen_entries[MAX_ENTRIES]; +int seen_count = 0; + + +bool entry_exists(int pid, const char *comm, long long delay) { + for (int i = 0; i < seen_count; i++) { + if (seen_entries[i].pid == pid && + strcmp(seen_entries[i].comm, comm) == 0 && + seen_entries[i].delay == delay) { + return true; + } + } + return false; +} + + +void add_entry(int pid, const char *comm, long long delay) { + if (seen_count < MAX_ENTRIES) { + seen_entries[seen_count].pid = pid; + strncpy(seen_entries[seen_count].comm, comm, sizeof(seen_entries[seen_count].comm)); + seen_entries[seen_count].delay = delay; + seen_count++; + } +} #endif // CPU_WATCHER_HELPER_H \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_output.png b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_output.png new file mode 100644 index 000000000..1cfd38889 Binary files /dev/null and b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_output.png differ diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_process.png b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_process.png new file mode 100644 index 000000000..9d989c8ea Binary files /dev/null and b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/images/schedule_delay_process.png differ diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_delay_usemethod.md b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_delay_usemethod.md new file mode 100644 index 000000000..84cee2c22 --- /dev/null +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/docs/schedule_delay_usemethod.md @@ -0,0 +1,152 @@ +# Schedule_delay工具使用说明 + +​ Schedule_delay工具是用来检测系统中调度延时的工具,该工具可以监测特定线程或线程组、以及整个系统的调度延迟(最大、最小、平均)。即从一个任务具备运行的条件,到真正执行(获得 CPU 的执行权)的这段时间。实时观测该指标可以帮助我们了解到当前操作系统的负载。 + +## 原理讲解 + +​ 该工具使用六个map来控制工具的行为和存储数据与用户态交互。分别为: + +- **ARRAY MAP**`sched_ctrl_map`:作为调度控制信息的存储,策略上将该map pin到bpf文件系统,即可在用户态更新该map,与内核态交互。其value为`struct sched_ctrl`: + +```c +struct sched_ctrl { + bool sched_func; // 控制是否记录调度相关事件 + pid_t target_pid; // 目标进程的 PID + int target_cpu_id; // 目标 CPU 的 ID + pid_t target_tgid; // 目标线程组的 TGID +}; +``` + +​ 通过这些控制信息,可以控制该工具在内核态的行为。 + +- **HASH MAP**`enable add`:用来记录进程是否被调度过,以避免重复计数或错误计数。 + +- **HASH MAP**`proc_schedule`:用于存储和跟踪每个进程的调度事件信息,key为进程的id和cpu_id,value为` struct schedule_event`: + +```c +struct schedule_event { + pid_t pid; // 进程ID + int tgid; // 线程组ID + int prio; // 进程优先级 + u32 count; // 调度次数 + u64 enter_time; // 进入调度的时间 + u64 sum_delay; // 累计调度延迟 + u64 max_delay; // 最大调度延迟 + u64 min_delay; // 最小调度延迟 +}; +``` + +- **HASH MAP**`target_schedule`:用于存储指定**线程**的调度信息,value为` struct schedule_event`,在用户态通过更新`sched_ctrl_map`指定要记录的线程。 +- **HASH MAP**`tg_schedule`:用于存储指定**线程组**的调度信息,value为` struct schedule_event`,在用户态通过更新`sched_ctrl_map`指定要记录的线程组。 + +- **ARRAY MAP**`sys_schedule`:用于记录整个系调度延时,value为`struct sum_schedule:` + +```c +struct sum_schedule { + long long unsigned int sum_count; // 总调度次数 + long long unsigned int sum_delay; // 总调度延迟 + long long unsigned int max_delay; // 最大调度延迟 + long long unsigned int min_delay; // 最小调度延迟 +}; +``` + +**代码逻辑图如下:** + +![](images/schedule_delay_process.png) + +## 2.使用方法 + +### 2.1编译 + +​ 首先在`lmp/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image`目录下进行编译操作。 + +```shell +make +``` + +​ 编译成功后,会生成两个可执行文件`proc_image`,`controller`。 + +### 2.2挂载 + +​ 在`lmp/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image`目录下运行 `proc_image`可执行文件。 + +​ 使用-S选项挂载检测调度延迟的几个挂载点: + +```shell +sudo ./proc_image -S +``` + +``` +sudo ./proc_image -S +...... +libbpf: map 'sched_ctrl_map': created successfully, fd=3 +libbpf: map 'proc_schedule': created successfully, fd=4 +libbpf: map 'enable_add': created successfully, fd=5 +libbpf: map 'target_schedule': created successfully, fd=6 +libbpf: map 'tg_schedule': created successfully, fd=7 +libbpf: map 'sys_schedule': created successfully, fd=8 +libbpf: map 'schedule.rodata': created successfully, fd=9 +libbpf: pinned map '/sys/fs/bpf/proc_image_map/sched_ctrl_map' +``` + +​ 此时用户态内核态交互的**sched_ctrl_map**已经pin上,现在我们可以通过controller执行文件来更改输出策略。 + +### 2.3用户态控制交互 + +​ 可使用`controller`工具和内核态进行交互,控制输出。 + +​ 重启一个终端, 在`lmp/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image`目录下运行 `controller`可执行文件。 + +​ 使用schedule_image工具的不同参数,控制该工具的使用策略: + +| 参数 | | +| :--: | ---------------------- | +| -S | 使用schedule_image工具 | +| -a | 激活schedule_image工具 | +| -p | 指定目标线程 | +| -P | 指定目标线程组 | +| -c | 指定检测CPU | +| -t | 指定检测时间 | + +通过以下指令更改控制策略: + +- 激活对线程21416的调度延迟进行监测: + + ``` + sudo ./controller -S -a -p 21416 + ``` + +- 激活对线程组21416的调度延迟进行监测: + + ``` + sudo ./controller -s -a -p 21416 + ``` + +- 关闭对线程21416的调度延迟进行监测: + + ``` + sudo ./controller -S -d -p 21416 + ``` + +- 关闭对线程组21416的调度延迟进行监测: + + ``` + sudo ./controller -S -d -P 21416 + ``` + +- 检测整个系统的调度延迟: + + ``` + sudo ./controller -S -a + ``` + +- 关闭进程画像工具: + + ``` + sudo ./controller -f + ``` + +#### 2.4 输出 + +​ 对线程21416的调度延迟进行监测,可以看到指定进程的调度延迟信息,包括该线程最大、最小、平均的调度延迟(P_xxx_DELAY),以及整个系统的最大、最小、平均的调度延迟(S_xxx_DELAY)。 +![](images/schedule_delay_output.png) \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.bpf.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.bpf.c index 886481e22..bff87d0ac 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.bpf.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.bpf.c @@ -25,55 +25,91 @@ struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(pid_t)); __uint(value_size, sizeof(struct migrate_event)); - __uint(max_entries, 128); + __uint(max_entries, 1024); } migrate SEC(".maps"); + struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); - __uint(max_entries, 16); -} t SEC(".maps"); + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct minfo_key)); + __uint(value_size, sizeof(struct per_migrate)); + __uint(max_entries, 1024); +} migrate_info SEC(".maps"); SEC("tracepoint/sched/sched_migrate_task") int tracepoint_sched_migrate_task(struct trace_event_raw_sched_migrate_task *args){ u64 time = bpf_ktime_get_ns();//当前转移时间点; pid_t pid = args->pid; struct migrate_event *migrate_event; + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + struct rq *orig_rq = BPF_CORE_READ(task,se.cfs_rq,rq); + struct cfs_rq *orig_cfs = BPF_CORE_READ(task,se.cfs_rq); + + bpf_printk("[se]:Pload_avg:%llu\tPutil_avg:%llu\n",BPF_CORE_READ(task,se.avg.load_avg),BPF_CORE_READ(task,se.avg.util_avg)); + bpf_printk("[rq]: nr_running :%d cpu_capacity : %ld cpu_capacity_orig : %ld\n", + BPF_CORE_READ(orig_rq,cpu),BPF_CORE_READ(orig_rq,nr_running), + BPF_CORE_READ(orig_rq,cpu_capacity),BPF_CORE_READ(orig_rq,cpu_capacity_orig)); + bpf_printk("Cload_avg:%ld\n",BPF_CORE_READ(orig_cfs,avg.runnable_avg)); migrate_event = bpf_map_lookup_elem(&migrate,&pid); if(!migrate_event){ - int key = 0,*count=bpf_map_lookup_elem(&t,&key); - if(!count){ - int init = 1; - bpf_map_update_elem(&t,&key,&init,BPF_ANY); - } - else *count +=1; - struct migrate_event migrate_event = {}; + struct per_migrate per_migrate = {}; + struct minfo_key mkey = {}; + mkey.pid = pid; + mkey.count = 1; migrate_event.pid = pid; migrate_event.prio = args->prio; - migrate_event.migrate_info[0].time = time; - migrate_event.migrate_info[0].orig_cpu = args->orig_cpu; - migrate_event.migrate_info[0].dest_cpu = args->dest_cpu; migrate_event.count = 1; + migrate_event.rear = 1; + per_migrate.time = time; + per_migrate.orig_cpu = args->orig_cpu; + per_migrate.dest_cpu = args->dest_cpu; + + per_migrate.cpu_capacity = BPF_CORE_READ(orig_rq,cpu_capacity); + per_migrate.cpu_capacity_orig = BPF_CORE_READ(orig_rq,cpu_capacity_orig); + per_migrate.cpu_load_avg = BPF_CORE_READ(orig_cfs,avg.runnable_avg); + + + per_migrate.pload_avg = BPF_CORE_READ(task,se.avg.load_avg);//进程的量化负载; + per_migrate.putil_avg = BPF_CORE_READ(task,se.avg.util_avg);//进程的实际算力; + per_migrate.mem_usage = BPF_CORE_READ(task,mm,total_vm) << PAGE_SHIFT; + + + per_migrate.read_bytes = BPF_CORE_READ(task,ioac.read_bytes); + per_migrate.write_bytes = BPF_CORE_READ(task,ioac.write_bytes); + + per_migrate.context_switches = BPF_CORE_READ(task,nvcsw) + BPF_CORE_READ(task,nivcsw); + // per_migrate.runtime = BPF_CORE_READ(task,se.sum_exec_runtime); + bpf_map_update_elem(&migrate_info, &mkey, &per_migrate, BPF_ANY); bpf_map_update_elem(&migrate, &pid, &migrate_event, BPF_ANY); } /*&& (migrate_event->migrate_info + migrate_event->count) < (migrate_event->migrate_info + MAX_MIGRATE)*/ - else if(migrate_event->count>0 && migrate_event->countmigrate_info + migrate_event->count) < (migrate_event->migrate_info + MAX_MIGRATE) ) + else if(migrate_event->count>0 && migrate_event->countmigrate_info[migrate_event->count].time = time; - migrate_event->migrate_info[migrate_event->count].orig_cpu = args->orig_cpu; - migrate_event->migrate_info[migrate_event->count++].dest_cpu = args->dest_cpu; - } - else if(migrate_event->count>=MAX_MIGRATE) - { - migrate_event->migrate_info[migrate_event->count % MAX_MIGRATE].time = time; - migrate_event->migrate_info[migrate_event->count % MAX_MIGRATE].orig_cpu = args->orig_cpu; - migrate_event->migrate_info[migrate_event->count % MAX_MIGRATE].dest_cpu = args->dest_cpu; + struct per_migrate per_migrate = {}; + struct minfo_key mkey = {}; migrate_event->count++; - migrate_event->rear ++; - } + mkey.pid = pid; + mkey.count = migrate_event->count; + per_migrate.time = time; + per_migrate.orig_cpu = args->orig_cpu; + per_migrate.dest_cpu = args->dest_cpu; + + per_migrate.cpu_capacity = BPF_CORE_READ(orig_rq,cpu_capacity); + per_migrate.cpu_capacity_orig = BPF_CORE_READ(orig_rq,cpu_capacity_orig); + per_migrate.cpu_load_avg = BPF_CORE_READ(orig_cfs,avg.runnable_avg); + + per_migrate.pload_avg = BPF_CORE_READ(task,se.avg.load_avg);//进程的量化负载; + per_migrate.putil_avg = BPF_CORE_READ(task,se.avg.util_avg);//进程的实际算力; + per_migrate.mem_usage = BPF_CORE_READ(task,mm,total_vm) << PAGE_SHIFT; - //bpf_printk("Time:%llu\tpid:%d\tcomm:%s\tprio:%d\torig_cpu:%d\tdest_cpu:%d\t\n",time,args->pid,args->comm,args->prio,args->orig_cpu,args->dest_cpu); + + per_migrate.read_bytes = BPF_CORE_READ(task,ioac.read_bytes); + per_migrate.write_bytes = BPF_CORE_READ(task,ioac.write_bytes); + + per_migrate.context_switches = BPF_CORE_READ(task,nvcsw) + BPF_CORE_READ(task,nivcsw); + // per_migrate.runtime = BPF_CORE_READ(task,se.sum_exec_runtime); + + bpf_map_update_elem(&migrate_info, &mkey, &per_migrate, BPF_ANY); + } return 0; } \ No newline at end of file diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.c b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.c index ae04ea7e7..af2958210 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.c +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.c @@ -126,45 +126,53 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va static int migrate_print(){ time_t now = time(NULL);// 获取当前时间 struct tm *localTime = localtime(&now);// 将时间转换为本地时间结构 - printf("Time: %02d:%02d:%02d\n",localTime->tm_hour, localTime->tm_min, localTime->tm_sec); - printf("---------------------------------------------------------------------------------\n"); - int err,migrate_fd =bpf_map__fd(skel->maps.migrate),t_fd =bpf_map__fd(skel->maps.t); - int key =0,count; - err = bpf_map_lookup_elem(t_fd,&key,&count); - if (err < 0) { - fprintf(stderr, "failed to lookup infos: %d\n", err); - return -1; - } - printf("total process %d\n",count); - count = 0; - bpf_map_update_elem(t_fd,&key,&count,BPF_ANY); + printf("\nTime: %02d:%02d:%02d\n",localTime->tm_hour, localTime->tm_min, localTime->tm_sec); + printf("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n"); + int err,migrate_fd =bpf_map__fd(skel->maps.migrate),migrate_info_fd =bpf_map__fd(skel->maps.migrate_info); pid_t lookup_key = -1 ,next_key; struct migrate_event migrate_event; while(!bpf_map_get_next_key(migrate_fd, &lookup_key, &next_key)){//遍历打印hash map err = bpf_map_lookup_elem(migrate_fd,&next_key,&migrate_event); if (err < 0) { - fprintf(stderr, "failed to lookup infos: %d\n", err); + fprintf(stderr, "failed to lookup infos2: %d\n", err); return -1; } - if(migrate_event.count == migrate_event.rear) { + if(migrate_event.count <= migrate_event.rear) { lookup_key = next_key; continue; } u64 last_time_stamp = 0; printf("\npid:%d\tprio:%d\tcount:%d\trear:%d\n",migrate_event.pid,migrate_event.prio,migrate_event.count,migrate_event.rear); - for(int i=migrate_event.rear;i%d\t", - migrate_event.migrate_info[i%MAX_MIGRATE].time,migrate_event.migrate_info[i%MAX_MIGRATE].orig_cpu,migrate_event.migrate_info[i%MAX_MIGRATE].dest_cpu); + printf("---------------------------------------------------------------------------------\n"); + for(int i=migrate_event.rear;i<=migrate_event.count;i++){ + struct per_migrate migrate_info; + struct minfo_key mkey; + mkey.pid = migrate_event.pid; + mkey.count = i; + err = bpf_map_lookup_elem(migrate_info_fd,&mkey,&migrate_info); + if (err < 0) { + fprintf(stderr, "failed to lookup infos err %d mkey_pid: %d mkey_count: %d\n", err,mkey.pid,i); + continue; + } + printf("time_stamp:%llu\t%d->%d \t PROC_LOAD:%llu \t PROC_UTIL:%llu\t", + migrate_info.time,migrate_info.orig_cpu,migrate_info.dest_cpu,migrate_info.pload_avg,migrate_info.putil_avg); + printf("CPU_LOAD: %ld \t Cpu_Capacity:[%ld:%ld] \t ",migrate_info.cpu_load_avg,migrate_info.cpu_capacity,migrate_info.cpu_capacity_orig); + printf("mmem_usage:%llu kb \t\t read:%llu kb \t\t wite:%llu kb \t\t context_switch:%llu\t", + migrate_info.mem_usage/1024,migrate_info.read_bytes/1024,migrate_info.write_bytes/1024, + migrate_info.context_switches); + if(i==migrate_event.rear && last_time_stamp == 0) { - last_time_stamp = migrate_event.migrate_info[i%MAX_MIGRATE].time; + last_time_stamp = migrate_info.time; printf("delay: /\n"); }else{ - printf("delay: %d us\n",(migrate_event.migrate_info[i%MAX_MIGRATE].time - last_time_stamp)/1000); - last_time_stamp = migrate_event.migrate_info[i%MAX_MIGRATE].time; + printf("delay: %d us\n",(migrate_info.time - last_time_stamp)/1000); + last_time_stamp = migrate_info.time; } + bpf_map_delete_elem(migrate_info_fd,&mkey);//删除已经打印了的数据 + } - migrate_event.rear = migrate_event.count; + migrate_event.rear = migrate_event.count + 1; bpf_map_update_elem(migrate_fd,&next_key,&migrate_event,BPF_ANY); lookup_key = next_key; } @@ -235,7 +243,7 @@ int main(int argc, char **argv) break; } if (err < 0) { - printf("Error polling perf buffer: %d\n", err); + printf("Error: %d\n", err); break; } } diff --git a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.h b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.h index 0bb280c34..685cf0374 100644 --- a/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.h +++ b/eBPF_Supermarket/CPU_Subsystem/eBPF_proc_image/tools/migrate_image.h @@ -6,12 +6,34 @@ typedef unsigned int u32; /*----------------------------------------------*/ /* migrate_event结构体 */ /*----------------------------------------------*/ -#define MAX_MIGRATE 16 +#define MAX_MIGRATE 1024 +#define PAGE_SHIFT 13 // #define ARRY_OVERFLOW -1 +struct minfo_key{ + pid_t pid; + int count; +}; struct per_migrate{//每次迁移,记录该次迁移信息; u64 time; u32 orig_cpu; u32 dest_cpu; + + u64 orig_cpu_load;//cfs->avg.runnale_avg 就绪队列所有调度实体;量化负载总和 + u64 dest_cpu_load; + int cpu_capacity;//计算机算力 + int cpu_capacity_orig;//额定算力 + u64 cpu_load_avg; + u64 pload_avg; + u64 putil_avg; + + int on_cpu; + u64 mem_usage; + u64 read_bytes; + u64 write_bytes; + // u64 syscr; + // u64 syscw; + u64 context_switches; + u64 runtime; }; //每个进程的迁移信息; struct migrate_event{ @@ -19,5 +41,5 @@ struct migrate_event{ pid_t pid; int prio; int count,rear;//迁移频率 - struct per_migrate migrate_info[MAX_MIGRATE];//该进程每次迁移信息; + //struct per_migrate *migrate_info;//该进程每次迁移信息; }; diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/Makefile b/eBPF_Supermarket/Network_Subsystem/net_manager/Makefile index 7bbe2ad6b..3f0306b8d 100644 --- a/eBPF_Supermarket/Network_Subsystem/net_manager/Makefile +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/Makefile @@ -12,7 +12,7 @@ MAKEFLAGS += --no-print-directory -s Q = @ endif -PROJ := xacl_ip router xacl_mac xstate +PROJ := xacl_ip router xacl_mac xstate net_manager diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_defines.h b/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_defines.h index 167822a1f..12c822f8e 100644 --- a/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_defines.h +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_defines.h @@ -27,6 +27,13 @@ struct config { int xsk_if_queue; bool xsk_poll_mode; bool unload_all; + bool show_stats; // 数据统计 + bool ip_filter; //ip过滤 + bool mac_filter; //mac过滤 + bool router; //路由 + bool state; //会话保持 + bool clear; //清理 + }; /* Defined in common_params.o */ diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_params.c b/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_params.c index 44d07f39e..e0fc2ca8b 100644 --- a/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_params.c +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/common/common_params.c @@ -16,66 +16,124 @@ int verbose = 1; #define BUFSIZE 30 +/** + * @brief 打印选项的帮助信息 + * + * @param long_options 包含所有长选项的结构体数组 + * @param required 标志位,用于指示是否打印必需的选项 + */ void _print_options(const struct option_wrapper *long_options, bool required) { - int i, pos; - char buf[BUFSIZE]; - - for (i = 0; long_options[i].option.name != 0; i++) { - if (long_options[i].required != required) - continue; - - if (long_options[i].option.val > 64) /* ord('A') = 65 */ - printf(" -%c,", long_options[i].option.val); - else - printf(" "); - pos = snprintf(buf, BUFSIZE, " --%s", long_options[i].option.name); - if (long_options[i].metavar) - snprintf(&buf[pos], BUFSIZE-pos, " %s", long_options[i].metavar); - printf("%-22s", buf); - printf(" %s", long_options[i].help); - printf("\n"); - } + int i, pos; + char buf[BUFSIZE]; + + // 遍历所有的长选项 + for (i = 0; long_options[i].option.name != 0; i++) { + // 如果选项的必需性与参数不符,则跳过 + if (long_options[i].required != required) + continue; + + // 如果选项的短名称为大写字母,打印其短名称 + if (long_options[i].option.val > 64) /* ord('A') = 65 */ + printf(" -%c,", long_options[i].option.val); + else + // 否则不打印短名称,保留空白对齐 + printf(" "); + + // 将选项的长名称及其元变量(如果有)格式化到 buf 中 + pos = snprintf(buf, BUFSIZE, " --%s", long_options[i].option.name); + if (long_options[i].metavar) + snprintf(&buf[pos], BUFSIZE-pos, " %s", long_options[i].metavar); + + // 打印格式化后的选项名称和帮助信息 + printf("%-22s", buf); + printf(" %s", long_options[i].help); + printf("\n"); + } } + +/** + * @brief 打印程序的用法信息 + * + * @param prog_name 程序名 + * @param doc 程序的文档说明 + * @param long_options 包含所有长选项的结构体数组 + * @param full 标志位,是否打印完整的帮助信息 + */ void usage(const char *prog_name, const char *doc, const struct option_wrapper *long_options, bool full) { - printf("Usage: %s [options]\n", prog_name); + // 打印用法信息的基本格式 + printf("Usage: %s [options]\n", prog_name); - if (!full) { - printf("Use --help (or -h) to see full option list.\n"); - return; - } + // 如果不需要打印完整的帮助信息 + if (!full) { + // 提示用户使用 --help 或 -h 查看完整的选项列表 + printf("Use --help (or -h) to see full option list.\n"); + return; + } + + // 打印文档说明 + printf("\nDOCUMENTATION:\n %s\n", doc); - printf("\nDOCUMENTATION:\n %s\n", doc); - printf("Required options:\n"); - _print_options(long_options, true); - printf("\n"); - printf("Other options:\n"); - _print_options(long_options, false); - printf("\n"); + // 打印必需选项 + printf("Required options:\n"); + _print_options(long_options, true); + printf("\n"); + + // 打印其他选项 + printf("Other options:\n"); + _print_options(long_options, false); + printf("\n"); } + +/** + * @brief 将 option_wrapper 结构体数组转换为标准的 option 结构体数组 + * + * @param wrapper 包含所有长选项的结构体数组 + * @param options 输出参数,用于存储转换后的 option 结构体数组 + * @return int 成功返回0,失败返回-1 + */ int option_wrappers_to_options(const struct option_wrapper *wrapper, struct option **options) { int i, num; struct option *new_options; + + // 计算 wrapper 数组中的选项数量 for (i = 0; wrapper[i].option.name != 0; i++) {} num = i; + // 分配新的 option 数组内存 new_options = malloc(sizeof(struct option) * num); if (!new_options) + // 如果内存分配失败,返回 -1 return -1; + + // 将 wrapper 数组中的每个 option 复制到新的 option 数组中 for (i = 0; i < num; i++) { memcpy(&new_options[i], &wrapper[i], sizeof(struct option)); } + // 将新分配并填充的 option 数组赋值给输出参数 *options *options = new_options; + + // 成功返回 0 return 0; } + +/** + * @brief 解析命令行参数 + * + * @param argc 参数个数 + * @param argv 参数值数组 + * @param options_wrapper 包含所有长选项的结构体数组 + * @param cfg 配置结构体,用于存储解析结果 + * @param doc 程序的文档说明 + */ void parse_cmdline_args(int argc, char **argv, const struct option_wrapper *options_wrapper, struct config *cfg, const char *doc) @@ -86,22 +144,26 @@ void parse_cmdline_args(int argc, char **argv, char *dest; int opt; + // 将 option_wrapper 结构体数组转换为标准的 option 结构体数组 if (option_wrappers_to_options(options_wrapper, &long_options)) { fprintf(stderr, "Unable to malloc()\n"); exit(EXIT_FAIL_OPTION); } - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hd:r:L:R:ASNFU:MQ:czpq", + /* 解析命令行参数 */ + while ((opt = getopt_long(argc, argv, "hd:r:L:R:ASNFU:MQ:czpq:i:m:k:g:n:t", long_options, &longindex)) != -1) { switch (opt) { case 'd': + // 检查设备名称长度是否超出限制 if (strlen(optarg) >= IF_NAMESIZE) { fprintf(stderr, "ERR: --dev name too long\n"); goto error; } + // 设置设备名称 cfg->ifname = (char *)&cfg->ifname_buf; strncpy(cfg->ifname, optarg, IF_NAMESIZE); + // 获取设备索引 cfg->ifindex = if_nametoindex(cfg->ifname); if (cfg->ifindex == 0) { fprintf(stderr, @@ -111,12 +173,15 @@ void parse_cmdline_args(int argc, char **argv, } break; case 'r': + // 检查重定向设备名称长度是否超出限制 if (strlen(optarg) >= IF_NAMESIZE) { fprintf(stderr, "ERR: --redirect-dev name too long\n"); goto error; } + // 设置重定向设备名称 cfg->redirect_ifname = (char *)&cfg->redirect_ifname_buf; strncpy(cfg->redirect_ifname, optarg, IF_NAMESIZE); + // 获取重定向设备索引 cfg->redirect_ifindex = if_nametoindex(cfg->redirect_ifname); if (cfg->redirect_ifindex == 0) { fprintf(stderr, @@ -125,73 +190,110 @@ void parse_cmdline_args(int argc, char **argv, goto error; } break; + case 't': + cfg->show_stats = true; + break; + case 'i': + cfg->ip_filter = true; + break; + case 'm': + cfg->mac_filter = true; + break; + case 'k': + cfg->router = true; + break; + case 'g': + cfg->state = true; + break; + case 'n': + cfg->clear = true; + break; case 'A': + // 设置附加模式为未指定模式 cfg->attach_mode = XDP_MODE_UNSPEC; break; case 'S': + // 设置附加模式为 SKB 模式 cfg->attach_mode = XDP_MODE_SKB; cfg->xsk_bind_flags &= ~XDP_ZEROCOPY; cfg->xsk_bind_flags |= XDP_COPY; break; case 'N': + // 设置附加模式为原生模式 cfg->attach_mode = XDP_MODE_NATIVE; break; case 3: /* --offload-mode */ + // 设置附加模式为硬件模式 cfg->attach_mode = XDP_MODE_HW; break; case 'M': + // 启用重用地图 cfg->reuse_maps = true; break; case 'U': + // 设置卸载标志 cfg->do_unload = true; cfg->unload_all = true; - //cfg->prog_id = atoi(optarg); + // cfg->prog_id = atoi(optarg); break; case 'p': + // 启用轮询模式 cfg->xsk_poll_mode = true; break; case 'q': + // 设置为非详细模式 verbose = false; break; case 'Q': + // 设置接口队列 cfg->xsk_if_queue = atoi(optarg); break; case 1: /* --filename */ + // 设置文件名 dest = (char *)&cfg->filename; strncpy(dest, optarg, sizeof(cfg->filename)); break; case 2: /* --progname */ + // 设置程序名称 dest = (char *)&cfg->progname; strncpy(dest, optarg, sizeof(cfg->progname)); break; case 'L': /* --src-mac */ + // 设置源 MAC 地址 dest = (char *)&cfg->src_mac; strncpy(dest, optarg, sizeof(cfg->src_mac)); break; case 'R': /* --dest-mac */ + // 设置目的 MAC 地址 dest = (char *)&cfg->dest_mac; strncpy(dest, optarg, sizeof(cfg->dest_mac)); break; case 'c': + // 设置绑定标志为复制模式 cfg->xsk_bind_flags &= ~XDP_ZEROCOPY; cfg->xsk_bind_flags |= XDP_COPY; break; case 'z': + // 设置绑定标志为零拷贝模式 cfg->xsk_bind_flags &= ~XDP_COPY; cfg->xsk_bind_flags |= XDP_ZEROCOPY; break; case 4: /* --unload-all */ + // 设置卸载所有标志 cfg->unload_all = true; break; case 'h': + // 设置显示完整帮助信息的标志 full_help = true; /* fall-through */ error: default: + // 打印使用信息并退出 usage(argv[0], doc, options_wrapper, full_help); free(long_options); exit(EXIT_FAIL_OPTION); } } + // 释放分配的内存 free(long_options); } diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/Makefile b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/Makefile new file mode 100644 index 000000000..e29a040e9 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +XDP_TARGETS := xdp_prog_kern +USER_TARGETS := xdp_loader + +COMMON_DIR = ../common + +# Extend with another COMMON_OBJS +COMMON_OBJS += $(COMMON_DIR)/common_user_bpf_xdp.o + + +EXTRA_DEPS := $(COMMON_DIR)/parsing_helpers.h + +include $(COMMON_DIR)/common.mk diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/common_kern_user.h b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/common_kern_user.h new file mode 100644 index 000000000..079c3201f --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/common_kern_user.h @@ -0,0 +1,96 @@ +/* This common_kern_user.h is used by kernel side BPF-progs and + * userspace programs, for sharing common struct's and DEFINEs. + */ +#ifndef __COMMON_KERN_USER_H +#define __COMMON_KERN_USER_H + +#include + +typedef __u32 xdp_act; +#define ETH_ALEN 6 + +#define ALERT_ERR_STR "[XACL] ERROR:" + + +#define MAX_RULES 256 + + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +//#define DEBUG_PRINT +//#define DEBUG_PRINT_EVERY + +struct datarec { + __u64 rx_packets; + __u64 rx_bytes; +}; + +struct conn_ipv4 { + __u32 saddr; + __u32 daddr; + __u16 sport; + __u16 dport; + __u16 ip_proto; +}; + +struct rules_ipv4 { + __u32 saddr; + __u32 daddr; + __u8 saddr_mask; + __u8 daddr_mask; + __u16 sport; + __u16 dport; + __u16 ip_proto; + __u16 action; + __u16 prev_rule; + __u16 next_rule; +}; + + +// 转发表项 +struct rt_item { + __u32 saddr; + __u8 eth_source[ETH_ALEN]; // 封装帧的源MAC地址。 + __u8 eth_dest[ETH_ALEN]; // 封装帧的目标MAC地址。 +}; + +// mac 过滤 +struct mac_addr { + __u8 addr[ETH_ALEN]; +}; + + +// 会话保持 +struct conn_ipv4_key { + __u32 saddr; + __u32 daddr; + __u16 sport; + __u16 dport; + __u16 proto; +}; + +struct conn_ipv4_val { + __u32 tcp_state; + __u32 rid; +}; + +enum { + TCP_S_NONE = 0U, + TCP_S_ESTABLISHED, + TCP_S_SYN_SENT, + TCP_S_SYN_RECV, + TCP_S_FIN_WAIT1, + TCP_S_FIN_WAIT2, + TCP_S_CLOSE_WAIT, + TCP_S_CLOSE, +}; + + + +#ifndef XDP_ACTION_MAX +#define XDP_ACTION_MAX (XDP_REDIRECT + 1) +#endif + +#endif /* __COMMON_KERN_USER_H */ diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/black_ipv4.conf b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/black_ipv4.conf new file mode 100644 index 000000000..730cdf8f6 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/black_ipv4.conf @@ -0,0 +1,2 @@ +192.168.207.138/0 192.168.207.177/0 0 0 0 DENY +192.168.207.129/0 192.168.207.177/0 0 0 0 ALLOW \ No newline at end of file diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/mac_load.conf b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/mac_load.conf new file mode 100644 index 000000000..3b41e21a5 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/mac_load.conf @@ -0,0 +1,2 @@ +00:0c:29:dd:17:2c DENY +00:0c:29:fd:69:58 DENY \ No newline at end of file diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/router_load.conf b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/router_load.conf new file mode 100644 index 000000000..ac552338c --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/conf.d/router_load.conf @@ -0,0 +1,2 @@ +0.0.0.0 00:0c:29:7b:a6:d9 00:0c:29:fd:69:58 +1.2.3.4 00:0c:29:7b:a6:d9 00:0c:29:dd:17:2c \ No newline at end of file diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/xdp_loader.c b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/xdp_loader.c new file mode 100644 index 000000000..78c70ee66 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/net_manager/xdp_loader.c @@ -0,0 +1,817 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +static const char *__doc__ = "XDP loader\n" + " - Allows selecting BPF program --progname name to XDP-attach to --dev\n" + " - Collects and displays stats from XDP program\n"; + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include /* depend on kernel-headers installed */ + +#include "../common/common_params.h" +#include "../common/common_user_bpf_xdp.h" +#include "../common/common_libbpf.h" +#include "common_kern_user.h" + +static const char *default_filename = "xdp_prog_kern.o"; +static const char *default_progname = "xdp_entry_state"; + +static const struct option_wrapper long_options[] = { + + {{"help", no_argument, NULL, 'h' }, + "Show help", false}, + + {{"dev", required_argument, NULL, 'd' }, + "Operate on device ", "", true}, + + {{"skb-mode", no_argument, NULL, 'S' }, + "Install XDP program in SKB (AKA generic) mode"}, + + {{"native-mode", no_argument, NULL, 'N' }, + "Install XDP program in native mode"}, + + {{"auto-mode", no_argument, NULL, 'A' }, + "Auto-detect SKB or native mode"}, + + {{"force", no_argument, NULL, 'F' }, + "Force install, replacing existing program on interface"}, + + {{"unload", no_argument, NULL, 'U' }, + "Unload XDP program instead of loading"}, + + {{"quiet", no_argument, NULL, 'q' }, + "Quiet mode (no output)"}, + + {{"progname", required_argument, NULL, 2 }, + "Load program from function in the ELF file", ""}, + + {{"stats", no_argument, NULL, 't' }, + "Show XDP stats"}, + + {{"ip-filter", no_argument, NULL, 'i' }, + "ip_filter"}, + + {{"mac_filter", no_argument, NULL, 'm' }, + "mac_filter"}, + + {{"router", no_argument, NULL, 'k' }, + "package_router"}, + + {{"clear", no_argument, NULL, 'n' }, + "clear_map"}, + + {{0, 0, NULL, 0 }, NULL, false} +}; + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *pin_basedir = "/sys/fs/bpf"; +const char *map_name = "xdp_stats_map"; +char pin_dir[PATH_MAX]; +char map_filename[PATH_MAX]; + + +static void list_avail_progs(struct bpf_object *obj) +{ + struct bpf_program *pos; + + printf("BPF object (%s) listing available XDP functions\n", + bpf_object__name(obj)); + + bpf_object__for_each_program(pos, obj) { + if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) + printf(" %s\n", bpf_program__name(pos)); + } +} + + +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ +static __u64 gettime(void) +{ + struct timespec t; + int res; + + res = clock_gettime(CLOCK_MONOTONIC, &t); + if (res < 0) { + fprintf(stderr, "Error with gettimeofday! (%i)\n", res); + exit(EXIT_FAIL); + } + return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +struct record { + __u64 timestamp; + struct datarec total; /* defined in common_kern_user.h */ +}; + +struct stats_record { + struct record stats[XDP_ACTION_MAX]; +}; + +static double calc_period(struct record *r, struct record *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double) period / NANOSEC_PER_SEC); + + return period_; +} + +static void stats_print_header() +{ + /* Print stats "header" */ + printf("%-12s\n", "XDP-action"); +} + +static void stats_print(struct stats_record *stats_rec, + struct stats_record *stats_prev) +{ + struct record *rec, *prev; + __u64 packets, bytes; + double period; + double pps; /* packets per sec */ + double bps; /* bits per sec */ + int i; + + stats_print_header(); /* Print stats "header" */ + + /* Print for each XDP actions stats */ + for (i = 0; i < XDP_ACTION_MAX; i++) + { + char *fmt = "%-12s %'11lld pkts (%'10.0f pps)" + " %'11lld Kbytes (%'6.0f Mbits/s)" + " period:%f\n"; + const char *action = action2str(i); + + rec = &stats_rec->stats[i]; + prev = &stats_prev->stats[i]; + + period = calc_period(rec, prev); + if (period == 0) + return; + + packets = rec->total.rx_packets - prev->total.rx_packets; + pps = packets / period; + + bytes = rec->total.rx_bytes - prev->total.rx_bytes; + bps = (bytes * 8)/ period / 1000000; + + printf(fmt, action, rec->total.rx_packets, pps, + rec->total.rx_bytes / 1000 , bps, + period); + } + printf("\n"); +} + + +/* BPF_MAP_TYPE_ARRAY */ +void map_get_value_array(int fd, __u32 key, struct datarec *value) +{ + if ((bpf_map_lookup_elem(fd, &key, value)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + } +} + +/* BPF_MAP_TYPE_PERCPU_ARRAY */ +void map_get_value_percpu_array(int fd, __u32 key, struct datarec *value) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + unsigned int nr_cpus = libbpf_num_possible_cpus(); + struct datarec values[nr_cpus]; + __u64 sum_bytes = 0; + __u64 sum_pkts = 0; + int i; + + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { + fprintf(stderr, + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); + return; + } + + /* Sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + sum_pkts += values[i].rx_packets; + sum_bytes += values[i].rx_bytes; + } + value->rx_packets = sum_pkts; + value->rx_bytes = sum_bytes; +} + +static bool map_collect(int fd, __u32 map_type, __u32 key, struct record *rec) +{ + struct datarec value; + + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + map_get_value_array(fd, key, &value); + break; + case BPF_MAP_TYPE_PERCPU_ARRAY: + map_get_value_percpu_array(fd, key, &value); + break; + default: + fprintf(stderr, "ERR: Unknown map_type(%u) cannot handle\n", + map_type); + return false; + break; + } + + rec->total.rx_packets = value.rx_packets; + rec->total.rx_bytes = value.rx_bytes; + return true; +} + +static void stats_collect(int map_fd, __u32 map_type, + struct stats_record *stats_rec) +{ + /* Collect all XDP actions stats */ + __u32 key; + + for (key = 0; key < XDP_ACTION_MAX; key++) { + map_collect(map_fd, map_type, key, &stats_rec->stats[key]); + } +} + +static void stats_poll(int map_fd, __u32 map_type, int interval) +{ + struct stats_record prev, record = { 0 }; + + /* Trick to pretty printf with thousands separators use %' */ + setlocale(LC_NUMERIC, "en_US"); + + /* Get initial reading quickly */ + stats_collect(map_fd, map_type, &record); + usleep(1000000/4); + + while (1) { + prev = record; /* struct copy */ + stats_collect(map_fd, map_type, &record); + stats_print(&record, &prev); + sleep(interval); + } +} + + + +int unpin_maps(struct bpf_object *bpf_obj) +{ + int err; + /* Existing/previous XDP prog might not have cleaned up */ + if (access(map_filename, F_OK ) != -1 ) { + if (verbose) + printf(" - Unpinning (remove) prev maps in %s/\n", + pin_dir); + + /* Basically calls unlink(3) on map_filename */ + err = bpf_object__unpin_maps(bpf_obj, pin_dir); + if (err) { + fprintf(stderr, "ERR: UNpinning maps in %s\n", pin_dir); + return EXIT_FAIL_BPF; + } + } + return 0; +} + +/* Pinning maps under /sys/fs/bpf in subdir */ +int pin_maps_in_bpf_object(struct bpf_object *bpf_obj) +{ + int err; + unpin_maps(bpf_obj); + if (verbose) + printf(" - Pinning maps in %s/\n", pin_dir); + + /* This will pin all maps in our bpf_object */ + err = bpf_object__pin_maps(bpf_obj, pin_dir); + if (err) + return EXIT_FAIL_BPF; + + return 0; +} + + +char *ifname; +int rules_ipv4_map; +int rtcache_map4; +int src_macs; + +int print_usage(int id){ + switch(id){ + case 0: + fprintf(stderr, "Usage: \n"); + break; + default: + break; + }; + + return 0; +} + + +int open_map(const char *ifname, const char *map_name){ + int len; + char pin_dir[PATH_MAX]; + const char *pin_basedir = "/sys/fs/bpf"; + struct bpf_map_info info = { 0 }; + + /* Use the --dev name as subdir for finding pinned maps */ + len = snprintf(pin_dir, PATH_MAX, "%s/%s", pin_basedir, ifname); + if (len < 0) { + fprintf(stderr, "ERR: creating pin dirname\n"); + return -1; + } + + int fd = open_bpf_map_file(pin_dir, map_name, &info); + if (fd < 0) { + fprintf(stderr, "ERR: Failed to open map file: %s\n", map_name); + return -1; + } + printf("Opened BPF map\n"); + printf(" - BPF map (bpf_map_type:%d) fd: %d id:%d name:%s" + " key_size:%d value_size:%d max_entries:%d\n", + info.type, fd, info.id, info.name, + info.key_size, info.value_size, info.max_entries); + + return fd; +} + + +int load_bpf_map(){ + rules_ipv4_map = open_map(ifname, "rules_ipv4_map"); + rtcache_map4 = open_map(ifname, "rtcache_map4"); + src_macs = open_map(ifname, "src_macs"); + + // Check if any map failed to open + if (rules_ipv4_map < 0) { + fprintf(stderr, "Failed to open rules_ipv4_map\n"); + } + if (rtcache_map4 < 0) { + fprintf(stderr, "Failed to open rtcache_map4\n"); + } + if (src_macs < 0) { + fprintf(stderr, "Failed to open src_macs\n"); + } + + if (rules_ipv4_map < 0 || rtcache_map4 < 0 || src_macs < 0) { + fprintf(stderr, "load bpf map error, check device name\n"); + return -1; + } + + return 0; +} + + +static __u32 ip_to_u32(__u8 *ip_u8) { + __u32 ip_u32 = 0; + ip_u32 = (ip_u8[0]<<24) | (ip_u8[1]<<16) | (ip_u8[2]<<8) | (ip_u8[3]); + //printf("%hhu.%hhu.%hhu.%hhu,%u\n",ip_u8[0],ip_u8[1],ip_u8[2],ip_u8[3],ip_u32); + return ip_u32; +} + +int clear_map(){ + __u16 keys[MAX_RULES]; + for(int i=0; i +#include +#include +#include + +#include "common_kern_user.h" +#include "../common/parsing_helpers.h" + + + +#ifndef memcpy +#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n)) +#endif + +//重定义 +#undef AF_INET +#define AF_INET 2 +#undef AF_INET6 +#define AF_INET6 10 +#define IPV6_FLOWINFO_MASK bpf_htonl(0x0FFFFFFF) + +// 数据包统计 +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, __u32); + __type(value, struct datarec); + __uint(max_entries, XDP_ACTION_MAX); +} xdp_stats_map SEC(".maps"); + +// ipv4—filter +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u16); + __type(value, struct rules_ipv4); + __uint(max_entries, MAX_RULES); +} rules_ipv4_map SEC(".maps"); + + +// router +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __type(key, int); + __type(value, int); + __uint(max_entries, 256); +} tx_port SEC(".maps"); + +// 路由转发表缓存 +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, struct rt_item); + __uint(max_entries, MAX_RULES); +} rtcache_map4 SEC(".maps"); + + +/*filter-pass-drop*/ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, ETH_ALEN); + __type(value, __u32); + __uint(max_entries, MAX_RULES); +} src_macs SEC(".maps"); + + +// 会话保持 +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct conn_ipv4_key); + __type(value, struct conn_ipv4_val); + __uint(max_entries, MAX_RULES); +} conn_ipv4_map SEC(".maps"); + + +static __always_inline +__u32 xdp_stats_record_action(struct xdp_md *ctx, __u32 action) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + + if (action >= XDP_ACTION_MAX) + return XDP_ABORTED; + + /* Lookup in kernel BPF-side return pointer to actual data record */ + struct datarec *rec = bpf_map_lookup_elem(&xdp_stats_map, &action); + if (!rec) + return XDP_ABORTED; + + /* Calculate packet length */ + __u64 bytes = data_end - data; + + /* BPF_MAP_TYPE_PERCPU_ARRAY returns a data record specific to current + * CPU and XDP hooks runs under Softirq, which makes it safe to update + * without atomic operations. + */ + rec->rx_packets++; + rec->rx_bytes += bytes; + + return action; +} + + +/*会话保持功能*/ + +// 定义一个始终内联的辅助函数,用于交换连接键中的源和目的地址以及端口号 +static __always_inline +int swap_conn_src_dst(struct conn_ipv4_key *conn) +{ + // 交换源和目的 IPv4 地址 + { + __u32 tmp = conn->daddr; + conn->daddr = conn->saddr; + conn->saddr = tmp; + } + + // 交换源和目的端口号 + { + __u16 tmp = conn->sport; + conn->sport = conn->dport; + conn->dport = tmp; + } + + return 0; +} + + +// 全局变量,用于循环轮询的循环计数器 +int rr = 0; + +// 定义一个始终内联的辅助函数,用于获取轮询循环计数器的值 +static __always_inline +int get_rs_rr(){ + + // 如果循环计数器超过 6,则重置为 0 + if(rr >= 6){ + rr = 0; + } + + // 自增循环计数器并返回其当前值 + rr++; + return rr; +} + +/*路由功能*/ + +/* from include/net/ip.h */ +static __always_inline int ip_decrease_ttl(struct iphdr *iph) +{ + __u32 check = iph->check; + check += bpf_htons(0x0100); + iph->check = (__u16)(check + (check >= 0xFFFF)); + return --iph->ttl; +} + +static __always_inline +int mac_zero(const __u8 *mac_addr) { + // 检查MAC地址是否不全为零 + for (int i = 0; i < ETH_ALEN; i++) { + if (mac_addr[i] != 0) + return 1; // 如果有一个字节不为零,返回1表示不为零 + } + return 0; // 如果所有字节都为零,返回0表示全为零 +} + + +static __always_inline +int ipv4_match(__u32 conn_addr, __u32 rule_addr) { + // 直接比较IPv4地址和网络地址 + if( (!rule_addr) || (conn_addr == rule_addr) ) //0 , match all + return 1; + return 0; +} + +static int match_rules_loop(__u32 index, void *ctx) +{ + struct rt_item *p_ctx = (struct rt_item *)ctx; + + + struct rt_item *p_r = bpf_map_lookup_elem(&rtcache_map4, &index); + if(!p_r){ + return 1; //out of range + } + + + if( ipv4_match(p_ctx->saddr, p_r->saddr) ) { + + memcpy(p_ctx->eth_source, p_r->eth_source, ETH_ALEN); + memcpy(p_ctx->eth_dest, p_r->eth_dest, ETH_ALEN); + + + return 1; + } + + + return 1; +} + +static __always_inline +int match_rules(struct rt_item *conn) +{ + struct rt_item *ctx = conn; + + bpf_loop(MAX_RULES, match_rules_loop, ctx, 0); + + return 1; +} + +/*使用 IP 进行过滤*/ + +struct match_rules_loop_ctx{ + __u16 action; + __u16 next_rule; + struct conn_ipv4 *conn; +}; + +static __always_inline +int ipv4_cidr_match(__u32 ip_addr, __u32 network_addr, __u8 cidr) { + if(network_addr == 0 && cidr == 0) + return 1; + + __u32 subnet_mask = (0xFFFFFFFFU << (32 - cidr)) & 0xFFFFFFFFU; + + __u32 masked_ip = ip_addr & subnet_mask; + __u32 masked_network = network_addr & subnet_mask; + + return masked_ip == masked_network; +} + +static __always_inline +int port_match(__u16 conn_port, __u16 rule_port){ + if( (!rule_port) || (rule_port == conn_port) ) //0 , match all + return 1; + return 0; +} + +static int match_rules_ipv4_loop(__u32 index, void *ctx) +{ + struct match_rules_loop_ctx *p_ctx = (struct match_rules_loop_ctx *)ctx; + if(index != p_ctx->next_rule) + return 0; + + struct rules_ipv4 *p_r = bpf_map_lookup_elem(&rules_ipv4_map, &index); + if(!p_r){ + return 1; //out of range + } + + p_ctx->next_rule = p_r->next_rule; + + if(index == 0) + goto out_match_rules_ipv4_loop; + + if( ipv4_cidr_match(p_ctx->conn->saddr, p_r->saddr, p_r->saddr_mask) && + ipv4_cidr_match(p_ctx->conn->daddr, p_r->daddr, p_r->daddr_mask) && + port_match(p_ctx->conn->sport, p_r->sport) && + port_match(p_ctx->conn->dport, p_r->dport) && + port_match(p_ctx->conn->ip_proto, p_r->ip_proto) ) + { + p_ctx->action = p_r->action; + return 1; + } + +out_match_rules_ipv4_loop: + if(p_r->next_rule == 0) + return 1; //go out loop + + return 0; +} + +static __always_inline +xdp_act match_rules_ipv4(struct conn_ipv4 *conn) +{ + struct match_rules_loop_ctx ctx = {.action = XDP_PASS, .conn = conn, .next_rule = 0}; + + + for(int i=0; idata_end; + void *data = (void *)(long)ctx->data; + struct hdr_cursor nh; + int nh_type; //next header type + struct ethhdr *eth; + struct iphdr *iph; + struct tcphdr *tcph; + struct udphdr *udph; + struct conn_ipv4 conn = {.saddr = 0, .daddr = 0, .sport = 0, .dport = 0, .ip_proto = 0}; + + nh.pos = data; + + nh_type = parse_ethhdr(&nh, data_end, ð); + + if(nh_type < 0) + goto out; + + if (nh_type == bpf_htons(ETH_P_IP)) { + + nh_type = parse_iphdr(&nh, data_end, &iph); + + if(nh_type < 0) + goto out; + + if (nh_type == IPPROTO_TCP) { + if(parse_tcphdr(&nh, data_end, &tcph) < 0) + goto out; + + conn.sport = bpf_ntohs(tcph -> source); + conn.dport = bpf_ntohs(tcph -> dest); + + } + else if(nh_type == IPPROTO_UDP){ + if(parse_udphdr(&nh, data_end, &udph) < 0){ + goto out; + } + conn.sport = bpf_ntohs(udph -> source); + conn.dport = bpf_ntohs(udph -> dest); + } + + conn.saddr = bpf_ntohl(iph -> saddr); + conn.daddr = bpf_ntohl(iph -> daddr); + conn.ip_proto = nh_type; + + #ifdef DEBUG_PRINT_EVERY + if(conn.dport != 22) + bpf_printk("conn(%u:%u to %u:%u)", conn.saddr, conn.sport, conn.daddr, conn.dport); + #endif + + action = match_rules_ipv4(&conn); + + } + + +out: + return xdp_stats_record_action(ctx, action); +} + + + + +/* Solution to packet03/assignment-4 */ +SEC("xdp") +int xdp_entry_router(struct xdp_md *ctx) +{ + xdp_act action = XDP_PASS; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct bpf_fib_lookup ifib = {}; + struct hdr_cursor nh; + int nh_type; //next header type + struct ethhdr *eth = data; + struct ipv6hdr *ip6h; + struct iphdr *iph; + unsigned int ip4_saddr = 0; + //unsigned ifindex = 2; + int rc; + struct rt_item nitem = {.saddr = 0, .eth_source = {0}, .eth_dest = {0}}; + + + nh.pos = data; + + nh_type = parse_ethhdr(&nh, data_end, ð); + + if(nh_type < 0) + goto out; + + if (nh_type == bpf_htons(ETH_P_IP)) { + nh_type = parse_iphdr(&nh, data_end, &iph); + + if(nh_type < 0) + goto out; + + + if (iph->ttl <= 1) + goto out; + + + ip4_saddr = iph->saddr; + + nitem.saddr = ip4_saddr; + + // 首先精确查找转发表,如果找到就直接转发,不必再经历最长前缀匹配的慢速通配查找 + match_rules(&nitem); + + + + if (mac_zero(nitem.eth_dest)) { + ip_decrease_ttl(iph); + memcpy(eth->h_dest, nitem.eth_dest, ETH_ALEN); + memcpy(eth->h_source, nitem.eth_source, ETH_ALEN); + action = bpf_redirect_map(&tx_port, 0, 0); + + goto out; + } + + // 否则执行最长前缀匹配了 + ifib.family = AF_INET; + ifib.tos = iph->tos; + ifib.l4_protocol = iph->protocol; + ifib.sport = 0; + ifib.dport = 0; + ifib.tot_len = bpf_ntohs(iph->tot_len); + ifib.ipv4_src = iph->saddr; + ifib.ipv4_dst = iph->daddr; + ifib.ifindex = ctx->ingress_ifindex; + + + rc = bpf_fib_lookup(ctx, &ifib, sizeof(ifib), 0); + switch (rc) { + case BPF_FIB_LKUP_RET_SUCCESS: /* lookup successful */ + ip_decrease_ttl(iph); + + memcpy(eth->h_dest, ifib.dmac, ETH_ALEN); + memcpy(eth->h_source, ifib.smac, ETH_ALEN); + action = bpf_redirect(ifib.ifindex, 0); + goto out; + break; + case BPF_FIB_LKUP_RET_BLACKHOLE: /* dest is blackholed; can be dropped */ + case BPF_FIB_LKUP_RET_UNREACHABLE: /* dest is unreachable; can be dropped */ + case BPF_FIB_LKUP_RET_PROHIBIT: /* dest not allowed; can be dropped */ + action = XDP_DROP; + goto out; + break; + case BPF_FIB_LKUP_RET_NOT_FWDED: /* packet is not forwarded */ + case BPF_FIB_LKUP_RET_FWD_DISABLED: /* fwding is not enabled on ingress */ + case BPF_FIB_LKUP_RET_UNSUPP_LWT: /* fwd requires encapsulation */ + case BPF_FIB_LKUP_RET_NO_NEIGH: /* no neighbor entry for nh */ + case BPF_FIB_LKUP_RET_FRAG_NEEDED: /* fragmentation required to fwd */ + /* PASS */ + goto out; + break; + } + + } else if (nh_type == bpf_htons(ETH_P_IPV6)) { + nh_type = parse_ip6hdr(&nh, data_end, &ip6h); + + struct in6_addr *src = (struct in6_addr *) ifib.ipv6_src; + struct in6_addr *dst = (struct in6_addr *) ifib.ipv6_dst; + + if(nh_type < 0) + goto out; + + if (ip6h->hop_limit <= 1) + goto out; + + ifib.family = AF_INET6; + ifib.flowinfo = *(__be32 *) ip6h & IPV6_FLOWINFO_MASK; + ifib.l4_protocol = ip6h->nexthdr; + ifib.sport = 0; + ifib.dport = 0; + ifib.tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + ifib.ifindex = ctx->ingress_ifindex; + + rc = bpf_fib_lookup(ctx, &ifib, sizeof(ifib), 0); + switch (rc) { + case BPF_FIB_LKUP_RET_SUCCESS: /* lookup successful */ + ip6h->hop_limit--; + + memcpy(eth->h_dest, ifib.dmac, ETH_ALEN); + memcpy(eth->h_source, ifib.smac, ETH_ALEN); + action = bpf_redirect(ifib.ifindex, 0); + goto out; + break; + case BPF_FIB_LKUP_RET_BLACKHOLE: /* dest is blackholed; can be dropped */ + case BPF_FIB_LKUP_RET_UNREACHABLE: /* dest is unreachable; can be dropped */ + case BPF_FIB_LKUP_RET_PROHIBIT: /* dest not allowed; can be dropped */ + action = XDP_DROP; + break; + case BPF_FIB_LKUP_RET_NOT_FWDED: /* packet is not forwarded */ + case BPF_FIB_LKUP_RET_FWD_DISABLED: /* fwding is not enabled on ingress */ + case BPF_FIB_LKUP_RET_UNSUPP_LWT: /* fwd requires encapsulation */ + case BPF_FIB_LKUP_RET_NO_NEIGH: /* no neighbor entry for nh */ + case BPF_FIB_LKUP_RET_FRAG_NEEDED: /* fragmentation required to fwd */ + /* PASS */ + break; + } + + } + else { + goto out; + } + + + +out: + return xdp_stats_record_action(ctx, action); +} + + +/* accept ethernet addresses and filter everything else */ +SEC("xdp") +int xdp_entry_mac(struct xdp_md *ctx) +{ + xdp_act action = XDP_PASS; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct hdr_cursor nh; + int nh_type; //next header type + struct ethhdr *eth; + __u32 *value; + + + nh.pos = data; + + nh_type = parse_ethhdr(&nh, data_end, ð); + + if(nh_type < 0) + goto out; + + //action = match_rules_ipv4(ð->h_source); + + /* check if src mac is in src_macs map */ + value = bpf_map_lookup_elem(&src_macs, eth->h_source); + if (value) { + action = *value; + goto out; + } + + +out: + return xdp_stats_record_action(ctx, action); +} + +SEC("xdp") +int xdp_entry_state(struct xdp_md *ctx) +{ + __u32 action = XDP_PASS; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct hdr_cursor nh; + int nh_type; //next header type + struct ethhdr *eth; + struct iphdr *iph; + struct tcphdr *tcph; + struct udphdr *udph; + // 定义IPv4连接关键信息 + struct conn_ipv4_key conn_k = {.saddr = 0, .daddr = 0, .sport = 0, .dport = 0, .proto = 0}; + + nh.pos = data; + + // 如果下一个头部类型为IPv4 + nh_type = parse_ethhdr(&nh, data_end, ð); + + if(nh_type < 0) + goto out; + + if (nh_type == bpf_htons(ETH_P_IP)) { + + nh_type = parse_iphdr(&nh, data_end, &iph); + + if(nh_type < 0) + goto out; + + conn_k.saddr = bpf_ntohl(iph -> saddr); + conn_k.daddr = bpf_ntohl(iph -> daddr); + conn_k.proto = nh_type; + + + // 如果下一个头部类型为TCP + if (nh_type == IPPROTO_TCP) { + if(parse_tcphdr(&nh, data_end, &tcph) < 0) + goto out; + + // 获取TCP连接信息 + conn_k.sport = bpf_ntohs(tcph -> source); + conn_k.dport = bpf_ntohs(tcph -> dest); + + // 查找IPv4连接映射表中的值 + // 如果找到,就说明该连接已经存在,可以在原有连接信息的基础上进行处理。 + // 如果没有找到,可能是首次遇到这个连接,可以进行一些初始化操作,例如创建新的连接信息并添加到哈希表中。 + struct conn_ipv4_val *p_conn_v = bpf_map_lookup_elem(&conn_ipv4_map, &conn_k); + if(!p_conn_v){ + // 如果查找失败,交换源目地址和端口信息后再次查找 + swap_conn_src_dst(&conn_k); + p_conn_v = bpf_map_lookup_elem(&conn_ipv4_map, &conn_k); + + // 如果再次查找失败,且TCP报文是SYN并且不是ACK,则创建新的连接项 + if(!p_conn_v){ + if(tcph->syn && !tcph->ack){ + struct conn_ipv4_val conn_v = {.tcp_state = TCP_S_SYN_SENT}; + conn_v.rid = get_rs_rr(); + swap_conn_src_dst(&conn_k); + // 将新的连接项插入到 IPv4 连接映射中 + bpf_map_update_elem(&conn_ipv4_map, &conn_k, &conn_v, BPF_ANY); + // 输出日志信息,表示创建了一个新的连接项 + bpf_printk("conn(%u:%u->%u:%u),state:%s,rid:%d",conn_k.saddr, conn_k.sport, conn_k.daddr, conn_k.dport, "SYN_SENT", conn_v.rid); + } + goto out; + } + } + // 如果查找成功,继续处理连接项 + // 如果TCP报文的标志位包含RST(复位),则删除连接项并输出相应的日志信息 + if(tcph->rst){ + bpf_map_delete_elem(&conn_ipv4_map, &conn_k); + bpf_printk("conn(%u:%u->%u:%u),state:%s,rid:%d",conn_k.saddr, conn_k.sport, conn_k.daddr, conn_k.dport, "RST", p_conn_v->rid); + goto out; + } + + // 如果连接项的TCP状态为SYN_RECV并且收到了ACK,将TCP状态更新为ESTABLISHED + if(p_conn_v->tcp_state == TCP_S_SYN_RECV && tcph->ack){ + p_conn_v->tcp_state = TCP_S_ESTABLISHED; + goto out_tcp_conn; + } + + // 如果连接项的TCP状态为ESTABLISHED并且收到了FIN,将TCP状态更新为FIN_WAIT1 + if(p_conn_v->tcp_state == TCP_S_ESTABLISHED && tcph->fin){ + p_conn_v->tcp_state = TCP_S_FIN_WAIT1; + goto out_tcp_conn; + } + + // 如果连接项的TCP状态为FIN_WAIT2并且收到了ACK,将TCP状态更新为CLOSE + if(p_conn_v->tcp_state == TCP_S_FIN_WAIT2 && tcph->ack){ + p_conn_v->tcp_state = TCP_S_CLOSE; + goto out_tcp_conn; + } + + // 交换源目地址和端口信息 + swap_conn_src_dst(&conn_k); + + + // 如果连接项的TCP状态为SYN_SENT且收到了SYN和ACK,将TCP状态更新为SYN_RECV + if(p_conn_v->tcp_state == TCP_S_SYN_SENT && tcph->syn && tcph->ack){ + p_conn_v->tcp_state = TCP_S_SYN_RECV; + goto out_tcp_conn; + } + + // 如果连接项的TCP状态为FIN_WAIT1且收到了ACK,将TCP状态更新为CLOSE_WAIT + if(p_conn_v->tcp_state == TCP_S_FIN_WAIT1 && tcph->ack){ + p_conn_v->tcp_state = TCP_S_CLOSE_WAIT; + bpf_printk("conn(%u:%u->%u:%u),state:%s,rid:%d",conn_k.saddr, conn_k.sport, conn_k.daddr, conn_k.dport, "CLOSE_WAIT", p_conn_v->rid); + } + + // 如果连接项的TCP状态为CLOSE_WAIT且收到了FIN和ACK,将TCP状态更新为FIN_WAIT2 + if(p_conn_v->tcp_state == TCP_S_CLOSE_WAIT && tcph->fin && tcph->ack){ + p_conn_v->tcp_state = TCP_S_FIN_WAIT2; + goto out_tcp_conn; + } + const char *tcp_state_str; + + // 根据连接状态设置对应的字符串 + out_tcp_conn: + if(p_conn_v->tcp_state == TCP_S_CLOSE){ + // 如果是CLOSE状态,从映射表中删除连接信息 + bpf_map_delete_elem(&conn_ipv4_map, &conn_k); + }else{ + // 否则更新映射表中的连接信息 + bpf_map_update_elem(&conn_ipv4_map, &conn_k, p_conn_v, BPF_EXIST); + } + // 根据连接状态打印日志 + switch(p_conn_v->tcp_state) { + case TCP_S_SYN_SENT: + tcp_state_str = "SYN_SENT"; + break; + case TCP_S_SYN_RECV: + tcp_state_str = "SYN_RECV"; + break; + case TCP_S_ESTABLISHED: + tcp_state_str = "ESTABLISHED"; + break; + case TCP_S_FIN_WAIT1: + tcp_state_str = "FIN_WAIT1"; + break; + case TCP_S_FIN_WAIT2: + tcp_state_str = "FIN_WAIT2"; + break; + case TCP_S_CLOSE_WAIT: + tcp_state_str = "CLOSE_WAIT"; + break; + case TCP_S_CLOSE: + tcp_state_str = "CLOSE"; + break; + default: + tcp_state_str = ""; + } + bpf_printk("conn(%u:%u->%u:%u),state:%s,rid:%d",conn_k.saddr, conn_k.sport, conn_k.daddr, conn_k.dport, tcp_state_str, p_conn_v->rid); + goto out; + } + else if(nh_type == IPPROTO_UDP){ + // 如果是UDP包,解析UDP头部并获取端口信息 + if(parse_udphdr(&nh, data_end, &udph) < 0){ + goto out; + } + conn_k.sport = bpf_ntohs(udph -> source); + conn_k.dport = bpf_ntohs(udph -> dest); + } + + #ifdef DEBUG_PRINT_EVERY + // 打印除SSH协议以外的所有连接信息 + if(conn.dport != 22) + bpf_printk("conn(%u:%u to %u:%u)", conn.saddr, conn.sport, conn.daddr, conn.dport); + #endif + + } + + +out: + return xdp_stats_record_action(ctx, action); +} + + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/README.org b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/README.org new file mode 100644 index 000000000..7bae95ebc --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/README.org @@ -0,0 +1,79 @@ +# -*- fill-column: 76; -*- +#+TITLE: Test environment script +#+OPTIONS: ^:nil + +This directory contains a setup script that you can use to create test +environments for testing your XDP programs. It works by creating virtual +ethernet (veth) interface pairs and moving one end of each pair to another +network namespace. You can load the XDP program in the other namespace and +send traffic to it through the interface that is visible in the root +namespace. + +Run =./testenv.sh= with no parameter to get a list of available commands, or +run =./testenv.sh --help= to get the full help listing with all options. The +script can maintain several environments active at the same time, and you +can switch between them using the =--name= option. + +If you don't specify a name, the most recently used environment will be +used. If you don't specify a name when setting up a new environment, a +random name will be generated for you. + +Examples: + +Setup new environment named "test": +=./testenv.sh setup --name=test= + +Create a shell alias for easy use of script from anywhere: +=eval $(./testenv.sh alias)= + +See the currently active environment, and a list of all active environment +names (with alias defined as above): +=t status= + +Enter the currently active environment: +=t enter= + +Execute a command inside the environment: +=t exec -- ip a= + +Teardown the environment: +=t teardown= + +* Understanding the network topology + +When setting up a test environment, there will be a virtual link between the +environment inside the new namespace, and the interface visible from the +host system root namespace. The new namespace will be named after the +environment name passed to the script, as will the interface visible in the +outer namespace. The interface *inside* the namespace will always be named +'veth0'. + +To illustrate this, creating a test environment with the name 'test01' (with +=t setup --name test01= will result in the following environment being set +up: + +#+begin_example ++-----------------------------+ +-----------------------------+ +| Root namespace | | Testenv namespace 'test01' | +| | From 'test01' | | +| +--------+ TX-> RX-> +--------+ | +| | test01 +--------------------------+ veth0 | | +| +--------+ <-RX <-TX +--------+ | +| | From 'veth0' | | ++-----------------------------+ +-----------------------------+ +#+end_example + +The 'test01' interface visible in the root namespace is the one we will be +installing XDP programs on in the tutorial lessons. The XDP program will see +packets being *received* on this interface; as you can see from the diagram, +this means all packets being transmitted from inside the new namespace. + +The setup is created this way to simulate the case where the host machine +have physical interfaces; but instead of the traffic arriving from outside +hosts on physical interfaces, they will arrive from inside the namespace on +the virtual interface. This also means that when you generate traffic to +test your XDP programs, you need to generate it from *inside* the test +environment. The =t ping= command will start the ping inside the test +environment by default, and you can run arbitrary programs inside the +environment by using =t exec -- =, or simply spawning a shell with +=t enter=. diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/config.sh b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/config.sh new file mode 100644 index 000000000..4b1853b76 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/config.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# These are the config options for the testlab + + +SETUP_SCRIPT="$(dirname "$0")/setup-env.sh" +STATEDIR="${TMPDIR:-/tmp}/xdp-tutorial-testlab" +IP6_SUBNET=fc00:dead:cafe # must have exactly three :-separated elements +IP6_PREFIX_SIZE=64 # Size of assigned prefixes +IP6_FULL_PREFIX_SIZE=48 # Size of IP6_SUBNET +IP4_SUBNET=10.11 +IP4_PREFIX_SIZE=24 # Size of assigned prefixes +IP4_FULL_PREFIX_SIZE=16 # Size of IP4_SUBNET +VLAN_IDS=(1 2) +GENERATED_NAME_PREFIX="xdptut" diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/setup-env.sh b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/setup-env.sh new file mode 100755 index 000000000..b3db42537 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/setup-env.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Script to setup things inside a test environment, used by testenv.sh for +# executing commands. +# +# Author: Toke Høiland-Jørgensen (toke@redhat.com) +# Date: 7 March 2019 +# Copyright (c) 2019 Red Hat + + +die() +{ + echo "$1" >&2 + exit 1 +} + +[ -n "$TESTENV_NAME" ] || die "TESTENV_NAME missing from environment" +[ -n "$1" ] || die "Usage: $0 " + +set -o nounset + +mount -t bpf bpf /sys/fs/bpf/ || die "Unable to mount /sys/fs/bpf inside test environment" + +exec "$@" diff --git a/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/testenv.sh b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/testenv.sh new file mode 100755 index 000000000..34016b744 --- /dev/null +++ b/eBPF_Supermarket/Network_Subsystem/net_manager/testenv/testenv.sh @@ -0,0 +1,619 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Script to setup and manage test environment for the XDP tutorial. +# See README.org for instructions on how to use. +# +# Author: Toke Høiland-Jørgensen (toke@redhat.com) +# Date: 6 March 2019 +# Copyright (c) 2019 Red Hat + +set -o errexit +set -o nounset +umask 077 + +source "$(dirname "$0")/config.sh" + +NEEDED_TOOLS="ethtool ip tc ping" +MAX_NAMELEN=15 + +# Global state variables that will be set by options etc below +GENERATE_NEW=0 +CLEANUP_FUNC= +STATEFILE= +CMD= +NS= +XDP_LOADER=./xdp_loader +XDP_STATS=./xdp_stats +LEGACY_IP=0 +USE_VLAN=0 +RUN_ON_INNER=0 + +# State variables that are written to and read from statefile +STATEVARS=(IP6_PREFIX IP4_PREFIX + INSIDE_IP6 INSIDE_IP4 INSIDE_MAC + OUTSIDE_IP6 OUTSIDE_IP4 OUTSIDE_MAC + ENABLE_IPV4 ENABLE_VLAN) +IP6_PREFIX= +IP4_PREFIX= +INSIDE_IP6= +INSIDE_IP4= +INSIDE_MAC= +OUTSIDE_IP6= +OUTSIDE_IP4= +OUTSIDE_MAC= +ENABLE_IPV4=0 +ENABLE_VLAN=0 + +die() +{ + echo "$1" >&2 + exit 1 +} + +check_prereq() +{ + local max_locked_mem=$(ulimit -l) + + for t in $NEEDED_TOOLS; do + which "$t" > /dev/null || die "Missing required tools: $t" + done + + if [ "$EUID" -ne "0" ]; then + die "This script needs root permissions to run." + fi + + [ -d "$STATEDIR" ] || mkdir -p "$STATEDIR" || die "Unable to create state dir $STATEDIR" + + if [ "$max_locked_mem" != "unlimited" ]; then + ulimit -l unlimited || die "Unable to set ulimit" + fi +} + +get_nsname() +{ + local GENERATE=${1:-0} + + if [ -z "$NS" ]; then + [ -f "$STATEDIR/current" ] && NS=$(< "$STATEDIR/current") + + if [ "$GENERATE" -eq "1" ] && [ -z "$NS" -o "$GENERATE_NEW" -eq "1" ]; then + NS=$(printf "%s-%04x" "$GENERATED_NAME_PREFIX" $RANDOM) + fi + fi + + if [ "${#NS}" -gt "$MAX_NAMELEN" ]; then + die "Environment name '$NS' is too long (max $MAX_NAMELEN)" + fi + + STATEFILE="$STATEDIR/${NS}.state" +} + +ensure_nsname() +{ + [ -z "$NS" ] && die "No environment selected; use --name to select one or 'setup' to create one" + [ -e "$STATEFILE" ] || die "Environment for $NS doesn't seem to exist" + + echo "$NS" > "$STATEDIR/current" + + read_statefile +} + +get_num() +{ + local num=1 + if [ -f "$STATEDIR/highest_num" ]; then + num=$(( 1 + $(< "$STATEDIR/highest_num" ))) + fi + + echo $num > "$STATEDIR/highest_num" + printf "%x" $num +} + +write_statefile() +{ + [ -z "$STATEFILE" ] && return 1 + echo > "$STATEFILE" + for var in "${STATEVARS[@]}"; do + echo "${var}='$(eval echo '$'$var)'" >> "$STATEFILE" + done +} + +read_statefile() +{ + local value + for var in "${STATEVARS[@]}"; do + value=$(source "$STATEFILE"; eval echo '$'$var) + eval "$var=\"$value\"" + done +} + +cleanup_setup() +{ + echo "Error during setup, removing partially-configured environment '$NS'" >&2 + set +o errexit + ip netns del "$NS" 2>/dev/null + ip link del dev "$NS" 2>/dev/null + rm -f "$STATEFILE" +} + +cleanup_teardown() +{ + echo "Warning: Errors during teardown, partial environment may be left" >&2 +} + + +cleanup() +{ + [ -n "$CLEANUP_FUNC" ] && $CLEANUP_FUNC + + [ -d "$STATEDIR" ] || return 0 + + local statefiles=("$STATEDIR"/*.state) + + if [ "${#statefiles[*]}" -eq 1 ] && [ ! -e "${statefiles[0]}" ]; then + rm -f "${STATEDIR}/highest_num" "${STATEDIR}/current" + rmdir "$STATEDIR" + fi +} + +iface_macaddr() +{ + local iface="$1" + local ns="${2:-}" + local output + + if [ -n "$ns" ]; then + output=$(ip -br -n "$ns" link show dev "$iface") + else + output=$(ip -br link show dev "$iface") + fi + echo "$output" | awk '{print $3}' +} + +set_sysctls() +{ + local iface="$1" + local in_ns="${2:-}" + local nscmd= + + [ -n "$in_ns" ] && nscmd="ip netns exec $in_ns" + local sysctls=(accept_dad + accept_ra + mldv1_unsolicited_report_interval + mldv2_unsolicited_report_interval) + + for s in ${sysctls[*]}; do + $nscmd sysctl -w net.ipv6.conf.$iface.${s}=0 >/dev/null + done +} + +wait_for_dev() +{ + local iface="$1" + local in_ns="${2:-}" + local retries=5 # max retries + local nscmd= + + [ -n "$in_ns" ] && nscmd="ip netns exec $in_ns" + while [ "$retries" -gt "0" ]; do + if ! $nscmd ip addr show dev $iface | grep -q tentative; then return 0; fi + sleep 0.5 + retries=$((retries -1)) + done +} + +get_vlan_prefix() +{ + # Split the IPv6 prefix, and add the VLAN ID to the upper byte of the fourth + # element in the prefix. This will break if the global prefix config doesn't + # have exactly three elements in it. + local prefix="$1" + local vid="$2" + (IFS=:; set -- $prefix; printf "%s:%s:%s:%x::" "$1" "$2" "$3" $(($4 + $vid * 4096))) +} + +setup() +{ + get_nsname 1 + + echo "Setting up new environment '$NS'" + + [ -e "$STATEFILE" ] && die "Environment for '$NS' already exists" + + local NUM=$(get_num "$NS") + local PEERNAME="testl-ve-$NUM" + [ -z "$IP6_PREFIX" ] && IP6_PREFIX="${IP6_SUBNET}:${NUM}::" + [ -z "$IP4_PREFIX" ] && IP4_PREFIX="${IP4_SUBNET}.$((0x$NUM))." + + INSIDE_IP6="${IP6_PREFIX}2" + INSIDE_IP4="${IP4_PREFIX}2" + OUTSIDE_IP6="${IP6_PREFIX}1" + OUTSIDE_IP4="${IP4_PREFIX}1" + + CLEANUP_FUNC=cleanup_setup + + if ! mount | grep -q /sys/fs/bpf; then + mount -t bpf bpf /sys/fs/bpf/ + fi + + ip netns add "$NS" + ip link add dev "$NS" type veth peer name veth0 netns "$NS" + + set_sysctls $NS + ip link set dev "$NS" up + ip addr add dev "$NS" "${OUTSIDE_IP6}/${IP6_PREFIX_SIZE}" + ethtool -K "$NS" rxvlan off txvlan off + # Prevent neighbour queries on the link + INSIDE_MAC=$(iface_macaddr veth0 "$NS") + ip neigh add "$INSIDE_IP6" lladdr "$INSIDE_MAC" dev "$NS" nud permanent + + set_sysctls veth0 "$NS" + ip -n "$NS" link set dev lo up + ip -n "$NS" link set dev veth0 up + ip -n "$NS" addr add dev veth0 "${INSIDE_IP6}/${IP6_PREFIX_SIZE}" + ip netns exec "$NS" ethtool -K veth0 rxvlan off txvlan off + # Prevent neighbour queries on the link + OUTSIDE_MAC=$(iface_macaddr "$NS") + ip -n "$NS" neigh add "$OUTSIDE_IP6" lladdr "$OUTSIDE_MAC" dev veth0 nud permanent + # Add route for whole test subnet, to make it easier to communicate between + # namespaces + ip -n "$NS" route add "${IP6_SUBNET}::/$IP6_FULL_PREFIX_SIZE" via "$OUTSIDE_IP6" dev veth0 + + if [ "$LEGACY_IP" -eq "1" ]; then + ip addr add dev "$NS" "${OUTSIDE_IP4}/${IP4_PREFIX_SIZE}" + ip -n "$NS" addr add dev veth0 "${INSIDE_IP4}/${IP4_PREFIX_SIZE}" + ip neigh add "$INSIDE_IP4" lladdr "$INSIDE_MAC" dev "$NS" nud permanent + ip -n "$NS" neigh add "$OUTSIDE_IP4" lladdr "$OUTSIDE_MAC" dev veth0 nud permanent + ip -n "$NS" route add "${IP4_SUBNET}/${IP4_FULL_PREFIX_SIZE}" via "$OUTSIDE_IP4" dev veth0 + ENABLE_IPV4=1 + else + ENABLE_IPV4=0 + fi + + if [ "$USE_VLAN" -eq "1" ]; then + ENABLE_VLAN=1 + for vid in "${VLAN_IDS[@]}"; do + local vlpx="$(get_vlan_prefix "$IP6_PREFIX" "$vid")" + local inside_ip="${vlpx}2" + local outside_ip="${vlpx}1" + ip link add dev "${NS}.$vid" link "$NS" type vlan id "$vid" + ip link set dev "${NS}.$vid" up + ip addr add dev "${NS}.$vid" "${outside_ip}/${IP6_PREFIX_SIZE}" + ip neigh add "$inside_ip" lladdr "$INSIDE_MAC" dev "${NS}.$vid" nud permanent + set_sysctls "${NS}/$vid" + + ip -n "$NS" link add dev "veth0.$vid" link "veth0" type vlan id "$vid" + ip -n "$NS" link set dev "veth0.$vid" up + ip -n "$NS" addr add dev "veth0.$vid" "${inside_ip}/${IP6_PREFIX_SIZE}" + ip -n "$NS" neigh add "$outside_ip" lladdr "$OUTSIDE_MAC" dev "veth0.$vid" nud permanent + set_sysctls "veth0/$vid" "$NS" + done + else + ENABLE_VLAN=0 + fi + + write_statefile + + CLEANUP_FUNC= + + echo -n "Setup environment '$NS' with peer ip ${INSIDE_IP6}" + [ "$ENABLE_IPV4" -eq "1" ] && echo " and ${INSIDE_IP4}." || echo "." + echo "Waiting for interface configuration to settle..." + echo "" + wait_for_dev "$NS" && wait_for_dev veth0 "$NS" + + LEGACY_IP=0 USE_VLAN=0 run_ping -c 1 + + echo "$NS" > "$STATEDIR/current" +} + +teardown() +{ + get_nsname && ensure_nsname "$NS" + + echo "Tearing down environment '$NS'" + + CLEANUP_FUNC=cleanup_teardown + + ip link del dev "$NS" + ip netns del "$NS" + rm -f "$STATEFILE" + [ -d "/sys/fs/bpf/$NS" ] && rmdir "/sys/fs/bpf/$NS" || true + + if [ -f "$STATEDIR/current" ]; then + local CUR=$(< "$STATEDIR/current" ) + [[ "$CUR" == "$NS" ]] && rm -f "$STATEDIR/current" + fi + + CLEANUP_FUNC= +} + +reset() +{ + teardown && setup +} + +ns_exec() +{ + get_nsname && ensure_nsname "$NS" + + ip netns exec "$NS" env TESTENV_NAME="$NS" "$SETUP_SCRIPT" "$@" +} + +enter() +{ + ns_exec "${SHELL:-bash}" +} + +run_ping() +{ + local PING + local IP + + get_nsname && ensure_nsname "$NS" + + echo "Running ping from inside test environment:" + echo "" + + if [ "$LEGACY_IP" -eq "1" ]; then + PING=$(which ping) + IP="${OUTSIDE_IP4}" + [ "$USE_VLAN" -eq "0" ] || die "Can't use --legacy-ip and --vlan at the same time." + [ "$ENABLE_IPV4" -eq "1" ] || die "No legacy IP addresses configured in environment." + else + PING=$(which ping6 2>/dev/null || which ping) + if [ "$USE_VLAN" -eq "0" ]; then + IP="${OUTSIDE_IP6}" + else + [ "$ENABLE_VLAN" -eq "1" ] || die "No VLANs configured in environment." + IP="$(get_vlan_prefix "$IP6_PREFIX" "${VLAN_IDS[0]}")1" + fi + fi + + ns_exec "$PING" "$IP" "$@" +} + +run_tcpdump() +{ + get_nsname && ensure_nsname "$NS" + + if [ "$RUN_ON_INNER" -eq "1" ]; then + ns_exec tcpdump -nei veth0 "$@" + else + tcpdump -nei "$NS" "$@" + fi +} + +status() +{ + get_nsname + + echo "Currently selected environment: ${NS:-None}" + if [ -n "$NS" ] && [ -e "$STATEFILE" ]; then + read_statefile + echo -n " Namespace: "; ip netns | grep "^$NS" + echo " Prefix: ${IP6_PREFIX}/${IP6_PREFIX_SIZE}" + [ "$ENABLE_IPV4" -eq "1" ] && echo " Legacy prefix: ${IP4_PREFIX}0/${IP4_PREFIX_SIZE}" + echo -n " Iface: "; ip -br a show dev "$NS" | sed 's/\s\+/ /g' + fi + echo "" + + echo "All existing environments:" + for f in "$STATEDIR"/*.state; do + if [ ! -e "$f" ]; then + echo " No environments exist" + break + fi + NAME=$(basename "$f" .state) + echo " $NAME" + done +} + +print_alias() +{ + local scriptname="$(readlink -e "$0")" + local sudo= + + [ -t 1 ] && echo "Eval this with \`eval \$($0 alias)\` to create shell alias" >&2 + + if [ "$EUID" -ne "0" ]; then + sudo="sudo " + echo "WARNING: Creating sudo alias; be careful, this script WILL execute arbitrary programs" >&2 + fi + + echo "" >&2 + + + echo "alias t='$sudo$scriptname'" +} + +# +# This command can be used to populate maps for the assignment 3 of the +# packet03-redirecting lesson. It takes two arguments: the source and the +# destination environment names. +# +populate_redirect_map() +{ + local src="$1" + local dest="$2" + local src_mac=$(ip netns exec $src cat /sys/class/net/veth0/address) + local dest_mac=$(ip netns exec $dest cat /sys/class/net/veth0/address) + + # set bidirectional forwarding + ./xdp_prog_user -d $src -r $dest --src-mac $src_mac --dest-mac $dest_mac + ./xdp_prog_user -d $dest -r $src --src-mac $dest_mac --dest-mac $src_mac +} + +xdp_load() +{ + get_nsname && ensure_nsname + + [ -x "$XDP_LOADER" ] || die "Loader '$XDP_LOADER' is not executable" + $XDP_LOADER --dev "$NS" "$@" +} + +xdp_unload() +{ + get_nsname && ensure_nsname + + [ -x "$XDP_LOADER" ] || die "Loader '$XDP_LOADER' is not executable" + $XDP_LOADER --dev "$NS" --unload "$@" +} + +xdp_stats() +{ + get_nsname && ensure_nsname + + [ -x "$XDP_STATS" ] || die "Stats tool '$XDP_STATS' is not executable" + $XDP_STATS --dev "$NS" "$@" +} + +usage() +{ + local FULL=${1:-} + + echo "Usage: $0 [options] [param]" + echo "" + echo "Commands:" + echo "setup Setup and initialise new environment" + echo "teardown Tear down existing environment" + echo "reset Reset environment to original state" + echo "exec Exec inside test environment" + echo "enter Execute shell inside test environment" + echo "ping Run ping inside test environment" + echo "alias Print shell alias for easy access to this script" + echo "status (or st) Show status of test environment" + echo "load Load XDP program on outer interface" + echo "unload Unload XDP program on outer interface" + echo "tcpdump Run on outer interface (or inner with --inner)" + echo "stats Run the XDP statistics program" + echo "redirect Setup redirects for packet03 lessons" + echo "" + + if [ -z "$FULL" ] ; then + echo "Use --help to see the list of options." + exit 1 + fi + + echo "Options:" + echo "-h, --help Show this usage text" + echo "" + echo "-n, --name Set name of test environment. If not set, the last used" + echo " name will be used, or a new one generated." + echo "" + echo "-g, --gen-new Generate a new test environment name even though an existing" + echo " environment is selected as the current one." + echo "" + echo "-l, --loader Specify program to use for loading XDP programs." + echo " Device name will be passed to it, along with any additional" + echo " command line options passed after --." + echo " Default: '$XDP_LOADER'" + echo "" + echo "-s, --stats Specify program to use for getting statistics ('stats' command)." + echo " Device name will be passed to it, along with any additional" + echo " command line options passed after --." + echo " Default: '$XDP_STATS'" + echo "" + echo " --legacy-ip Enable legacy IP (IPv4) support." + echo " For setup and reset commands this enables configuration of legacy" + echo " IP addresses on the interface, for the ping command it switches to" + echo " legacy ping." + echo "" + echo " --vlan Enable VLAN support." + echo " When used with the setup and reset commands, these VLAN IDs will" + echo " be configured: ${VLAN_IDS[*]}. The VLAN interfaces are named as" + echo " .." + echo " When used with the ping command, the pings will be sent on the" + echo " first VLAN ID (${VLAN_IDS[0]})." + echo "" + echo " --inner Use with tcpdump command to run on inner interface." + echo "" + exit 1 +} + + +OPTS="hn:gl:s:" +LONGOPTS="help,name:,gen-new,loader:,stats:,legacy-ip,vlan,inner" + +OPTIONS=$(getopt -o "$OPTS" --long "$LONGOPTS" -- "$@") +[ "$?" -ne "0" ] && usage >&2 || true + +eval set -- "$OPTIONS" + + +while true; do + arg="$1" + shift + + case "$arg" in + -h | --help) + usage full >&2 + ;; + -n | --name) + NS="$1" + shift + ;; + -l | --loader) + XDP_LOADER="$1" + shift + ;; + -s | --stats) + XDP_STATS="$1" + shift + ;; + -g | --gen-new) + GENERATE_NEW=1 + ;; + --legacy-ip) + LEGACY_IP=1 + ;; + --vlan) + USE_VLAN=1 + ;; + --inner) + RUN_ON_INNER=1 + ;; + -- ) + break + ;; + esac +done + +[ "$#" -eq 0 ] && usage >&2 + +case "$1" in + st|sta|status) + CMD=status + ;; + setup|teardown|reset|enter) + CMD="$1" + ;; + load|unload|stats) + CMD="xdp_$1" + ;; + "exec") + CMD=ns_exec + ;; + ping|tcpdump) + CMD="run_$1" + ;; + redirect) + CMD=populate_redirect_map + ;; + "alias") + print_alias + exit 0 + ;; + "help") + usage full >&2 + ;; + *) + usage >&2 + ;; +esac + +shift +trap cleanup EXIT +check_prereq +$CMD "$@"